Tuesday, April 30, 2013

RG#96: Basic point and line graph with error bars (publication purpose)


myd <- data.frame (X = c(1:12,1:12),
                   Y = c(8, 12, 13, 18,  22, 16, 24, 29,  34, 15, 8, 6,
                         9, 10, 12, 18, 26, 28, 28, 30, 20, 10, 9, 9),
                   group = rep (c("A-group", "B-group"), each = 12),
                   error = rep (c(2.5, 3.0), each = 12))
                   
require(ggplot2)
require(grid)
# line and point plot
f1 = ggplot(data = myd, aes(x = X, y = Y, group = group) )  # lesion becomes a classifying factor
f2 <- f1 + geom_errorbar(aes(ymin = Y - error, ymax = Y + error), width=0.3) +
geom_line() + geom_point(aes(shape=group, fill=group), size=5)

 f3 <- f2 +  scale_x_continuous("X (units)", breaks=1:12) +
     scale_y_continuous("Y (units)", limits = c(0, 40), breaks=seq(0, 40, by = 5)) +
     scale_shape_manual(values=c(24,21)) +
     scale_fill_manual(values=c("white","black")) +
     stat_abline(intercept=0, slope=0, linetype="dotted") +
     annotate("text", x=11, y=10, label="X") +
     theme_bw()

   optns <- theme (
          plot.title = element_text(face="bold", size=14),
          axis.title.x = element_text(face="bold", size=12),
          axis.title.y = element_text(face="bold", size=12, angle=90),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          legend.position = c(0.2,0.8),
          legend.title = element_blank(),
          legend.text = element_text(size=12),
          legend.key.size = unit(1.5, "lines"),
          legend.key = element_blank()
     )
f3 +  ggtitle ( "MY awsome plot for publication") + optns



Monday, April 29, 2013

RG#95: Interactive Biplot


# data
set.seed(1234)
P <- vector()
DF <- as.data.frame(matrix(rep(NA, 100), nrow=10))
names(DF) <- c(paste("M",1:10, sep=""))
for(i in 1:10) {
DF[,i] <- rnorm(10, 10, 3)
}
rownames (DF) <- paste("O", 1:10, sep = "")

require(BiplotGUI)


Biplots(Data = DF, groups = rep(1, nrow(DF)),PointLabels = rownames(DF),
AxisLabels = colnames(DF))



# you can work in the interactive menu window



RG#94: Hive plot (diagram)

require(HiveR)

# test random data, nodes = 200, edge 200   
td1 <- ranHiveData(nx = 3, nn = 200, ne = 200)
plotHive(td1, bkgnd = "cornsilk"
 
 

 

RG#94: Arch Diagram

# 'devtools' need to be installed if not installed 
install.packages("devtools") 

# load devtools
library(devtools)

# install 'arcdiagram'
install_github('arcdiagram',  username='gastonstat')
 
 
 library(arcdiagram)

# star graph with 20 nodes
star_graph = graph.star(20, mode="out")

# extract edgelist
staredges = get.edgelist(star_graph)

# default arc diagram
arcplot(staredges)
  
 
 # different ordering, arc widths, arc colors, and node sizes
set.seed(1234)
orderv <- sample (1:20)
labelv <- paste("nd",1:20,sep="-")
archlinewd <- 4*runif(20,.5,2)
colarcs <- heat.colors (19)
 nodesize <- runif(20,1,3)
arcplot(staredges, ordering=orderv, labels= labelv,
   lwd.arcs= archlinewd, col.arcs= colarcs,
   show.nodes=TRUE, pch.nodes=21, cex.nodes= nodesize,
   col.nodes="lightseagreen", bg.nodes="midnightblue", lwd.nodes= 2)
 
 
 
 
 
 
 
 

Sunday, April 28, 2013

RG#93: Add countour or heat map plot to XY scatter plot

# data
set.seed(1234)
n <- 10000
X = rnorm (n, 10, 4)
Y = X*1.5 + rnorm (n, 0, 8)


## colour brewing
library(RColorBrewer)
g = 11
my.cols <- rev(brewer.pal(g, "RdYlBu"))

#compute 2D kernel density

# kernel density using MASS 
library(MASS)
z <- kde2d(X, Y, n=50)

plot(X, Y, xlab="X", ylab="Y", pch=19, cex=.3, col = "gray60")
contour(z, drawlabels=FALSE, nlevels=g, col=my.cols, add=TRUE, lwd = 2)
abline(h=mean(Y), v=mean(X), lwd=2, col = "black")
legend("topleft", paste("r=", round(cor(X, Y),2)), bty="n")

## estimate the z counts
prob <- c(.99, .95, .90, .8, .5, .1, 0.05)
dx <- diff(z$x[1:2])
dy <- diff(z$y[1:2])
sz <- sort(z$z)

c1 <- cumsum(sz) * dx * dy

levels <- sapply(prob, function(x) {
              approx(c1, sz, xout = 1 - x)$y })

plot(X,Y, col = "gray80", pch = 19, cex = 0.3)
contour(z, levels= round (levels,7), add=T, col = "red")




# smooth scatter
require(KernSmooth)
smoothScatter(X, Y, nrpoints=.3*n, colramp=colorRampPalette(my.cols), pch=19, cex=.3, col = "green1")





 

Thursday, April 25, 2013

RG#91: Plot bar or pie chart over world map using rworldmap package

require (rworldmap)
require(rworldxtra)

# get world map 

plot(getMap(resolution = "high" ))


##getting example data
dataf <- getMap()@data 
mapBars( dataf, nameX="LON", nameY="LAT" , nameZs=c('GDP_MD_EST',
'GDP_MD_EST','GDP_MD_EST') , mapRegion='asia' , symbolSize=2  ,
 barOrient = 'horiz' )

mapBars( dataf, nameX="LON", nameY="LAT" , nameZs=c('GDP_MD_EST','GDP_MD_EST',
'GDP_MD_EST') , mapRegion='asia' , symbolSize=3  , barOrient = 'vert' ,  
oceanCol = "blue1", landCol = "lightgreen")



 
mapPies( dataf,nameX="LON", nameY="LAT", nameZs=c('GDP_MD_EST','GDP_MD_EST',
'GDP_MD_EST','GDP_MD_EST'),mapRegion='asia', oceanCol = "lightseagreen",
 landCol = "gray50")





RG#90: fluctutation diagram: graphical representation of a contingency table

set.seed (1234)
myd <- data.frame (x1 = rnorm (1000, 15, 5), x2 = sample (c("A", "B", "C"), 1000, replace = TRUE), x3 = sample (c(1,2,2), 1000, replace = TRUE))


# fluctuation plot
require (ggplot2)

ggfluctuation(table(myd$x2, myd$x3))+theme_bw()

ggfluctuation(table(myd$x2, myd$x3), type="colour")+theme_bw()

ggfluctuation(table(cut (myd$x1,4), myd$x3), type="colour")+theme_bw()



ggfluctuation(table(cut (myd$x1,5), myd$x3))+theme_bw()





RG#89: Raster in R


 require(grid)
grid.raster(matrix(colors()[1:600], ncol=20))





set.seed(1234)

mt <- matrix (sample(c("red","red1", "yellow", "purple", "green1", "green4", "blue"), 10000, replace = TRUE), ncol = 100)

grid.raster(mt)


 
rgb.palette <- colorRampPalette(c("red", "orange", "blue"),
                                space = "rgb") 
 
bg.palette <- colorRampPalette(c("blue", "green"), space = "rgb")

gr.palette <-  colorRampPalette(c("green", "red"),
                                space = "rgb")
 
 
colrs <- matrix(c(rgb.palette(20),bg.palette(20),gr.palette(20)),   nrow=6, ncol=10)
grid.newpage()
 grid.raster(colrs)
  grid.raster(colrs, interpolate=FALSE)
# raster in ggplot2
require(ggplot2)

# Generate data
funp <- function (n,r=2) {
 xv <- seq(-r*pi, r*pi, len=n)
 df1 <- expand.grid(x=xv, y=xv)
 df1$r <- sqrt(df1$x^2 + df1$y^2)
 df1$z <- cos(df1$r^2)*exp(-df1$r/6)
 df1
}
qplot(x, y, data = funp(1000), fill = z, geom = "raster") + theme_bw()










Wednesday, April 24, 2013

RG#88: Plot pie over google map

require(ggplot2)
library(ggmap)
require(grid)

 # blank theme function
blk_theme <- function (){theme(
     line = element_blank(),
     rect = element_blank(),
     text = element_blank(),
     axis.ticks.length = unit(0, "cm"),
     axis.ticks.margin = unit(0.01, "cm"),
     legend.position = "none",
     panel.margin = unit(0, "lines"),
     plot.margin = unit(c(0, 0, -.5, -.5), "lines"),
     complete = TRUE
   )
   }
  
dha = get_map(location = c(lon = 80.56410278, lat = 28.7089375), zoom = 14, maptype = 'roadmap', source = "google")
dhamp <-  ggmap(dha) + blk_theme ()


# data 1
d1 <- sample ( c("A", "B", "C", "D"), 100, replace = TRUE)
d2 <- sample ( c("A", "B", "C"), 100, replace = TRUE)
d3 <- sample ( c("A", "B"), 100, replace = TRUE)
d4 <- sample ( c("A", "B"), 100, replace = TRUE)
d5 <- sample ( c("A", "B", "C", "D"), 100, replace = TRUE)

myd <- data.frame(x = factor(1),d1, d2, d3, d4, d5)

# pie charts
pie1 <- qplot(x, d1, data = myd, geom = 'bar', fill = d1) +
  coord_polar(theta = 'y') + blk_theme()
pie2 <- qplot(x, d2, data = myd, geom = 'bar', fill = d2) +
  coord_polar(theta = 'y') + blk_theme()
pie3 <- qplot(x, d3, data = myd, geom = 'bar', fill = d3) +
  coord_polar(theta = 'y') + blk_theme()
pie4 <- qplot(x, d4, data = myd, geom = 'bar', fill = d4) +
  coord_polar(theta = 'y') + blk_theme()
pie5 <- qplot(x, d5, data = myd, geom = 'bar', fill = d5) +
  coord_polar(theta = 'y') + blk_theme()

# plotting and viewports
# function
vplayout <- function(x, y)viewport(layout.pos.row = x, layout.pos.col = y)
####
grid.newpage()
pushViewport(viewport(layout = grid.layout(20,20)))
# print mainmap
print(dhamp, vp = vplayout(1:20, 1:20))

  # need to find manually, maximum and minimum lon and lat
 # click on google earth to find
 maximum.lon <- 80.591
 minimum.lon <- 80.538
 maximum.lat <- 28.735
 minimum.lat <- 28.685

 # X and Y cordinates where pie need to draw
 pieposX <- c(28.72, 28.73, 28.70,28.73,28.69 )
 pieposY <- c(80.55, 80.58, 80.58, 80.55,80.58)

ycoord <- 20 * ((pieposX - minimum.lat)/(maximum.lat - minimum.lat))
xcoord <- 20 * ((pieposY - minimum.lon)/(maximum.lon - minimum.lon))

# plot pie over layer
print(pie1, vp = vplayout(xcoord[1], ycoord[1]))
print(pie2, vp = vplayout(xcoord[2], ycoord[2]))
print(pie3, vp = vplayout(xcoord[3], ycoord[3]))
print(pie4, vp = vplayout(xcoord[4], ycoord[4]))
print(pie5, vp = vplayout(xcoord[5], ycoord[5]))



# bigger pie plot  

grid.newpage()
pushViewport(viewport(layout = grid.layout(7,7)))
# print mainmap
print(dhamp, vp = vplayout(1:7, 1:7))

  # need to find manually, maximum and minimum lon and lat
 # click on google earth to find
 maximum.lon <- 80.591
 minimum.lon <- 80.538
 maximum.lat <- 28.735
 minimum.lat <- 28.685

 # X and Y cordinates where pie need to draw
 pieposX <- c(28.72, 28.73, 28.70,28.73,28.69 )
 pieposY <- c(80.55, 80.58, 80.58, 80.55,80.58)

ycoord <- 7 * ((pieposX - minimum.lat)/(maximum.lat - minimum.lat))
xcoord <- 7 * ((pieposY - minimum.lon)/(maximum.lon - minimum.lon))

# plot pie over layer 
print(pie1, vp = vplayout(xcoord[1], ycoord[1]))
print(pie2, vp = vplayout(xcoord[2], ycoord[2]))
print(pie3, vp = vplayout(xcoord[3], ycoord[3]))
print(pie4, vp = vplayout(xcoord[4], ycoord[4]))
print(pie5, vp = vplayout(xcoord[5], round (ycoord[5],0)))







RG#87: histogram / bar chart over map

library(ggsubplot)
library(ggplot2)
library(maps)
library(plyr)

#Get world map info
world_map <- map_data("world")

#Create a base plot
p <- ggplot()  + geom_polygon(data=world_map,aes(x=long, y=lat,group=group), col = "blue4", fill = "lightgray") + theme_bw()

# Calculate the mean longitude and latitude per region (places where subplots are plotted),
cntr <- ddply(world_map,.(region),summarize,long=mean(long),lat=mean(lat))

# example data
 myd <- data.frame (region = rep (c("USA","China","USSR","Brazil", "Australia","India", "Canada"),5),
                    categ = rep (c("A", "B", "C", "D", "E"),7), frequency = round (rnorm (35, 8000, 4000), 0))
                   

subsetcntr  <- subset(cntr, region %in% c("USA","China","USSR","Brazil", "Australia","India", "Canada"))

simdat <- merge(subsetcntr, myd)
colnames(simdat) <- c( "region","long","lat", "categ", "myvar" )


 myplot  <- p+geom_subplot2d(aes(long, lat, subplot = geom_bar(aes(x = categ, y = myvar, fill = categ, width=1), position = "identity")), ref = NULL, data = simdat)

print(myplot)






RG#86: 3D XY plot with sphare plots (interactive)

# data
myd <-data.frame(name =c("A","B","C","D", "E"),
var_x=c(6,7,11,1,8),
var_y=c(9,2,9,4, 2),
var_z=c(4,1,6,5,1),
point_size=c(6,3,6,3, 5)
)
myd$pradius <- myd$point_size*0.15

require(rgl)

spheres3d(myd[,2:4], radius = myd[,6], col = c("darkred", "green", "yellow", "orange", "purple"), alpha = 0.8)
axes3d(box = TRUE)

#title3d(xlab = "var_x", ylab = "var_y", zlab = "var_z")
#text3d(myd[1,2:5], texts = "A")

segs <- rbind(myd[1:2,2:5], myd[2:3,2:5], myd[3:4,2:5], myd[4:5,2:5], myd[c(5,1),2:5])
segments3d(segs, col = "blue", lwd = 2)

# take a SNPshot
rgl.snapshot ("my3dplot.png", fmt = "png")





# you can rotate the axis and take another snapshot, and shave as different name
rgl.snapshot ("my3dplot2.png", fmt = "png")

Tuesday, April 23, 2013

RG#85: Plotting XY plot with cluster and adding ellipse to it


# data 
set.seed (1234)
c1 <- rnorm (40, 0.1, 0.02); c2 <- rnorm (40, 0.3, 0.01)
c3 <- rnorm (40, 0.5, 0.01); c4 <- rnorm (40, 0.7, 0.01)
c5 <- rnorm (40, 0.9, 0.03)
Yv <- 0.3 + rnorm (200, 0.05, 0.05)
myd <- data.frame (Xv = round (c(c1, c2, c3, c4, c5), 2), Yv = round (Yv, 2),
 cltr = factor (rep(1:5, each = 40)))

library(devtools)
require(ggplot2)
source_url("https://raw.github.com/low-decarie/FAAV/master/r/stat-ellipse.R")

ggplot(myd, aes(x=Xv, y=Yv, color=cltr)) + ylim (c(0, 0.1 + max(myd$Yv))) + stat_ellipse() +
xlim (c(0, 0.1 + max(myd$Xv))) +
 geom_point(shape=20, size = 5) +
 scale_colour_manual ( values = c("red", "green", "purple", "yellow", "blue4")) +
 theme_bw()



# plot using base:

plot(myd$Xv, myd$Yv, col = myd$cltr, pch = 19, cex = 1.5)


# interactively identifying the points, use stop when done  
identify (myd$Xv, myd$Yv,labels = row.names(myd))

plot(myd$Xv, myd$Yv, col = myd$cltr, pch = 19, cex = 1.5)


# add points by clicking, use stop when done  
pl <- locator (type = "p", pch = 2, col = "green1")
pl # see the coordinates on added points 

plot(myd$Xv, myd$Yv, col = myd$cltr, pch = 19, cex = 1.5)
# add lines by clicking 
cord <- locator (type = "l", col = "green1", lwd = 2)
cord # display coordinates 


# interactive scatter plot 
require(iplots) 
iplot(myd$Xv, myd$Yv, col = myd$cltr, pch = 19, size = 5, ylim = c(0, 1), xlim = c(0,1))






RG#84: Ruler plot (Scale plot)

require(ggplot2)

# function 
ruler.bar.plot <-function(gg, nn, mjtick =1, mntick = 0.2, mjtickcol = "black", mntickcol = "white"){
seq.list<-list()
for(i in 1:length(gg)){
  ystart<-seq(mntick ,gg[i],mntick )
  yend<-ystart
  xstart<-rep(i-0.45,length(ystart))
  xend<-xstart+0.1
  nam.val<-c(nn[i],rep(NA,length(ystart)-1))
  numb.val<-c(gg[i],rep(NA,length(ystart)-1))
  seq.list[[i]]<-data.frame(nam.val,numb.val,xstart,xend,ystart,yend)
}
df<-as.data.frame(do.call(rbind, seq.list))
p <- ggplot(df, aes(nam.val))
p + geom_bar(aes(y=numb.val,fill=nam.val),stat="identity",width=0.5,color=mjtickcol,lwd=1.1) +
        geom_segment(aes(x=xstart,y=ystart,xend=xend,yend=yend), color=mjtickcol) +
        ylim(c(0,max(gg)+0.5))  + scale_x_discrete(limits= nn) +
            geom_hline(yintercept=seq(mjtick,max(gg),mjtick),color=mntickcol,lwd=1.1)+   geom_text(aes (y = numb.val, label = numb.val), vjust= - 1 ) +
    guides(fill=FALSE) +
         theme_bw()+
    theme(axis.title=element_blank(),
               axis.text.y=element_blank(),
               axis.text.x=element_text(angle=90,face="bold",size=rel(1.5)),
               axis.ticks=element_blank(),
               panel.background = element_rect(fill = mntickcol),
               panel.border=element_blank(),
               panel.grid=element_blank(),
               legend.position = "none")
 }


# human height in inches, ticking done at each 12
htinch <- c(66, 72, 88, 54)
id <- c("A", "B", "C", "D")
ruler.bar.plot(htinch, id, mjtick =12, mntick = 1,  mjtickcol = "black", mntickcol = "white")





 ruler.bar.plot(htinch, id, mjtick =12, mntick = 2,  mjtickcol = "red", mntickcol = "lightgoldenrodyellow")



VV = c(0.13, 0.33, 0.82, 0.46)
LV = c("A", "C", "L", "N")
ruler.bar.plot(VV, LV, mjtick =0.1, mntick = 0.02,  mjtickcol = "black", mntickcol = "white")




Thursday, April 18, 2013

RG#83: Multi-faceted (Trellis) plot of time series plot (weather data example)


st <- as.Date("2009-1-1")
en <- as.Date("2011-12-28")
date1 <- seq(st, en, "1 day")
avgtm <- round (rnorm (length(date1), 50,5), 1)
maxtm <- round (avgtm + 8 + abs(rnorm (length (avgtm), 0, 1)),1)
mintm <-  round (avgtm - 8 + abs(rnorm (length (avgtm), 0, 1)), 1)
myd <- data.frame(date1, maxtm, mintm, avgtm)

# extract month
month <- function(x)format(x, '%m')
year <- function(x)format(x, '%Y')

require(lattice)
require(latticeExtra)

xyplot(avgtm ~ date1 | year(date1), data=myd,
       type='l', layout=c(1, 3),
       scales=list(x=list(relation='free')),
       xlab='', ylab='',
       panel=function(x, y, ...){
           panel.xblocks(x, month,
                         col = c("lightblue", "lightgreen"),
                         border = "darkgray")
           panel.xyplot(x, y, lwd = 1, col='red', ...)
           })


RG#82: Time series plot (weather data with monthly averages connected)


# data 
st <- as.Date ("2009-1-1")
en <- as.Date ("2009-6-28")
date1 <- seq(st, en, "1 day")
year <- format(date1, "%Y")
month <- format (date1, "%b")
day <- as.numeric (format(date1, "%d"))

set.seed(1234)

# average temperature 
avgtm <- round (rnorm (length(date1), 50,5), 1)
# maximum temperature 
maxtm <- round (avgtm + 8 + abs(rnorm (length (avgtm), 0, 1)),1)
# minimum temperature 
mintm <-  round (avgtm - 8 + abs(rnorm (length (avgtm), 0, 1)), 1)
# record maximum temperature 
rmaxtm <- round (maxtm + 15 + abs(rnorm (length (avgtm), 0, 1)), 1)
# record minimum temperature 
rmintm <-  round (mintm - 15 + abs(rnorm (length (avgtm), 0, 1)), 1)
myd <- data.frame ( year, month, day, avgtm, maxtm, mintm, rmaxtm, rmintm )
myd$date <- as.Date(paste(myd$year, myd$month, myd$day), format='%Y %b %d')
levels (myd$month) <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug","Sep", "Oct", "Nov", "Dec")

# find the position to plot for week points or lines 
# for weeks lines
tw = as.numeric (as.Date (seq(st, en, "weeks")), origin = "1970-1-1")
# find the position to plot for month points or lines 
# for month lines
tm = as.numeric (as.Date (seq(st, en, "months")), origin = "1970-1-1")


# plot
  plot(myd$date, myd$avgtm, type = "p", col = "black", xlab = "Date", ylab = "temperature", pch = 19, ylim = c(20, 80), cex = 0.5)

points(myd$date, myd$maxtm, type = "p", col = "red", xlab = "Date", ylab = "temperature", pch = 19, cex = 0.5)
  points(myd$date, myd$mintm, type = "p", col = "green4", xlab = "Date", ylab = "temperature", pch = 19, cex = 0.5)
  points(myd$date, myd$rmintm, type = "p", col = "lightgreen", xlab = "Date", ylab = "temperature", pch = 1, cex = 0.5)
  points(myd$date, myd$rmaxtm, type = "p", col = "pink", xlab = "Date", ylab = "temperature", pch = 1, cex = 0.5)

abline(v = tw, lty = 1, col = "gray50", lwd = 1)
abline(v = tm, lty = 1, col = "blue4", lwd=3)



 # calculating average mean
out1 <- data.frame (with (myd,  tapply(avgtm, month, mean)))
names(out1) <- c("meanavgtm")
out1$grp <- rownames (out1)
out1$tm <- tm

# ploting mean connected with lines
points (out1$tm, out1$meanavgtm, type = "b", col = "black", pch = 19, cex = 3)
text (out1$tm, out1$meanavgtm, labels = round (out1$meanavgtm, 0), col = "white", font = 2)

 # calculating mean maximum
out2 <- data.frame (with (myd,  tapply(maxtm, month, mean)))
names(out2) <- c("meanavgtm")
out2$grp <- rownames (out2)
out2$tm <- tm

# ploting mean connected with lines
points (out2$tm, out2$meanavgtm, type = "b", col = "red4", pch = 19, cex = 3)
text (out2$tm, out2$meanavgtm, labels = round (out2$meanavgtm, 0), col = "white", font = 2)

 # calculating mean minimum
out3 <- data.frame (with (myd,  tapply(mintm, month, mean)))
names(out3) <- c("meanavgtm")
out3$grp <- rownames (out3)
out3$tm <- tm

# ploting mean connected with lines
points (out3$tm, out3$meanavgtm, type = "b", col = "green4", pch = 19, cex = 3)
text (out3$tm, out3$meanavgtm, labels = round (out3$meanavgtm, 0), col = "white", font = 2)



RG#81: plotting scatter plot with means and samples (means are connected with line while all samples as scatter plot)

set.seed(1234)
Xv <- data.frame (group = rep(1:10, each = 500),
Y = c(rnorm (500, 20, 5), rnorm (500, 35, 10), rnorm (500, 45, 15),
rnorm (500, 65, 18), rnorm (500, 50,15), rnorm( 500, 30, 10),
rnorm (500, 20, 10), rnorm (500, 20, 10),
rnorm (500, 15, 5), rnorm (500, 10,5)))

 # point plot with transparency in color
 with (Xv, plot(group, Y, pch = "-", cex=1.5, col = rgb(red=0, green=0.5, blue=0.5, alpha=0.25)))

 # calculating mean
out1 <- data.frame (with (Xv,  tapply( Y, factor(group), mean)))
names(out1) <- c("meanY")
out1$grp <- rownames (out1)

# ploting mean connected with lines
points (out1$grp, out1$meanY, type = "b", col = "red", pch = 19)



# Hexbin plot may be useful in situation of large number of data points
set.seed(1234)
Xv <- data.frame (group = rep(1:10, each = 5000),
Y = round (c(rnorm (5000, 20, 5), rnorm (5000, 35, 10), rnorm (5000, 45, 15),
rnorm (5000, 65, 18), rnorm (5000, 50,15), rnorm( 5000, 30, 10),
rnorm (5000, 20, 10), rnorm (5000, 20, 10),
rnorm (5000, 15, 5), rnorm (5000, 10,5)), 0))



   require(ggplot2)
require(hexbin)
plt <- ggplot(Xv,aes(x=group,y=Y)) + stat_binhex() + scale_fill_gradientn(colours=c("yellow","red"),name = "Frequency",na.value=NA) + theme_bw()

 # calculating mean
out1 <- data.frame (with (Xv,  tapply( Y, factor(group), mean)))
names(out1) <- c("meanY")
out1$grp <- as.numeric (rownames (out1))

# ploting mean connected with lines
plt1 <- plt + geom_point (aes(grp, meanY), data = out1, pch = 19, col = "blue", cex = 3)



# connecting with line 
plt1 +  geom_line (aes(grp, meanY), data = out1, col = "green1", lwd = 1)


RG#80: Plotting boxplot and histogram (overlayed or in margin)

 # data
set.seed(4566)
data <- rnorm(100)


# layout where the boxplot is at top 
nf <- layout(mat = matrix(c(1,2),2,1, byrow=TRUE),  height = c(1,3))
par(mar=c(3.1, 3.1, 1.1, 2.1))
boxplot(data, horizontal=TRUE,  outline=TRUE,ylim=c(-4,4), frame=F, col = "green1")
hist(data,xlim=c(-4,4), col = "pink")


# layout boxplot is at the bottom 
nf <- layout(mat = matrix(c(1,2),2,1, byrow=TRUE),  height = c(3,1))
par(mar=c(3.1, 3.1, 1.1, 2.1))
hist(data,xlim=c(-4,4), col = "pink")
boxplot(data, horizontal=TRUE,  outline=TRUE,ylim=c(-4,4), frame=F, col = "green1", width = 10)


# Added to the  plot:
par(mar=c(3.1, 3.1, 1.1, 2.1))
hist(data,xlim=c(-4,4), col = "pink")
boxplot(data, horizontal=TRUE,  outline=TRUE,  ylim=c(-4,4), frame=F, col = "green1", add = TRUE)





RG#79: Heatmap with overlayed circle (size and color)


set.seed (78888)
rectheat = sample(c(rnorm (10, 5,1), NA, NA), 150, replace = T)
circlefill =  rectheat*10 + rnorm (length (rectheat), 0, 3)
circlesize = rectheat*1.5 + rnorm (length (rectheat), 0, 3)
myd <- data.frame (rowv = rep (1:10, 15), columnv = rep(1:15, each = 10),
          rectheat, circlesize, circlefill)
          
          
require(ggplot2)
 pl1 <-  ggplot(myd, aes(y = factor(rowv),  x = factor(columnv))) +  geom_tile(aes(fill = rectheat)) +  scale_fill_continuous(low = "blue", high = "green")


  pl1  +      geom_point(aes(colour = circlefill,  size =circlesize))  +    scale_color_gradient(low = "yellow",   high = "red")+     scale_size(range = c(1, 20))+   theme_bw()


Wednesday, April 17, 2013

RG#78: Time series area plot (with temperature data as example)


st <- as.Date ("2009-1-1")
en <- as.Date ("2009-12-28")
date1 <- seq(st, en, "1 day")
year <- format(date1, "%Y")
month <- format (date1, "%b")
day <- as.numeric (format(date1, "%d"))

# average daily temperature avgtm <- round (rnorm (length(date1), 50,1), 1)

# maximum daily temperature 
maxtm <- round (avgtm + 5 + abs(rnorm (length (avgtm), 0, 1)),2)

# minimum daily temperature 
mintm <-  round (avgtm - 5 + abs(rnorm (length (avgtm), 0, 1)), 2)

# record maximum daily temperature 
rmaxtm <- round (maxtm + 10 + abs(rnorm (length (avgtm), 0, 3)), 2)

# record minimum daily temperature 
rmintm <-  round (mintm - 10 +  abs(rnorm (length (avgtm), 0, 1)), 3)


myd <- data.frame ( year, month, day, avgtm, maxtm, mintm, rmaxtm, rmintm )
myd$date <- as.Date(paste(myd$year, myd$month, myd$day), format='%Y %b %d')

# for weeks lines 
tw = as.numeric (as.Date (seq(st, en, "weeks")), origin = "1970-1-1")
tw <- data.frame (tw=tw)# for month lines 
tm = as.numeric (as.Date (seq(st, en, "months")), origin = "1970-1-1")
tm <- data.frame (tm=tm)



# plot
require(ggplot2)  # need to install ggplot2 
plt <- ggplot(myd, aes(x= date))
plt1 <- plt + geom_ribbon(aes(ymin= rmintm,  ymax= mintm), fill ="lightblue"geom_ribbon(aes(ymin= mintm,  ymax= avgtm),fill="blue") + 
geom_ribbon(aes(ymin= avgtm,  ymax= maxtm),fill="red") +
geom_ribbon(aes(ymin= maxtm,  ymax= rmaxtm),fill="pink") +  geom_line(aes(y=avgtm), col = "black", lwd = 1.5) + theme_bw()
print (plt1) 



# adding vertical lines at week and month interval 
 plt1 +  geom_vline(data = tm, aes(xintercept = tm), lwd = 1.5, col = "yellow") + geom_vline(data = tw, aes(xintercept = tw), lwd = 0.5, col = "black")



# multiple years 

st <- as.Date ("2009-1-1")
en <- as.Date ("2011-6-28")
date1 <- seq(st, en, "1 day")
year <- format(date1, "%Y")
month <- format (date1, "%b")
day <- as.numeric (format(date1, "%d"))

avgtm <- round (rnorm (length(date1), 50,1), 1)
maxtm <- round (avgtm + 5 + abs(rnorm (length (avgtm), 0, 1)),2)
mintm <-  round (avgtm - 5 + abs(rnorm (length (avgtm), 0, 1)), 2)
rmaxtm <- round (maxtm + 10 + abs(rnorm (length (avgtm), 0, 3)), 2)
rmintm <-  round (mintm - 10 +  abs(rnorm (length (avgtm), 0, 1)), 3)


myd <- data.frame ( year, month, day, avgtm, maxtm, mintm, rmaxtm, rmintm )
myd$date <- as.Date(paste(myd$year, myd$month, myd$day), format='%Y %b %d')

# for weeks lines 
ty = as.numeric (as.Date (seq(st, en, "year")), origin = "1970-1-1")
ty <- data.frame (ty=ty)# for month lines 
tm = as.numeric (as.Date (seq(st, en, "months")), origin = "1970-1-1")
tm <- data.frame (tm=tm)



# plot
require(ggplot2)  # need to install ggplot2 
plt <- ggplot(myd, aes(x= date))
plt1 <- plt + geom_ribbon(aes(ymin= rmintm,  ymax= mintm), fill ="lightblue"geom_ribbon(aes(ymin= mintm,  ymax= avgtm),fill="blue") + 
geom_ribbon(aes(ymin= avgtm,  ymax= maxtm),fill="red") +
geom_ribbon(aes(ymin= maxtm,  ymax= rmaxtm),fill="pink") +  geom_line(aes(y=avgtm), col = "black", lwd = 1.5) + theme_bw()
print (plt1) 




# adding vertical lines at month and year interval 
 plt1 +  geom_vline(data = ty, aes(xintercept = ty), lwd = 1.5, col = "yellow") + geom_vline(data = tm, aes(xintercept = tm), lwd = 0.5, col = "black")