brain of mat kelcey


ggplot posixct cheat sheet

March 18, 2012

ggplot posixct cheat sheet

after having to google this stuff three times in the last few months i'm writing it down here so i can just cut and paste next time...

data with arbitrary date time stamp

> # load the tab-separated (timestamp string, freq) pairs; the file has no header row
> # (spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned)
> d = read.delim('data.tsv',header=FALSE,as.is=TRUE,col.names=c('dts_str','freq'))
> # YEAR MONTH DAY HOUR
> head(d,3)
        dts_str  freq
1 2012_01_01_00 18393
2 2012_01_01_01 20536
3 2012_01_01_02 91840
> tail(d,3)
          dts_str   freq
732 2012_01_31_21 103107
733 2012_01_31_22 108921
734 2012_01_31_23  78629
> summary(d$freq)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  10590   63620   82680   86770  105700  169900 

parse arbitrary strange format to a datetime

> d$dts = as.POSIXct(d$dts_str, format="%Y_%m_%d_%H")

> head(d,3)
        dts_str  freq                 dts
1 2012_01_01_00 18393 2012-01-01 00:00:00
2 2012_01_01_01 20536 2012-01-01 01:00:00
3 2012_01_01_02 91840 2012-01-01 02:00:00

> ggplot(d, aes(dts, freq)) + geom_point() + scale_x_datetime(major="10 days", minor="1 day", format="%d-%b-%Y")

plots by day of week; summary

> d$dow = as.factor(format(d$dts, format="%a"))  # day of week
> head(d,3)
        dts_str  freq                 dts dow
1 2012_01_01_00 18393 2012-01-01 00:00:00 Sun
2 2012_01_01_01 20536 2012-01-01 01:00:00 Sun
3 2012_01_01_02 91840 2012-01-01 02:00:00 Sun
> # boxplot of freq per day of week, with one smoother drawn across all days (group=1).
> # every continued line must END with `+`: a line that starts with `+` is parsed as a
> # new expression, so the layers would never be added to the plot.
> ggplot(d,aes(dow,freq)) + 
 geom_boxplot() + 
 geom_smooth(aes(group=1)) + 
 scale_x_discrete(limits=c('Mon','Tue','Wed','Thu','Fri','Sat','Sun')) + # provide explicit factor ordering
 xlab('day of week') + ylab('freq') + opts(title='freq by day of week')

plots by day of week; totals

> by_dow = ddply(d, "dow", summarize, freq=sum(freq))
> # by_dow already holds the per-day totals, so draw the values as-is (stat='identity')
> # instead of letting geom_bar count rows, which conflicts with the mapped y
> ggplot(by_dow,aes(dow,freq)) + geom_bar(stat='identity') + 
 scale_x_discrete(limits=c('Mon','Tue','Wed','Thu','Fri','Sat','Sun')) + 
 xlab('day of week') + ylab('freq') + opts(title='total freq by day of week')  

plots by hour of day; summary

> d$hr = format(d$dts, format="%H")
> head(d,3)
        dts_str  freq                 dts dow hr
1 2012_01_01_00 18393 2012-01-01 00:00:00 Sun 00
2 2012_01_01_01 20536 2012-01-01 01:00:00 Sun 01
3 2012_01_01_02 91840 2012-01-01 02:00:00 Sun 02
> ggplot(d,aes(hr,freq)) + geom_boxplot() + geom_smooth(aes(group=1)) + 
 xlab('hr of day') + ylab('freq') + opts(title='freq by hr of day')

plots by hour of day; totals

> by_hr = ddply(d, "hr", summarize, freq=sum(freq))
> # by_hr already holds the per-hour totals, so draw the values as-is (stat='identity')
> # instead of letting geom_bar count rows, which conflicts with the mapped y
> ggplot(by_hr,aes(hr,freq)) + geom_bar(stat='identity') + 
 xlab('hr of day') + ylab('freq') + opts(title='total freq by hr of day')

separate weekdays from weekends

# label every row as 'weekend' (Sat/Sun) or 'weekday' in one vectorised step;
# unlike assigning into a row subset, this also works when no rows match
d$weekend = ifelse(d$dow %in% c('Sat','Sun'), 'weekend', 'weekday')
# per-hour boxplots filled by weekday/weekend, plus one smoother per group.
# every continued line ends with `+` so R keeps reading the same expression;
# a line that starts with `+` would be parsed as a separate statement.
ggplot(d,aes(hr,freq)) + 
 geom_boxplot(aes(fill=weekend)) + 
 geom_smooth(aes(group=weekend)) + 
 xlab('hr of day') + ylab('freq') + opts(title='freq by hr of day')