brain of mat kelcey...


ggplot posixct cheat sheet

March 18, 2012 at 08:00 PM | categories: Uncategorized

ggplot posixct cheat sheet

after having to google this stuff three times in the last few months i'm writing it down here so i can just cut and paste next time...

data with arbitrary date time stamp

> # load the tab-separated file (it has no header row); as.is = TRUE keeps dts_str as plain character
> d <- read.delim('data.tsv', header = FALSE, as.is = TRUE, col.names = c('dts_str', 'freq'))
> # dts_str layout: YEAR MONTH DAY HOUR
> head(d, 3)
        dts_str  freq
1 2012_01_01_00 18393
2 2012_01_01_01 20536
3 2012_01_01_02 91840
> tail(d, 3)
          dts_str   freq
732 2012_01_31_21 103107
733 2012_01_31_22 108921
734 2012_01_31_23  78629
> summary(d$freq)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  10590   63620   82680   86770  105700  169900 

parse arbitrary strange format to a datetime

> # parse the YEAR_MONTH_DAY_HOUR strings; pinning tz keeps the result independent of the
> # session's local zone (local parsing can yield NA for hours skipped by a DST change)
> d$dts <- as.POSIXct(d$dts_str, format = "%Y_%m_%d_%H", tz = "UTC")

> head(d, 3)
        dts_str  freq                 dts
1 2012_01_01_00 18393 2012-01-01 00:00:00
2 2012_01_01_01 20536 2012-01-01 01:00:00
3 2012_01_01_02 91840 2012-01-01 02:00:00

> # current ggplot2 spells the old major= / minor= / format= arguments as date_breaks / date_minor_breaks / date_labels
> ggplot(d, aes(dts, freq)) + geom_point() +
 scale_x_datetime(date_breaks = "10 days", date_minor_breaks = "1 day", date_labels = "%d-%b-%Y")

plots by day of week; summary

> d$dow <- as.factor(format(d$dts, format = "%a"))  # day of week (abbreviated name; the text depends on the session locale)
> head(d, 3)
        dts_str  freq                 dts dow
1 2012_01_01_00 18393 2012-01-01 00:00:00 Sun
2 2012_01_01_01 20536 2012-01-01 01:00:00 Sun
3 2012_01_01_02 91840 2012-01-01 02:00:00 Sun
> # each `+` must END a line: a line that starts with `+` is parsed as a separate statement and fails
> ggplot(d, aes(dow, freq)) +
 geom_boxplot() +
 geom_smooth(aes(group = 1)) +
 scale_x_discrete(limits = c('Mon','Tue','Wed','Thu','Fri','Sat','Sun')) + # provide explicit factor ordering
 labs(x = 'day of week', y = 'freq', title = 'freq by day of week')  # labs() replaces the defunct opts(title=)

plots by day of week; totals

> by_dow <- ddply(d, "dow", summarize, freq = sum(freq))  # one row per day of week holding the summed freq
> # freq is already aggregated, so the bar heights come straight from the data (stat = "identity")
> # rather than from geom_bar's default row counting, which rejects a y aesthetic
> ggplot(by_dow, aes(dow, freq)) + geom_bar(stat = "identity") +
 scale_x_discrete(limits = c('Mon','Tue','Wed','Thu','Fri','Sat','Sun')) +
 labs(x = 'day of week', y = 'freq', title = 'total freq by day of week')  # labs() replaces the defunct opts(title=)

plots by hour of day; summary

> d$hr <- format(d$dts, format = "%H")  # two-digit hour of day, kept as character so it plots as a discrete axis
> head(d, 3)
        dts_str  freq                 dts dow hr
1 2012_01_01_00 18393 2012-01-01 00:00:00 Sun 00
2 2012_01_01_01 20536 2012-01-01 01:00:00 Sun 01
3 2012_01_01_02 91840 2012-01-01 02:00:00 Sun 02
> # group = 1 makes geom_smooth fit a single curve across all the discrete hour values
> ggplot(d, aes(hr, freq)) + geom_boxplot() + geom_smooth(aes(group = 1)) +
 labs(x = 'hr of day', y = 'freq', title = 'freq by hr of day')  # labs() replaces the defunct opts(title=)

plots by hour of day; totals

> by_hr <- ddply(d, "hr", summarize, freq = sum(freq))  # one row per hour of day holding the summed freq
> # freq is already aggregated, so draw the values as-is (stat = "identity") instead of counting rows
> ggplot(by_hr, aes(hr, freq)) + geom_bar(stat = "identity") +
 labs(x = 'hr of day', y = 'freq', title = 'total freq by hr of day')  # labs() replaces the defunct opts(title=)

separate weekday from weekend

# label every row as 'weekend' (Sat/Sun) or 'weekday' in a single vectorised step
d$weekend <- ifelse(d$dow %in% c('Sat', 'Sun'), 'weekend', 'weekday')
# each `+` must END a line: a line that starts with `+` is parsed as a separate statement and fails
ggplot(d, aes(hr, freq)) +
 geom_boxplot(aes(fill = weekend)) +
 geom_smooth(aes(group = weekend)) +
 labs(x = 'hr of day', y = 'freq', title = 'freq by hr of day')  # labs() replaces the defunct opts(title=)