brain of mat kelcey...
ggplot posixct cheat sheet
March 18, 2012 at 08:00 PM | categories: Uncategorized
ggplot posixct cheat sheet
after having to google this stuff three times in the last few months i'm writing it down here so i can just cut and paste next time...
data with arbitrary date time stamp
> d = read.delim('data.tsv',header=F,as.is=T,col.names=c('dts_str','freq'))
> # YEAR MONTH DAY HOUR
> head(d,3)
        dts_str  freq
1 2012_01_01_00 18393
2 2012_01_01_01 20536
3 2012_01_01_02 91840
> tail(d,3)
          dts_str   freq
732 2012_01_31_21 103107
733 2012_01_31_22 108921
734 2012_01_31_23  78629
> summary(d$freq)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
  10590   63620   82680   86770  105700  169900
parse arbitrary strange format to a datetime
> d$dts = as.POSIXct(d$dts_str, format="%Y_%m_%d_%H")
> head(d,3)
        dts_str  freq                 dts
1 2012_01_01_00 18393 2012-01-01 00:00:00
2 2012_01_01_01 20536 2012-01-01 01:00:00
3 2012_01_01_02 91840 2012-01-01 02:00:00
> ggplot(d, aes(dts, freq)) + geom_point() + scale_x_datetime(major="10 days", minor="1 day", format="%d-%b-%Y")
plots by day of week; summary
> d$dow = as.factor(format(d$dts, format="%a")) # day of week
> head(d,3)
        dts_str  freq                 dts dow
1 2012_01_01_00 18393 2012-01-01 00:00:00 Sun
2 2012_01_01_01 20536 2012-01-01 01:00:00 Sun
3 2012_01_01_02 91840 2012-01-01 02:00:00 Sun
> ggplot(d,aes(dow,freq)) + geom_boxplot() + geom_smooth(aes(group=1)) + scale_x_discrete(limits=c('Mon','Tue','Wed','Thu','Fri','Sat','Sun')) # provide explicit factor ordering
  + xlab('day of week') + ylab('freq') + opts(title='freq by day of week')
plots by day of week; totals
> by_dow = ddply(d, "dow", summarize, freq=sum(freq))
> ggplot(by_dow,aes(dow,freq)) + geom_bar() + scale_x_discrete(limits=c('Mon','Tue','Wed','Thu','Fri','Sat','Sun')) + xlab('day of week') + ylab('freq') + opts(title='total freq by day of week')
plots by hour of day; summary
> d$hr = format(d$dts, format="%H")
> head(d,3)
        dts_str  freq                 dts dow hr
1 2012_01_01_00 18393 2012-01-01 00:00:00 Sun 00
2 2012_01_01_01 20536 2012-01-01 01:00:00 Sun 01
3 2012_01_01_02 91840 2012-01-01 02:00:00 Sun 02
> ggplot(d,aes(hr,freq)) + geom_boxplot() + geom_smooth(aes(group=1)) + xlab('hr of day') + ylab('freq') + opts(title='freq by hr of day')
plots by hour of day; totals
> by_hr = ddply(d, "hr", summarize, freq=sum(freq))
> ggplot(by_hr,aes(hr,freq)) + geom_bar() + xlab('hr of day') + ylab('freq') + opts(title='total freq by hr of day')
separate weekday from weekend
d$weekend = 'weekday'
d[d$dow=='Sat'|d$dow=='Sun',]$weekend = 'weekend' # terrible style :(
ggplot(d,aes(hr,freq)) + geom_boxplot(aes(fill=weekend)) + geom_smooth(aes(group=weekend)) + xlab('hr of day') + ylab('freq') + opts(title='freq by hr of day')