I recently had to visualize some data for a client that involved identifying the number of members that were under the age of 18. Thanks to some help from Robert Kabacoff’s Quick-R site, I put together a histogram with density plot overlay. This is how I did it:
set.seed(123)
agedata <- round(20 + rnorm(1000, 25, 20),0)
xrange <- "Jan-2013 to Jun-2013"
par(mar=c(5.1, 4.1, 4.1, 11.5))
x1 <- hist( agedata, breaks = 100, xlim=range(0, 100), lwd=2,yaxt='n', xaxt='n', ylab="", xlab="", main="")
hist( agedata, breaks = 100, xlim=range(0, 100), lwd=2,yaxt='n', xaxt='n', ylab="", xlab="", main="")
u <- par("usr")
rect(u[1], u[3], u[2], u[4], col = "gray88", border = "black")
abline(h = pretty(x1$counts), col='white') ## draw h lines
abline(v = (pretty(range(1,100))), col='white') ## draw v lines
par(new=T)
hist(agedata, breaks=100, xlim=range(0, 100), ylim=range(u[3:4]), col="yellow", xlab="Age", main=paste("Drug Users by Age fills from", xrange) )
xx1 <- data.frame(table(agedata)) ## I will use this next chunk of code to "color in" only those bars that are below 18years to highlight them
xx1$agedata <- as.numeric(levels(xx1$agedata))[xx1$agedata]
xx1 <- xx1[order(xx1$agedata),]
xx1 <- xx1[xx1$agedata < 18,]
vec <- NULL
for( i in 1:nrow(xx1)){ ## this is where I create my new data
run <- rep(xx1$agedata[i], xx1$Freq[i])
vec <- c(vec, run)
}
par(new=T)
hist(vec, breaks=c(x1$breaks[x1$breaks<18]) ,xlim=range(0, 100), ylim=range(u[3:4]), col="darksalmon", xlab="", main= "" ) # notice I have to use breaks from the previous hist call to ensure that the breaks for the new hist line up with the previous.
abline(v=17, lwd=2, lty=2, col="red" )
count <- nrow(genage)
avage <- round(mean(agedata),2)
medage <- round(median(agedata), 2)
sdage <- round(sd(agedata),2)
# text(xpd=TRUE, x= 35, y=35, offset=-.2, labels=paste("Count of Users:", count, "\n", "Mean Age:", avage, "\n", "Median Age:", medage, "\n", "St. Dev.:", sdage), font=2)
legend(xpd=TRUE,'topright', inset=c(-0.3,0), legend=paste(" Count of Users:", count, "\n", "Mean Age:", avage, "\n", "Median Age:", medage, "\n", "St. Dev.:", sdage), pch=NA, ,title="Summary", bty='n' , cex=1) ## you are going to have to create the strings that go into the legend youself, I used some that I had previously created
d <- density(agedata)
par(new=T)
plot(d, main="", xlim=range(0,100), xlab="", ylab="", xaxt='n', yaxt="n")