Home » Uncategorized

Time-based heatmaps in R

2808360384

Tutorial Scenario

In this tutorial, we look at heatmaps of Seattle 911 calls broken down by various time periods and by type of incident. This awesome dataset is available as part of the data.gov open data project.

Steps

The code below walks through 6 main steps:

  1. Install and load packages
  2. Load data files
  3. Add time variables
  4. Create summary table
  5. Create heatmap
  6. Celebrate

2808360321

Code

#################### Import and Install Packages ####################

# Packages this script relies on. Only the ones that are not yet installed
# are installed, so re-running the script does not reinstall everything.
required_packages <- c("plyr", "lubridate", "ggplot2", "dplyr")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

# Attach plyr before dplyr: dplyr masks the plyr functions that share a name
# (summarise, count, ...), and plyr complains if it is attached after dplyr.
library(plyr)
library(lubridate)
library(ggplot2)
library(dplyr)

#################### Set Variables and Import Data ####################

# Dataset catalog page (the URL is truncated in the original post):
#https://catalog.data.gov/dataset/seattle-police-department-911-inci…

# read.csv() is read.table() with CSV-appropriate defaults (header = TRUE,
# sep = ",", quote = "\"", fill = TRUE, comment.char = ""), so apostrophes
# and '#' characters inside text fields do not break row parsing.
incidents <- read.csv(
  "https://data.seattle.gov/api/views/3k2p-39jp/rows.csv?accessType=DOWNLOAD",
  stringsAsFactors = FALSE
)

# Low and high ends of the fill gradient used by the heatmaps.
col1 <- "#d8e1cf"
col2 <- "#438484"

# Quick look at what was loaded.
head(incidents)
str(incidents)

#################### Transform ####################

# Convert dates using lubridate. Columns are referenced through `incidents$`
# instead of attach(), which would put a stale copy of the data frame on the
# search path and hide later modifications.
# NOTE(review): mdy_hms() assumes month/day/year hour:minute:second text;
# values that do not parse become NA (lubridate warns) -- confirm against
# the downloaded data.
incidents$ymd <- mdy_hms(incidents$Event.Clearance.Date)

# Time components used as heatmap axes. `label = TRUE` yields ordered factors
# of month / weekday names instead of numbers.
incidents$month <- month(incidents$ymd, label = TRUE)
incidents$year <- year(incidents$ymd)
incidents$wday <- wday(incidents$ymd, label = TRUE)
incidents$hour <- hour(incidents$ymd)

head(incidents)

#################### Heatmap Incidents Per Hour ####################

# Create summary table for heatmap - Day/Hour specific: one row per
# (hour, weekday) combination with the number of incidents in column N.
# dplyr:: is spelled out because plyr also exports a count().
dayHour <- dplyr::count(incidents, hour, wday, name = "N")

# Reverse the weekday levels so the first weekday is drawn at the top of the
# y axis (ggplot2 places the first factor level at the bottom).
dayHour$wday <- factor(dayHour$wday, levels = rev(levels(dayHour$wday)))

# Overall summary. Rows whose timestamp failed to parse have NA hour/weekday;
# na.rm = TRUE drops those tiles without a warning.
ggplot(dayHour, aes(hour, wday)) +
  geom_tile(aes(fill = N), colour = "white", na.rm = TRUE) +
  scale_fill_gradient(low = col1, high = col2) +
  guides(fill = guide_legend(title = "Total Incidents")) +
  # theme_minimal() is a complete theme; a preceding theme_bw() would simply
  # be replaced by it, so only one is applied.
  theme_minimal() +
  labs(
    title = "Heatmap of Seattle Incidents by Day of Week and Hour",
    x = "Hour of Day", y = "Day of Week"
  ) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

#################### Heatmap Incidents Year and Month ####################

# Create summary table for heatmap - Month/Year specific: one row per
# (year, month) combination with the number of incidents in column N.
# dplyr:: is spelled out because plyr also exports a count().
yearMonth <- dplyr::count(incidents, year, month, name = "N")

# Reverse the month levels so the first month is drawn at the top of the
# y axis (ggplot2 places the first factor level at the bottom). The levels
# are taken from yearMonth itself; the original referenced an undefined
# `summaryGroup` object here.
yearMonth$month <- factor(
  yearMonth$month,
  levels = rev(levels(yearMonth$month))
)

# Overall summary. aes(year, month) puts year on x and month on y, so the
# axis labels follow that order.
ggplot(yearMonth, aes(year, month)) +
  geom_tile(aes(fill = N), colour = "white") +
  scale_fill_gradient(low = col1, high = col2) +
  guides(fill = guide_legend(title = "Total Incidents")) +
  labs(
    title = "Heatmap of Seattle Incidents by Year and Month",
    x = "Year", y = "Month"
  ) +
  # theme_minimal() is a complete theme; a preceding theme_bw() would simply
  # be replaced by it, so only one is applied.
  theme_minimal() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

#################### Heatmap Incidents Per Hour by Incident Group ####################

# Create summary table for heatmap - Group specific: one row per
# (incident group, hour) combination with the number of incidents in N.
# dplyr:: is spelled out because plyr also exports a count().
groupSummary <- dplyr::count(
  incidents, Event.Clearance.Group, hour,
  name = "N"
)

# Overall summary: hour on x, incident group on y, tile colour = count.
ggplot(groupSummary, aes(hour, Event.Clearance.Group)) +
  geom_tile(aes(fill = N), colour = "white") +
  scale_fill_gradient(low = col1, high = col2) +
  guides(fill = guide_legend(title = "Total Incidents")) +
  labs(
    title = "Heatmap of Seattle Incidents by Event and Hour",
    x = "Hour", y = "Event"
  ) +
  # theme_minimal() is a complete theme; a preceding theme_bw() would simply
  # be replaced by it, so only one is applied.
  theme_minimal() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

Please see here for the full tutorial and steps