science_giving <- read.csv("science_federal_giving.csv")
attach(science_giving)
library(dplyr)
library(magrittr)
library(tidyr)
x <- science_giving %>%
group_by(cleanedoccupation, cycle, state, cand_pty_affiliation, classification) %>%
summarise(n=sum(X2016_dollars),
ct=n()) %>%
arrange(desc(n))
x <- x[0:41325,]
gph1 <- x %>%
group_by(cleanedoccupation, cand_pty_affiliation, classification) %>%
filter(cand_pty_affiliation %in% c("REP", "DEM")) %>%
filter(classification %in% c("Engineer", "Scientist")) %>%
summarise(total = sum(n)) %>%
arrange(desc(total)) %>%
ungroup() %>%
group_by(cand_pty_affiliation, classification) %>%
top_n(n=7) %>%
arrange(classification, desc(total))
options(scipen = 999)
xdem <- subset(x, cand_pty_affiliation=="DEM")
xrep <- subset(x, cand_pty_affiliation=="REP")
xdem <- subset(xdem, cleanedoccupation %in% xrep$cleanedoccupation)
xrep_group <- xrep %>%
group_by(cleanedoccupation, cand_pty_affiliation, classification) %>%
summarise(total = sum(n)) %>%
arrange(cleanedoccupation)
xdem_group <- xdem %>%
group_by(cleanedoccupation, cand_pty_affiliation, classification) %>%
summarise(total = sum(n)) %>%
arrange(cleanedoccupation)
xrep_group <- subset(xrep_group, cleanedoccupation %in% xdem_group$cleanedoccupation)
xdem_group$difference <- xdem_group$total - xrep_group$total
dem_top <- xdem_group %>%
arrange(desc(difference))
dem_top <- dem_top[0:13,]
dem_top$party <- "dem"
dem_bottom <- xdem_group %>%
arrange(difference)
dem_bottom <- dem_bottom[0:13,]
dem_bottom$party <- "rep"
topbottom <- rbind(dem_top, dem_bottom)
plot <- ggplot(data=topbottom, aes(x=reorder(cleanedoccupation, difference), y=difference))+
geom_col(aes(fill=party))+
coord_flip()+
theme_bw(base_size=13)+
xlab("")+
ylab("")+
theme(legend.position = "bottom",
panel.grid.major.y=element_blank(),
panel.grid.minor.y=element_blank(),
panel.grid.minor.x=element_blank(),
panel.grid.major.x=element_line(color="grey90"),
axis.ticks = element_line(color="grey90"),
axis.ticks.y=element_blank(),
axis.title.x=element_blank(),
strip.background=element_blank(),
plot.margin=margin(0.6,0.6,0,0.6, "cm"),
legend.box.margin=margin(0,0,0,0, "cm"),
legend.box.spacing=unit(0, "cm"),
legend.title=element_text(size=11),
plot.title = element_text(size=12),
panel.border=element_rect(color="grey60"))+
ggtitle("Party differences in political donations by scientists, 2008-16")+
scale_fill_manual(values=c("#377eb8", "#e41a1c"), labels=c("Democrats", "Republicans"), name="Millions of dollars more donated to")+
scale_y_continuous(expand=c(0,0), limits=c(-2500000,7500000), labels=c("2.5", "0", "2.5", "5.0", "7.5"))
extra.calendarHeat <- function(dates,
values,
ncolors=99,
color="b2w",
pch.symbol = 15:20,
cex.symbol =2,
col.symbol ="#00000044",
pvalues=values,
varname="Values",
date.form = "%Y-%m-%d", ...) {
require(latticeExtra)
require(grid)
require(chron)
# Reshape data ------------------------------------------------------------------
transformdata <- function(dates, values,pvalues, date.form = "%Y-%m-%d", ...) {
if (class(dates) == "character" | class(dates) == "factor" ) {
dates <- strptime(dates, date.form)
}
caldat <- data.frame( dates = dates,value = values,pvalue=pvalues)
min.date <- as.Date(paste(format(min(dates), "%Y"),
"-1-1",sep = ""))
max.date <- as.Date(paste(format(max(dates), "%Y"),
"-12-31", sep = ""))
dates.f <- seq(min.date, max.date, by="days")
# Merge moves data by one day, avoid
caldat <- data.frame(value = values, dates = dates,pvalue=pvalues)
caldat <- data.frame(date.seq = seq(min.date, max.date, by="days") ,
value = NA,
pvalue=NA)
dates <- as.Date(dates)
caldat$value[match(dates, caldat$date.seq)] <- values
caldat$pvalue[match(dates, caldat$date.seq)] <- pvalues
caldat$dotw <- as.numeric(format(caldat$date.seq, "%w"))
caldat$woty <- as.numeric(format(caldat$date.seq, "%U")) + 1
caldat$yr <- as.factor(format(caldat$date.seq, "%Y"))
caldat$month <- as.numeric(format(caldat$date.seq, "%m"))
yrs <- as.character(unique(caldat$yr))
d.loc <- as.numeric()
for (m in min(yrs):max(yrs)) {
d.subset <- which(caldat$yr == m)
sub.seq <- seq(1,length(d.subset))
d.loc <- c(d.loc, sub.seq)
}
caldat <- cbind(caldat, seq=d.loc)
caldat$pvalue.n <- round(caldat$pvalue/max(caldat$pvalue,na.rm=T),2)
caldat
}
dat <- transformdata(dates, values,pvalues,date.form)
## color style -----------------------------------------
#color styles------------------------------------------------
col.sty <- switch(color,
"r2b"=c("#0571B0", "#92C5DE", "#F7F7F7", "#F4A582", "#CA0020"), #red to blue
'r2g'=c("#D61818", "#FFAE63", "#FFFFBD", "#B5E384") , #red to green
'w2b'=c("#045A8D", "#2B8CBE", "#74A9CF", "#BDC9E1", "#F1EEF6"), #white to blue
'b2w'=grey.colors(5) )## black to white
# assign("col.sty", get(color))
calendar.pal <- colorRampPalette((col.sty), space = "Lab")
## scales ------------------------------------------------------------------
yrs <- (unique(dat$yr))
nyr <- length(yrs)
scales = list(
x = list( at= c(seq(2.9, 52, by=4.42)),
labels = month.abb,
alternating = c(1, rep(0, (nyr-1))),
tck=0,
cex =1),
y=list(
at = c(0, 1, 2, 3, 4, 5, 6),
labels = c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
"Friday", "Saturday"),
alternating = 1,
cex =1,
tck=0))
# theme ------------------------------------------------------------------
calendar.theme <- list(
xlab=NULL,ylab=NULL,
strip.background = list(col = "transparent"),
strip.border = list(col = "transparent"),
axis.line = list(col="transparent"),
par.strip.text=list(cex=2))
#calendar divsion-----------------------------------------------------------------------
calendar.division <- function(...)
{
xyetc <- list(...)
subs <- dat[xyetc$subscripts,]
dates.fsubs <- dat[dat$yr == unique(subs$yr),]
y.start <- dates.fsubs$dotw[1]
y.end <- dates.fsubs$dotw[nrow(dates.fsubs)]
dates.len <- nrow(dates.fsubs)
adj.start <- dates.fsubs$woty[1]
for (k in 0:6) {
if (k < y.start) {
x.start <- adj.start + 0.5
} else {
x.start <- adj.start - 0.5
}
if (k > y.end) {
x.finis <- dates.fsubs$woty[nrow(dates.fsubs)] - 0.5
} else {
x.finis <- dates.fsubs$woty[nrow(dates.fsubs)] + 0.5
}
grid.lines(x = c(x.start, x.finis), y = c(k -0.5, k - 0.5),
default.units = "native", gp=gpar(col = "grey", lwd = 1))
}
if (adj.start < 2) {
grid.lines(x = c( 0.5, 0.5), y = c(6.5, y.start-0.5),
default.units = "native", gp=gpar(col = "grey", lwd = 1))
grid.lines(x = c(1.5, 1.5), y = c(6.5, -0.5), default.units = "native",
gp=gpar(col = "grey", lwd = 1))
grid.lines(x = c(x.finis, x.finis),
y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native",
gp=gpar(col = "grey", lwd = 1))
if (dates.fsubs$dotw[dates.len] != 6) {
grid.lines(x = c(x.finis + 1, x.finis + 1),
y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native",
gp=gpar(col = "grey", lwd = 1))
}
grid.lines(x = c(x.finis, x.finis),
y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native",
gp=gpar(col = "grey", lwd = 1))
}
for (n in 1:51) {
grid.lines(x = c(n + 1.5, n + 1.5),
y = c(-0.5, 6.5), default.units = "native", gp=gpar(col = "grey", lwd = 1))
}
x.start <- adj.start - 0.5
if (y.start > 0) {
grid.lines(x = c(x.start, x.start + 1),
y = c(y.start - 0.5, y.start - 0.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
grid.lines(x = c(x.start + 1, x.start + 1),
y = c(y.start - 0.5 , -0.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
grid.lines(x = c(x.start, x.start),
y = c(y.start - 0.5, 6.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
if (y.end < 6 ) {
grid.lines(x = c(x.start + 1, x.finis + 1),
y = c(-0.5, -0.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
grid.lines(x = c(x.start, x.finis),
y = c(6.5, 6.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
} else {
grid.lines(x = c(x.start + 1, x.finis),
y = c(-0.5, -0.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
grid.lines(x = c(x.start, x.finis),
y = c(6.5, 6.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
}
} else {
grid.lines(x = c(x.start, x.start),
y = c( - 0.5, 6.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
}
if (y.start == 0 ) {
if (y.end < 6 ) {
grid.lines(x = c(x.start, x.finis + 1),
y = c(-0.5, -0.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
grid.lines(x = c(x.start, x.finis),
y = c(6.5, 6.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
} else {
grid.lines(x = c(x.start + 1, x.finis),
y = c(-0.5, -0.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
grid.lines(x = c(x.start, x.finis),
y = c(6.5, 6.5), default.units = "native",
gp=gpar(col = "black", lwd = 1.75))
}
}
for (j in 1:12) {
last.month <- max(dates.fsubs$seq[dates.fsubs$month == j])
x.last.m <- dates.fsubs$woty[last.month] + 0.5
y.last.m <- dates.fsubs$dotw[last.month] + 0.5
grid.lines(x = c(x.last.m, x.last.m), y = c(-0.5, y.last.m),
default.units = "native", gp=gpar(col = "black", lwd = 1.75))
if ((y.last.m) < 6) {
grid.lines(x = c(x.last.m, x.last.m - 1), y = c(y.last.m, y.last.m),
default.units = "native", gp=gpar(col = "black", lwd = 1.75))
grid.lines(x = c(x.last.m - 1, x.last.m - 1), y = c(y.last.m, 6.5),
default.units = "native", gp=gpar(col = "black", lwd = 1.75))
} else {
grid.lines(x = c(x.last.m, x.last.m), y = c(- 0.5, 6.5),
default.units = "native", gp=gpar(col = "black", lwd = 1.75))
}
}
}
# major plot ------------------------------------------------------------------
p <- levelplot(value~woty*dotw | yr, data=dat, border = "black",
layout = c(1, nyr%%7),
col.regions = calendar.pal(ncolors),
aspect='iso',
between = list(x=0, y=c(1,1)),
strip=TRUE,
main =varname,
panel = function(x,y,z,...) {
panel.levelplot(x,y,z,...)
ll <- list(...)
at = pretty(dat$pvalue)
ind.pch <- cut(dat$pvalue, at, include.lowest = TRUE, labels = FALSE)
pch.symbols <- pch.symbol[ind.pch]
subscripts <- ll$subscripts
x <- x[subscripts]
y <- y[subscripts]
pch.symbols <- pch.symbols[subscripts]
panel.xyplot(x, y, fill =col.symbol ,col.symbol =col.symbol,
pch = pch.symbols, cex=cex.symbol,
, ...)
calendar.division(...)
},
scales= scales,
xlim =extendrange(dat$woty,f=0.01),
xlab="",ylab="",
ylim=extendrange(dat$dotw,f=0.1),
cuts= ncolors - 1,
colorkey= list(col = calendar.pal(ncolors), width = 0.6, height = 0.5),
subscripts=TRUE,
par.settings = calendar.theme)
p
}
##----------- example---------------------------------------------------
p1 <- extra.calendarHeat(dates= mydatamile$X_startDate, values = mydatamile$n,
varname="How many miles I walked each day", color="r2b", pch.symbol=15)
library(ggplot2)
library(lubridate)
library(dplyr)
library(magittr)
mydata <- read.csv("convertcsv.csv")
mydatacount <- subset(mydata, X_unit=="count")
mydatacount$X_startDate <- lubridate::as_datetime(mydatacount$X_startDate)
mydatacount$X_value <- as.numeric(mydatacount$X_value)
mydatahourly <- mydatacount %>%
group_by(rounded_hour = floor_date(X_startDate, unit="hour")) %>%
summarise(hourly_sum=sum(as.numeric(as.character(X_value))))
mydatacount$hour <- hour(mydatacount$X_startDate)
mydatacount$minute <- minute(mydatacount$X_startDate)
mydatacount$month <- month(mydatacount$X_startDate)
mydatacount$year <- year(mydatacount$X_startDate)
mydatacount <- subset(mydatacount, year==2017)
mydatacount$bin <- 0
mydatacount$bin[mydatacount$minute <= 29 & mydatacount$minute >= 0] <- 1
mydatacount$bin[mydatacount$minute <= 59 & mydatacount$minute >= 30] <- 2
mydatacount$degree <- mydatacount$bin * (mydatacount$hour+1)
graph_data <- mydatacount %>%
group_by(hour, month) %>%
summarise(n=sum(as.numeric(as.character(X_value))))
graph_data$mm <- 0
graph_data$mm[graph_data$month == 1] <- "Jan"
graph_data$mm[graph_data$month == 2] <- "Feb"
graph_data$mm[graph_data$month == 3] <- "Mar"
graph_data$mm[graph_data$month == 4] <- "Apr"
graph_data$mm[graph_data$month == 5] <- "May"
graph_data$mm[graph_data$month == 6] <- "Jun"
graph_data$mm[graph_data$month == 7] <- "Jul"
graph_data$mm[graph_data$month == 8] <- "Aug"
graph_data$mm[graph_data$month == 9] <- "Sept"
graph_data$mm[graph_data$month == 10] <- "Oct"
graph_data$mm[graph_data$month == 11] <- "Nov"
graph_data$mm <- ordered(graph_data$mm, levels=c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sept", "Oct", "Nov"))
averages <- graph_data %>%
group_by(hour) %>%
summarise(n=mean(n))
circle1 <- ggplot(data=graph_data, aes(x=hour, y=n))+
geom_area(data=averages,aes(x=hour,y=n), color="grey70", fill="grey70")+
geom_line()+
theme_bw()+
facet_wrap(~mm, nrow=3)+
geom_vline(xintercept=12, linetype="dashed", color="grey30")+
theme(strip.background = element_blank(),
panel.grid.minor.x=element_blank(),
panel.grid.minor.y=element_blank(),
panel.border=element_rect(color="grey30"))+
scale_x_continuous(breaks=c(0,3,6,9,12,15,18,21), labels=c("0","","6 am","","12","","6 pm",""))+
scale_y_continuous(breaks=c(0,15000,30000,45000), labels=c("0","15,000", "30,000", "45,000"))+
ggtitle("Number of steps I took at each hour of the day, Jan-Nov 2017")+
ylab("")+
xlab("")
print(circle1)
require(dplyr)
mydatamile <- mydata %>%
filter(X_unit=="mi")
require(lubridate)
mydatamile$X_startDate <- as_datetime(mydatamile$X_startDate)
mydatamile$X_startDate <- floor_date(mydatamile$X_startDate, unit="day")
mydatamile <- mydatamile %>%
group_by(X_startDate) %>%
summarise(n=sum(as.numeric(as.character(X_value))))
ozone <- read.csv("indexed_data.csv")
ozone_data <- ozone %>%
filter(age_group == "total") %>%
filter(Pollutant == "NO2") %>%
group_by(date, provinces) %>%
summarise(ozone = mean(Concentration), temp = mean(Temp_C))
ozone_data$date <- as.Date(ozone_data$date, format='%Y-%m-%d')
ozone_data <- ozone_data %>%
arrange(date)
ozone_data <- ozone_data[0:2862,]
ozone_data <- subset(ozone_data, !(provinces %in% c("Malaga", "Murcia")))
dateplot <- ggplot(data=ozone_data, aes(x=date, y=ozone))+
geom_line(aes(group=provinces), alpha=0.2, color="grey30")+
geom_line(data=subset(ozone_data, provinces=="Madrid"), color="red")+
theme_bw()+
ylab(expression(paste("Nitrogen dioxide ", ug/m^3)))+
xlab("")+
theme(panel.grid.minor.y=element_blank(),
legend.title=element_blank(),
legend.position=c(0.75,1.06),
panel.grid.major.y=element_blank(),
panel.grid.minor.x=element_blank(),
axis.ticks = element_line(color="grey90"))+
scale_x_date(expand=c(0,0), date_breaks = "1 month", date_labels = "%b")+
ggtitle("Nitrogen dioxide concentrations are higher in Madrid than in the rest of Spain")
min <- loess(employment$P25th/1000 ~ employment$ShareWomen)$fitted
max <- loess(employment$P75th/1000 ~ employment$ShareWomen)$fitted
plot1 <- ggplot(data=employment, aes(x=ShareWomen, y=Median/1000))+
geom_point(alpha=0.9, aes(color=science.other))+
geom_smooth(se=FALSE, color="grey50")+
geom_smooth(aes(x=ShareWomen, y=P25th/1000), se=FALSE, color="grey85")+
geom_smooth(aes(x=ShareWomen, y=P75th/1000), se=FALSE, color="grey85")+
geom_ribbon(aes(ymin=min, ymax=max), alpha=0.08)+
theme_bw()+
xlab("Proportion of women")+
ylab("Income (1,000 USD)")+
ggtitle("Higher earning college majors are disproportionately men")+
scale_color_brewer(palette="Set1")+
theme(panel.grid.minor=element_blank(),
panel.grid.major.x=element_blank(),
legend.title = element_blank(),
legend.position = c(0.85, 0.8),
legend.background=element_blank(),
legend.text = element_text(size=10))+
scale_x_continuous(labels=c("0%", "25%", "50%", "75%","100%"))
plot3 <- ggplot(data=los.angeles, aes(x=date, y=actual_mean_temp))+
theme_bw()+
geom_linerange(aes(ymin=record_min_temp, ymax=record_max_temp, color="Record"), size=1.5)+
geom_linerange(aes(ymin=actual_min_temp, ymax=actual_max_temp, color="Actual"), size=1.5)+
geom_linerange(aes(ymin=average_min_temp, ymax=average_max_temp, color="Average"), size=1.5, alpha=0) +
geom_ribbon(aes(ymin=average_min_temp, ymax=average_max_temp), color="transparent", fill="grey50", size=0.5, alpha=0.5) +
geom_point(size=0.7, color="grey30")+
scale_colour_manual(name="x", labels=c("Actual", "Average", "Record"), values=c("grey70", "grey90", "bisque2"))+
theme(panel.grid.minor=element_blank(),
panel.grid.major=element_line(linetype="dashed"),
legend.title=element_blank(),
legend.direction = "horizontal",
legend.position = c(0.18,0.05),
legend.background=element_rect(colour="transparent", fill="transparent"))+
scale_y_continuous(breaks=c(20,30,40,50,60,70,80,90,100,110,120))+
scale_x_date(expand=c(0, 0), date_breaks="1 month", date_labels = "%b") +
xlab("")+
ylab("Temperature (F)")+
ggtitle("2014 Weather in Los Angeles, CA")