#Take output of sigma analysis for BEA and Census data and make graphs of
#Family income analysis to be included in final paper

library(dplyr)
library(Hmisc)
library(maps)
library(stringr)
library(RColorBrewer)
library(scales)
library(readstata13)
library(ggplot2)
library(reshape2)
library(ggthemes)

setwd('~/projects/regionineq/')

#Ensure the graphs output directory exists. An existing directory is not
#emptied; re-running the script simply overwrites the PDFs written below.
#showWarnings = FALSE keeps re-runs quiet when the directory is already there,
#and recursive = TRUE also creates the parent 'output' directory if missing.
dir.create('output/sigmagraphs', showWarnings = FALSE, recursive = TRUE)

#Load the BEA results and reshape to long format: one row per year and
#dispersion measure
gdp <- read.csv('output/bea/cz/devstats/bea_cz_devstats.csv')
gdpgraph <- melt(select(gdp, year, wsd, wsdlog, wiqr, wd19), id.vars = 'year')
# gdpgraph = filter(gdpgraph, variable != 'wiqr')

#Supplement figure: several measures of sigma divergence for BEA, each
#expressed relative to its own 1980 value
#Give the measures readable legend labels
gdpgraph$variable <- factor(
  gdpgraph$variable,
  levels = c('wsd', 'wsdlog', 'wiqr', 'wd19'),
  labels = c('Coef. of variation', 'SD of log', 'Inter-quartile range', '10-90 range')
)
#Attach each measure's 1980 row (its columns get the '_start' suffix), then
#divide every value by that 1980 value
gdpg80 <- gdpgraph %>% filter(year == 1980)
gdpgraph <- merge(gdpgraph, gdpg80, by = 'variable', suffixes = c('', '_start')) %>%
  mutate(relval = value / value_start)

#Output figure for Appendix 1: one line per BEA dispersion measure over time,
#each expressed relative to its 1980 value.
#The plot is built before the PDF device is opened so that an error while
#constructing it cannot leave a device open or a truncated file behind.
gplt <- ggplot(gdpgraph, aes(x = year, y = relval, linetype = variable)) +
  geom_line() +
  theme_bw() +
  scale_linetype_discrete(name = '') +
  scale_x_continuous(name = 'Year') +
  #'labels' is spelled out in full rather than relying on partial matching;
  #the y axis starts at 0 and leaves 10% headroom above the largest value
  scale_y_continuous(
    labels = percent,
    limits = c(0, max(gdpgraph$relval) * 1.1),
    name = 'Value relative to 1980'
  ) +
  theme(legend.position = 'bottom') +
  #Only linetype is mapped in this figure, so only its legend is wrapped
  guides(linetype = guide_legend(nrow = 2))

# ggtitle('A: Measures of sigma divergence in per capita personal income\nacross Commuting Zones since 1969, BEA data')
pdf('output/sigmagraphs/sigma_bea.pdf', height = 6)
print(gplt)
dev.off()

#Family income from IPUMS (Figure 3): keep the 'fam' sample and the four
#dispersion measures, then reshape to long format
faminc <- read.csv('output/ipums/cz/devstats/dev_cz.csv')
faminc <- faminc %>%
  filter(samp == 'fam') %>%
  select(year, stat, wsd, wsdlog, wd19, wiqr)
famgr <- faminc %>% melt(id.vars = c('year', 'stat'))
# famgr = filter(famgr, variable != 'wiqr')

#Label the statistic and record its position as a number (1 = Mean, 2 = Median)
famgr$stat <- factor(famgr$stat, levels = c('mean', 'median'), labels = c('Mean', 'Median'))
famgr$sort <- as.numeric(famgr$stat)

#Give the measures readable legend labels
famgr$variable <- factor(
  famgr$variable,
  levels = c('wsd', 'wsdlog', 'wiqr', 'wd19'),
  labels = c('Coef. of variation', 'SD of log', 'Inter-quartile range', '10-90 range')
)
#Attach the 1980 row for each statistic/measure pair (columns suffixed
#'_start'), express every value relative to it, and put Median rows before
#Mean rows -- presumably so Mean is drawn last; confirm intent
fam80 <- famgr %>% filter(year == 1980)
famgr <- merge(famgr, fam80, by = c('stat', 'variable'), suffixes = c('', '_start')) %>%
  mutate(relval = value / value_start) %>%
  arrange(desc(sort))

#IPUMS family income figure: one line (with points) per dispersion measure and
#statistic, each expressed relative to its 1980 value.
#The plot is built before the PDF device is opened so that an error while
#constructing it cannot leave a device open or a truncated file behind.
gplt <- ggplot(famgr, aes(x = year, y = relval, linetype = variable, color = stat)) +
  geom_line() +
  geom_point() +
  theme_bw() +
  #Colours are named by factor level so the mapping does not depend on which
  #levels happen to be present in the data
  scale_color_manual(values = c(Mean = 'black', Median = 'gray'), name = '') +
  scale_linetype_discrete(name = '') +
  scale_x_continuous(name = 'Year') +
  #'labels' is spelled out in full rather than relying on partial matching;
  #the y axis starts at 0 and leaves 10% headroom above the largest value
  scale_y_continuous(
    labels = percent,
    limits = c(0, max(famgr$relval) * 1.1),
    name = 'Value relative to 1980'
  ) +
  theme(legend.position = 'bottom') +
  #Wrap both legends onto two rows so they fit beneath the panel
  guides(linetype = guide_legend(nrow = 2), color = guide_legend(nrow = 2))
# ggtitle('Measures of sigma divergence in family income\nacross Commuting Zones since 1980')
pdf('output/sigmagraphs/sigma_ipums.pdf', height = 6)
print(gplt)
dev.off()
