# Create graphs comparing the baseline specification to alternative specifications for the appendix

library(tidyverse); theme_set(theme_bw())
library(scales)
library(ggrepel)
options(tibble.width = Inf)
library(weights)


# Make sure the figure output folder exists before any pdf() device is opened.
# The dir.exists() guard avoids the "already exists" warning on re-runs, and
# recursive = TRUE also creates a missing parent folder. Warnings for genuine
# failures (e.g. permissions) are still emitted.
if (!dir.exists('output/sc_robustness')) {
  dir.create('output/sc_robustness', recursive = TRUE)
}

## Build the table behind the main-specification vs loc / school / all-property graphs
# Baseline: one row per big unit, keeping only rows that have a name
    bigunit <- 'cbsa'
    smallunit <- 'loc2'

    tfq_bl <- paste0('output/tfq/tfq_',bigunit, '_', smallunit,'.csv') %>%
      read_csv() %>%
      filter(!is.na(bigname))

# School districts: keep just the join key and the three comparison columns
    smallunit <- 'school'
    tfq_sch <- paste0('output/tfq/tfq_',bigunit, '_', smallunit,'.csv') %>%
      read_csv() %>%
      filter(!is.na(bigname)) %>%
      select(
        bigint,
        tfq_val_tot,
        govts100k_tot,
        tfq_val_res
      )

# Version including townships: same columns as the school-district table
    smallunit <- 'loc'
    tfq_loc <- paste0('output/tfq/tfq_',bigunit, '_', smallunit,'.csv') %>%
      read_csv() %>%
      filter(!is.na(bigname)) %>%
      select(
        bigint,
        tfq_val_tot,
        govts100k_tot,
        tfq_val_res
      )

# Join onto the baseline by bigint; baseline columns keep their names, while
# clashing columns from the other tables get the '_sch' / '_loc' suffix
    tfq <- tfq_bl %>%
      left_join(tfq_sch, by = 'bigint', suffix = c('', '_sch')) %>%
      left_join(tfq_loc, by = 'bigint', suffix = c('', '_loc'))
    
    
    # Make scatter plots
    
    # Baseline (all property) vs residential-value TFQ
    # Absolute gap between the two measures; used below to decide which points get a label
    tfq[['dif']] <- abs(tfq[['tfq_val_tot']] - tfq[['tfq_val_res']])

    # Assemble the scatter first: points sized by population, dashed default abline for reference
    gplt <- ggplot(tfq, aes(x = tfq_val_tot, y = tfq_val_res, size = pop_tot))
    gplt <- gplt +
      geom_point(alpha = 0.2) +
      geom_abline(linetype = 2, alpha = 0.4)
    # Label only rows with a large gap (and pop_tot > 25,000) or a smaller gap with pop_tot > 700,000
    gplt <- gplt +
      geom_text_repel(
        data = filter(tfq, (dif > .2 & pop_tot > 25000) | (dif > .04 & pop_tot > 700000)),
        aes(label = bigname),
        size = 1.45,
        segment.size = .15,
        seed = 1234,
        min.segment.length = .1
      )
    # Same 0-0.6 limits on both axes
    gplt <- gplt +
      scale_x_continuous(limits = c(0, 0.6), name = 'TFQ - all property (baseline)') +
      scale_y_continuous(limits = c(0, 0.6), name = 'TFQ - residential property') +
      scale_size_continuous(range = c(0, 6), labels = comma, name = 'Population')

    # Write the figure to PDF
    pdf(paste0('output/sc_robustness/fig_a322_sc_',bigunit,'_bl_vs_res.pdf'), height = 4.5)
    print(gplt)
    dev.off()

    # Correlation, weighted by pop_tot (value recorded in the script: 0.7538726)
    wtd.cor(tfq[['tfq_val_tot']], tfq[['tfq_val_res']], weight = tfq[['pop_tot']])
    
    
    # Baseline vs school-district TFQ
    # Absolute gap between the baseline and the school-district measure; drives the labelling below
    tfq[['dif']] <- abs(tfq[['tfq_val_tot']] - tfq[['tfq_val_tot_sch']])

    # Scatter with population-sized points and a dashed default abline for reference
    gplt <- ggplot(tfq, aes(x = tfq_val_tot, y = tfq_val_tot_sch, size = pop_tot))
    gplt <- gplt +
      geom_point(alpha = 0.2) +
      geom_abline(linetype = 2, alpha = 0.4)
    # Label rows with a large gap (and pop_tot > 20,000) or a smaller gap with pop_tot > 500,000
    gplt <- gplt +
      geom_text_repel(
        data = filter(tfq, (dif > .2 & pop_tot > 20000) | (dif > .04 & pop_tot > 500000)),
        aes(label = bigname),
        size = 1.45,
        segment.size = .15,
        seed = 1234,
        min.segment.length = .1
      )
    # Same 0-0.7 limits on both axes
    gplt <- gplt +
      scale_x_continuous(limits = c(0, 0.7), name = 'TFQ across general purpose governments (baseline)') +
      scale_y_continuous(limits = c(0, 0.7), name = 'TFQ across school districts') +
      scale_size_continuous(range = c(0, 6), labels = comma, name = 'Population')

    # Write the figure to PDF
    pdf(paste0('output/sc_robustness/fig_a312_sc_',bigunit,'_bl_vs_sch.pdf'), height = 4.5)
    print(gplt)
    dev.off()

    # Correlation, weighted by pop_tot (value recorded in the script: 0.6965374)
    wtd.cor(tfq[['tfq_val_tot']], tfq[['tfq_val_tot_sch']], weight = tfq[['pop_tot']])
    
    
    # Baseline vs the 'loc' version (including all townships)
    # Absolute gap between the baseline and the 'loc' measure; drives the labelling below
    tfq[['dif']] <- abs(tfq[['tfq_val_tot']] - tfq[['tfq_val_tot_loc']])

    # Scatter with population-sized points and a dashed default abline for reference
    gplt <- ggplot(tfq, aes(x = tfq_val_tot, y = tfq_val_tot_loc, size = pop_tot))
    gplt <- gplt +
      geom_point(alpha = 0.2) +
      geom_abline(linetype = 2, alpha = 0.4)
    # Label every row whose gap exceeds 0.07, with no population condition
    gplt <- gplt +
      geom_text_repel(
        data = filter(tfq, dif > .07),
        aes(label = bigname),
        size = 1.45,
        segment.size = .15,
        seed = 1234,
        min.segment.length = .1
      )
    # Same 0-0.7 limits on both axes
    gplt <- gplt +
      scale_x_continuous(limits = c(0, 0.7), name = 'TFQ - baseline') +
      scale_y_continuous(limits = c(0, 0.7), name = 'TFQ - including all townships') +
      scale_size_continuous(range = c(0, 6), labels = comma, name = 'Population')

    # Write the figure to PDF
    pdf(paste0('output/sc_robustness/fig_a332_sc_',bigunit,'_bl_vs_loc.pdf'), height = 4.5)
    print(gplt)
    dev.off()

    # Correlation, weighted by pop_tot (value recorded in the script: 0.9930648)
    wtd.cor(tfq[['tfq_val_tot']], tfq[['tfq_val_tot_loc']], weight = tfq[['pop_tot']])
    