# Make scatterplots of TFQ versus reliance on property taxes

library(tidyverse) ; theme_set(theme_bw()); options(tibble.width = Inf)
library(ggrepel)
library(scales)

# Make sure the output folder exists. recursive = TRUE also creates any
# missing parent folder, and showWarnings = FALSE keeps re-runs quiet when
# the folder is already there.
dir.create("output/sc_proptax_reliance", recursive = TRUE, showWarnings = FALSE)

# Load the two inputs: the TFQ table and the property tax reliance table
tfq <- read_csv("output/tfq/tfq_cbsa_loc2.csv")
fisc <- read_csv("output/fisc/fisc_cbsa.csv")

# Keep every TFQ row and attach the fiscal columns, matching tfq$bigint to
# fisc$cbsa. Columns present in both tables keep their name for the TFQ copy
# and get a "_fisc" suffix for the fiscal copy.
dta <- tfq %>%
  left_join(fisc, by = c("bigint" = "cbsa"), suffix = c("", "_fisc"))

# Sanity check: number of rows with missing population (recorded as 0)
sum(is.na(dta$pop))

# Discard rows that have no bigint identifier
dta <- dta %>%
  filter(!is.na(bigint))

# Exploratory scatterplots: one PDF per (TFQ variable, reliance variable) pair.
# Each point is a row of dta, sized by population; only the largest areas
# are labelled.

# Rows to label (pop > 2,000,000). This does not depend on the loop
# variables, so it is computed once up front.
namesdat <- filter(dta, pop > 2000000)

for (v1 in c("tfq_val_tot")) {
  for (v2 in c("ptr_denom_tot_wst", "ptr_denom_own_wst", "ptr_denom_loc_all")) {

    # Fail before opening a graphics device if a requested column is missing,
    # so a typo does not leave an empty PDF behind.
    stopifnot(all(c(v1, v2) %in% names(dta)))

    # x-axis upper limit: 1 by default, tightened to .3 for ptr_denom_tot_wst
    upperlim <- if (v2 %in% c("ptr_denom_tot_wst")) .3 else 1

    # Columns are looked up by name with the .data pronoun, so no scratch
    # columns need to be written into dta. Axis titles are the variable names.
    gplt <- ggplot(dta, aes(x = .data[[v2]], y = .data[[v1]], size = pop)) +
      geom_point(alpha = .2) +
      geom_text_repel(data = namesdat, aes(label = bigname), size = 1.5) +
      scale_size_continuous(range = c(0, 6), labels = comma) +
      scale_x_continuous(name = v2, limits = c(0, upperlim)) +
      scale_y_continuous(name = v1) +
      ggtitle(paste(v1, v2))

    # Write the PDF. The device is closed in `finally` so that an error while
    # rendering cannot leave it open and capture later plots.
    pdf(paste0("output/sc_proptax_reliance/sc_", v1, "_", v2, ".pdf"), height = 5)
    tryCatch(print(gplt), finally = dev.off())
  }
}


# Make final graph for the appendix: tfq_val_tot against ptr_denom_tot_wst,
# with points sized by population.

# Rows to label: pop above 4,000,000, or tfq_val_tot above .25 with pop
# above 300,000, or tfq_val_tot below .1 with pop above 800,000.
namesdat <- filter(
  dta,
  pop > 4000000 |
    (tfq_val_tot > .25 & pop > 300000) |
    (tfq_val_tot < .1 & pop > 800000)
)

# Build the plot. A fixed seed is passed to geom_text_repel so label
# placement is reproducible across runs.
gplt <- ggplot(dta, aes(x = ptr_denom_tot_wst, y = tfq_val_tot, size = pop)) +
  geom_point(alpha = .1) +
  geom_text_repel(
    seed = 123,
    data = namesdat,
    aes(label = bigname),
    size = 1.5,
    segment.size = .3,
    min.segment.length = .1
  ) +
  scale_size_continuous(
    range = c(0, 6),
    labels = comma,
    name = "Population, 2020"
  ) +
  scale_x_continuous(
    name = "Fraction of all state and local revenue from property taxes, 2017",
    limits = c(0, .4),
    labels = percent
  ) +
  scale_y_continuous(name = "TFQ, 2019", limits = c(0, .36))

# Write the PDF. The device is closed in `finally` so that an error while
# rendering cannot leave it open.
pdf("output/sc_proptax_reliance/sc_clean_tfq_totwst.pdf", height = 5)
tryCatch(print(gplt), finally = dev.off())
