# Identify municipal tax havens and fiscally impoverished jurisdictions

library(tidyverse)


# Create the folders this script writes to. `recursive = TRUE` also creates a
# missing parent folder ('output'), and `showWarnings = FALSE` keeps re-runs
# quiet when the folders already exist.
dir.create('output/hilow', recursive = TRUE, showWarnings = FALSE)
dir.create('clean_data', recursive = TRUE, showWarnings = FALSE)

# Region lookup table, read from the Census state geocodes file. Only two
# columns are kept: the state FIPS code (renamed `st`, the join key used
# further down) and the region (renamed `region`).
regions = select(
  read_csv('nonproprietary_data/census_regions/state-geocodes-v2016.csv'),
  st = `State (FIPS)`,
  region = Region
)

# Cutoff multiple for the fiscal capacity ratio (FCR): a jurisdiction is flagged
# as a tax haven when its FCR is at least `threshold`, and as fiscally
# impoverished when its FCR is at most 1/threshold.
threshold = 3

# Small-unit codes mapped to the names used in the clean_data export files.
# The loop below iterates over the names of this vector, so every unit that is
# processed is guaranteed to have an export name.
smallnames = c(
  loc2 = 'gen_purpose_govts',
  loc = 'gen_purpose_govts_incl_townships',
  school = 'school_districts',
  stcnty = 'counties'
)

# Reduce an already-sorted table of flagged rows to one row per jurisdiction
# (st, fuint), keeping the first row in the current sort order, and select the
# columns reported in the tax-haven / fiscally-impoverished tables.
# `valpc` is the split-adjusted value per capita (splittotpc), i.e. the same
# quantity that `fcr` is computed from and that the other exports report.
flagged_table = function(dta) {
  dta %>%
    distinct(st, fuint, .keep_all = TRUE) %>%
    select(bigint, metro = bigname, fuint, fuintname, clpop, value,
           valpc = splittotpc, bigpc, fcr, respct)
}

for (bigunit in c('cbsa', 'cz', 'csa')) {

  for (smallunit in names(smallnames)) {

    # Import joined data from analysis_tfq. This has already been cleaned
    fudta = read_csv(paste0('output/tfq/tfq_fu/fu_big_', bigunit, '_', smallunit, '_tot.csv'))

    # If a jurisdiction is split across big units, calculate its total tax base
    # per capita and use that. This runs before the sample filter below, so the
    # totals cover every row of the jurisdiction present in the input file.
    fudta = fudta %>%
      group_by(st, fuint) %>%
      mutate(
        splitpopcen = sum(cenpop),
        splitpopcl = sum(clpop),
        splittot = sum(value_tot),
        splitnonres = sum(value_nonres),
        splittotpc = splittot / splitpopcl,
        splitnonrespc = splitnonres / splitpopcl
      ) %>%
      ungroup()

    # Keep rows assigned to a big unit that are in the clean value sample, and
    # check that every remaining row carries the clean flag
    fudta = filter(fudta, !is.na(bigint), cleansample_val_tot == TRUE)
    stopifnot(fudta$cleanflag == TRUE)
    fudta = left_join(fudta, regions, by = 'st')

    # Fiscal capacity ratio: jurisdiction value per capita relative to the
    # big unit's value per capita
    fudta$fcr = fudta$splittotpc / fudta$bigpc

    # Identify tax havens (hi_rel) and fiscally impoverished jurisdictions (low_rel)
    fudta = mutate(fudta,
                   low_rel = fcr <= 1 / threshold,
                   hi_rel = fcr >= threshold)

    # Proportion of value that's residential property
    fudta$respct = fudta$value_res / fudta$value

    # Export every jurisdiction, highest FCR first
    fudta %>%
      arrange(desc(fcr)) %>%
      select(bigint, metro = bigname, fuint, fuintname, clpop, value,
             valpc = splittotpc, fcr, bigpc, respct, low_rel, hi_rel) %>%
      write_csv(paste0('output/hilow/fu_all_', threshold, '_', bigunit, '_', smallunit, '.csv'))

    # Now export jurisdictions by metro
    outjur = fudta %>%
      arrange(bigint, st, fuintname) %>%
      select(state = st, jurisdiction_code = fuint, jurisdiction_name = fuintname,
             bigint, bigname, pop = clpop, value_tot = value, value_pc = splittotpc,
             bigpc, fcr, pct_residential = respct,
             fisc_impov = low_rel, tax_haven = hi_rel)

    # Label the big-unit columns with the unit type. Renaming by column name
    # (not position) keeps this correct if the select() above is reordered.
    names(outjur)[names(outjur) == 'bigint'] = paste0(bigunit, '_code')
    names(outjur)[names(outjur) == 'bigname'] = paste0(bigunit, '_name')
    names(outjur)[names(outjur) == 'bigpc'] = paste0('value_pc_', bigunit)

    smallname = smallnames[[smallunit]]
    outjur %>% write_csv(paste0('clean_data/jurisdiction_data_', bigunit, '_', smallname, '.csv'))

    # Tables of tax havens and of fiscally impoverished jurisdictions.
    # These drop duplicates:
    #   - If a jurisdiction is flagged in more than one big unit, keep just the
    #     portion with the larger population
    #   - If a jurisdiction crosses big-unit lines but is only flagged in one,
    #     keep that portion
    hi = filter(fudta, hi_rel)
    low = filter(fudta, low_rel)

    # Deduplicated keeping the most populous portion; rows are in descending
    # population order
    fuhipop = hi %>% arrange(desc(clpop)) %>% flagged_table()
    fulowpop = low %>% arrange(desc(clpop)) %>% flagged_table()

    # Full tables: same rows, ordered from most to least extreme FCR
    fuhi = arrange(fuhipop, desc(fcr))
    fulow = arrange(fulowpop, fcr)

    fuhi %>% write_csv(paste0('output/hilow/fu_hi_', threshold, '_', bigunit, '_', smallunit, '.csv'))
    fulow %>% write_csv(paste0('output/hilow/fu_lo_', threshold, '_', bigunit, '_', smallunit, '.csv'))

    # Export just the top 10 by FCR and the top 10 by population. For the FCR
    # ranking, duplicates are resolved by keeping the most extreme portion.
    fuhival = hi %>% arrange(desc(fcr)) %>% flagged_table()
    fulowval = low %>% arrange(fcr) %>% flagged_table()

    # head() returns at most 10 rows; indexing with [1:10, ] would pad the
    # table with all-NA rows when fewer than 10 jurisdictions are flagged.
    fuhiexp = bind_rows(head(fuhival, 10), head(fuhipop, 10))
    fuloexp = bind_rows(head(fulowval, 10), head(fulowpop, 10))

    fuhiexp %>% write_csv(paste0('output/hilow/fu_hi20_', threshold, '_', bigunit, '_', smallunit, '.csv'))
    fuloexp %>% write_csv(paste0('output/hilow/fu_lo20_', threshold, '_', bigunit, '_', smallunit, '.csv'))

  }
}
    