# Calculate property taxes as a fraction of total state and local revenue by CBSA
# State revenue is allocated to counties on a per-capita basis

library(tidyverse)

# Make sure the output folder exists. `recursive = TRUE` also creates the
# parent 'output' directory on a fresh checkout, and `showWarnings = FALSE`
# keeps re-runs quiet when the folder is already there.
dir.create('output/fisc', recursive = TRUE, showWarnings = FALSE)

# Load the six Government Finance Database extracts, one file per government
# type: state, municipal, school district, special district, county, township.
fst <- read_csv(
  file = 'nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_StateData/StateData.csv'
)
fmuni <- read_csv(
  file = 'nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_MunicipalData/MunicipalData.csv'
)
fsch <- read_csv(
  file = 'nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_SchoolDistrictData/SchoolDistrictData.csv'
)
fsp <- read_csv(
  file = 'nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_SpecialDistrictData/SpecialDistrictData.csv'
)
fcnty <- read_csv(
  file = 'nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_CountyData/CountyData.csv'
)
ftwsp <- read_csv(
  file = 'nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_TownshipData/TownshipData.csv'
)

# Every year is present in these files, but years ending in 2 or 7 have far
# more rows because those are Census years.
fmuni %>% count(Year4)

# Keep 2017 only. In every table FIPS_Place is cast to character
# (presumably so the column types agree when the tables are stacked later -
# confirm) and `gtype` records which file the row came from.
fmuni <- fmuni %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), gtype = 'Municipal')
fst <- fst %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), gtype = 'State')
fsch <- fsch %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), gtype = 'School')
fsp <- fsp %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), gtype = 'Special District')
fcnty <- fcnty %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), gtype = 'County')
ftwsp <- ftwsp %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), gtype = 'Township')


# Bring in the county-to-CBSA crosswalk. Base read.csv() is used here, so the
# column names arrive with dots in place of spaces/slashes.
cbsa <- read.csv('nonproprietary_data/msa_crosswalks/list1_2020.csv')

# Clean up: zero-pad the codes to fixed widths (state 2, county 3, CBSA 5,
# CSA 3), flag micropolitan areas, and keep only those five columns.
cbsa <- cbsa %>%
  mutate(
    st = str_pad(FIPS.State.Code, 2, pad = '0'),
    cnty = str_pad(FIPS.County.Code, 3, pad = '0'),
    cbsa = str_pad(CBSA.Code, 5, pad = '0'),
    csa = str_pad(CSA.Code, 3, pad = '0'),
    micro = Metropolitan.Micropolitan.Statistical.Area == 'Micropolitan Statistical Area'
  ) %>%
  select(st, cnty, cbsa, csa, micro)


# Two denominators are built for the state table:
#  - denom_tot: total revenue minus transfers, so nothing is double counted.
#    Transfers out are measured as local + state transfers in.
#  - denom_own: total own-source revenue. No double counting to worry about,
#    but federal transfers are lost.
# Each denominator, plus Property_Tax, is then divided by Population to get a
# per-capita column prefixed with 'pc_'.
fst <- fst %>%
  mutate(
    denom_tot = Total_Revenue - Total_State_IG_Revenue - Tot_Local_IG_Rev,
    denom_own = Total_Rev_Own_Sources,
    pc_denom_tot = denom_tot / Population,
    pc_denom_own = denom_own / Population,
    pc_Property_Tax = Property_Tax / Population
  )

# Stack every sub-state government into one table (the state table is kept
# separate) and build the same revenue denominators on it.
fns <- bind_rows(fmuni, fsp, fcnty, ftwsp, fsch) %>%
  mutate(
    # Total revenue net of state and local intergovernmental revenue
    denom_tot = Total_Revenue - Total_State_IG_Revenue - Tot_Local_IG_Rev,
    # All local government (but excluding state)
    denom_loc_all = Total_Revenue - Tot_Local_IG_Rev,
    # Own-source revenue only
    denom_own = Total_Rev_Own_Sources
  )


# Aggregate to county level based on the assigned codes.
# First fix CT (state FIPS '09'), which is already using the councils of
# governments codes in FIPS_County; translate those to a county FIPS code.
ct_cog_to_county <- c(
  '110' = '003', # Hartford and New Britain
  '120' = '001',
  '130' = '007', # Middletown
  '140' = '009', # Waterbury, Bristol
  '150' = '015', # NE corner
  '160' = '005', # Litchfield
  '170' = '009', # New Haven
  '180' = '011', # New London
  '190' = '001'  # Stamford
)

fns <- fns %>% mutate(
  cnty = if_else(
    FIPS_Code_State == '09',
    # A Connecticut code that is not in the lookup is kept as-is instead of
    # being turned into NA, so a row that already carries a real county code
    # still matches in the county-level joins.
    coalesce(unname(ct_cog_to_county[FIPS_County]), FIPS_County),
    # Every other state: the county code is used unchanged
    FIPS_County
  )
)

# Collapse the government type into three classes: 'Locality' (municipal,
# township, county), 'School', and 'Other' for everything else.
# %in% is used for both tests so a missing gtype falls through to 'Other'.
fns <- fns %>%
  mutate(
    gtype2 = if_else(
      gtype %in% c('Municipal', 'Township', 'County'),
      'Locality',
      if_else(gtype %in% 'School', 'School', 'Other')
    )
  )

# Collapse to one row per state x county.
#  - pt_sch / pt_gp / pt_oth: property tax collected by school districts,
#    general-purpose localities, and all other governments respectively.
#  - denom_tot, denom_loc_all, denom_own, Property_Tax: county totals.
# All sums skip missing values, so a single NA Property_Tax no longer turns
# the pt_* columns into NA while the Property_Tax total stays populated.
# `.groups = 'drop'` returns an ungrouped table so later steps do not
# inherit a leftover grouping by state.
fisc <- fns %>%
  group_by(FIPS_Code_State, cnty) %>%
  summarise(
    pt_sch = sum(Property_Tax * (gtype2 == 'School'), na.rm = TRUE),
    pt_gp = sum(Property_Tax * (gtype2 == 'Locality'), na.rm = TRUE),
    pt_oth = sum(Property_Tax * (gtype2 == 'Other'), na.rm = TRUE),
    across(
      all_of(c('denom_tot', 'denom_loc_all', 'denom_own', 'Property_Tax')),
      ~ sum(.x, na.rm = TRUE)
    ),
    .groups = 'drop'
  )

# Attach the CBSA / CSA codes to each county (counties with no match in the
# crosswalk keep NA codes because this is a left join)
fisc <- fisc %>%
  left_join(cbsa, by = c('FIPS_Code_State' = 'st', 'cnty'))

# County population, taken from the NHGIS 2020 county file
cpop <- read_csv('nonproprietary_data/nhgis/nhgis0053_csv/nhgis0053_ds248_2020_county.csv') %>%
  select(st = STATEA, cnty = COUNTYA, pop = U7B001)

fisc <- fisc %>%
  left_join(cpop, by = c('FIPS_Code_State' = 'st', 'cnty'))

# Interactive checks: total matched population, then rows with no population
sum(fisc$pop, na.rm = TRUE)
fisc %>% filter(is.na(pop)) # One thing in Kansas with $11M, ok to drop

# Add in state revenues per capita - do this after aggregating
state_pc <- fst %>% select(FIPS_Code_State, starts_with('pc_'))
fisc <- left_join(fisc, state_pc, by = 'FIPS_Code_State')

# '<var>_wst' = county amount plus the state amount allocated to the county
# as (county population x state per-capita amount).
# NOTE(review): if a FIPS_Code_State has no row in fst (possibly DC - confirm),
# or a county has no population, the '_wst' value is NA and that county's
# local amounts drop out of the na.rm sums taken at the metro level.
for (varint in c('denom_tot', 'denom_own', 'Property_Tax')) {
  state_share <- fisc$pop * fisc[[paste0('pc_', varint)]]
  fisc[[paste0(varint, '_wst')]] <- fisc[[varint]] + state_share
}

# Now, aggregate to metro and compute ratios.
# NOTE(review): counties with no CBSA code have cbsa = NA, so they end up
# together in a single NA row of the output - confirm that is intended.
fiscmsa <- fisc %>%
  group_by(cbsa) %>%
  summarise(
    pop = sum(pop, na.rm = TRUE),
    denom_tot_wst = sum(denom_tot_wst, na.rm = TRUE),
    denom_loc_all = sum(denom_loc_all, na.rm = TRUE),
    denom_own_wst = sum(denom_own_wst, na.rm = TRUE),
    Property_Tax_wst = sum(Property_Tax_wst, na.rm = TRUE)
  )

# Property tax (state + local) as a share of each denominator.
# NOTE(review): ptr_denom_loc_all divides a numerator that includes the state
# allocation by a local-only denominator - confirm this is the intended ratio.
fiscmsa <- fiscmsa %>%
  mutate(
    ptr_denom_tot_wst = Property_Tax_wst / denom_tot_wst,
    ptr_denom_own_wst = Property_Tax_wst / denom_own_wst,
    ptr_denom_loc_all = Property_Tax_wst / denom_loc_all
  )

write_csv(fiscmsa, 'output/fisc/fisc_cbsa.csv')
