# Run regressions of revenue by source on tax base

library(tidyverse); options(tibble.width = Inf); theme_set(theme_bw())
library(scales)
library(stargazer)
library(lm.beta)
library(lfe)
library(Hmisc)


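# Bring in the Government Finance Database extracts (state, municipal, school district, special district, county, township)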
# Read each Government Finance Database extract and keep fiscal year 2017 only.
# FIPS_Place is cast to character in every table, and each table gets a label
# column describing its government type.

# State, school-district and special-district tables.
# NOTE(review): the state table stores its label in `gtype`, whereas every
# other table uses `futype` -- confirm this is intentional.
fst <- read_csv('nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_StateData/StateData.csv') %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), gtype = 'State')

fsch <- read_csv('nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_SchoolDistrictData/SchoolDistrictData.csv') %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), futype = 'School')

fsp <- read_csv('nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_SpecialDistrictData/SpecialDistrictData.csv') %>%
  filter(Year4 == 2017) %>%
  mutate(FIPS_Place = as.character(FIPS_Place), futype = 'Special District')

# General-purpose local governments. Each also gets a `fuint` identifier:
#   Place  -> FIPS_Place as text
#   County -> characters 3-5 of FIPS_Place
#   CSD    -> FIPS_County and FIPS_Place joined by '_'
fmuni <- read_csv('nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_MunicipalData/MunicipalData.csv') %>%
  filter(Year4 == 2017) %>%
  mutate(
    fuint = as.character(FIPS_Place),
    FIPS_Place = as.character(FIPS_Place),
    futype = 'Place'
  )

fcnty <- read_csv('nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_CountyData/CountyData.csv') %>%
  filter(Year4 == 2017) %>%
  mutate(
    fuint = substr(as.character(FIPS_Place), 3, 5),
    FIPS_Place = as.character(FIPS_Place),
    futype = 'County'
  )

ftwsp <- read_csv('nonproprietary_data/TheGovernmentFinanceDatabase/TheGovernmentFinanceDatabase_TownshipData/TownshipData.csv') %>%
  filter(Year4 == 2017) %>%
  mutate(
    fuint = paste0(as.character(FIPS_County), '_', as.character(FIPS_Place)),
    FIPS_Place = as.character(FIPS_Place),
    futype = 'CSD'
  )

# Bring in the FU-level file, keeping only rows flagged cleansample_val_tot
fudta <- read_csv('output/tfq/tfq_fu/fu_big_cbsa_loc2_tot.csv') %>%
  filter(cleansample_val_tot == TRUE)

count(fudta, futype)

# CT CSDs: the join id keeps only characters 5-9 of fuint (i.e. without the
# county part); every other row keeps fuint unchanged.
fudta <- mutate(
  fudta,
  fuint_join = if_else(st == '09' & futype == 'CSD', substr(fuint, 5, 9), fuint)
)

# FUs split across several rows: total population and value over all rows
# sharing (st, fuint), plus the resulting per-capita value.
fudta <- fudta %>%
  mutate(
    splitpopcl = sum(clpop),
    splittot = sum(value_tot),
    splittotpc = splittot / splitpopcl,
    .by = c(st, fuint)
  )

# Keep one row per (st, fuint): the one with the largest clpop
fudta <- fudta %>%
  arrange(st, fuint, desc(clpop)) %>%
  distinct(st, fuint, .keep_all = TRUE)

# County-level FU file, restricted to rows flagged cleansample_val_tot
fucnty <- read_csv('output/tfq/tfq_fu/fu_big_cbsa_stcnty_tot.csv') %>%
  filter(cleansample_val_tot == TRUE)

count(fucnty, futype)

# Same CT CSD join-id rule as for the locality file, plus a county code
# taken from characters 3-5 of fuint.
fucnty <- mutate(
  fucnty,
  fuint_join = if_else(st == '09' & futype == 'CSD', substr(fuint, 5, 9), fuint),
  cty = substr(fuint, 3, 5)
)


# Look at totals. The figures in the trailing comments were recorded by the
# original author (presumably from an earlier run -- TODO confirm).
with(fudta, sum(tax_tot)) #576623401933 - 575896670517 after exempt
with(fsp, sum(Property_Tax)) #21770446 # about 4%.
with(fsch, sum(Property_Tax)) #212073880 # about 37%. Not cou[...] (original note truncated)
with(fst, sum(Property_Tax)) #19038089 # about 3%. Not cou[...] (original note truncated)
# So in total we get to 527774027000 / 576623401933 = 91%, and this is for 2017 not 2019



# Stack municipalities, counties and townships, keeping the identifier columns
# and the revenue / debt items used below. FIPS_Code_State is renamed to `st`.
fisc <- bind_rows(fmuni, fcnty, ftwsp) %>%
  select(
    # identifiers
    futype, fuint, FIPS_County, st = FIPS_Code_State, Name, Population,
    # headline revenue and tax totals
    Total_Revenue, Total_Rev_Own_Sources, Gen_Rev_Own_Sources,
    Total_Taxes, Property_Tax, Tot_Sales___Gr_Rec_Tax, Tot_Local_IG_Rev,
    Total_IG_Revenue, Total_Fed_IG_Revenue, Total_State_IG_Revenue,
    Tot_Chgs_and_Misc_Rev, Misc_General_Revenue, Fines_and_Forfeits,
    # detailed federal and state intergovernmental items
    starts_with('Fed_IGR'), starts_with('State_IGR'),
    # charges, lottery, debt
    Total_General_Charges, Net_Lottery_Revenue, Interest_on_Gen_Debt,
    Total_Debt_Outstanding, Total_Long_Term_Debt_Out, Total_Interest_on_Debt,
    # tax detail and miscellaneous revenue detail
    Total_Select_Sales_Tax, Total_Gen_Sales_Tax, Total_License_Taxes,
    Total_Income_Taxes, Misc_General_Rev_NEC, Prop_Sale_Total,
    Special_Assessments, Interest_Revenue, Rents_and_Royalties,
    # utilities
    Water_Utility_Revenue, Gas_Utility_Rev, Electric_Utility_Rev,
    Transit_Utility_Rev,
    # charges by function
    Chg_Total_Education, Chg_Hospitals, Chg_Highways, Chg_Total_Nat_Res,
    Chg_Parking, Chg_Sewerage, Chg_Solid_Waste_Mgmt, Chg_Water_Transport,
    Chg_All_Other_NEC, Chg_Air_Transportation, Chg_Housing___Comm_Dev,
    Chg_Parks___Recreation,
    # remaining totals
    Total_Utility_Revenue, Total_Insur_Trust_Rev, Liquor_Stores_Revenue
  )
with(fisc, sum(Property_Tax)) #274891612 # about 48% of amount in taxes paid column of CoreLogic

# Check totals in Census of Governments compared to taxes paid:
# general-purpose + special district + school district + state property tax
sum(c(274891612, 21770446, 212073880, 19038089))
(527774027 * 1000) / 576623401933 #0.9152837


# Make residual ("other") categories and combined totals for the table.
# mutate() evaluates these in order, so later entries can use earlier ones.
fisc <- fisc %>%
  mutate(
    fed_other = Total_Fed_IG_Revenue - Fed_IGR_Hous_Com_Dev - Fed_IGR_Transit_Sub,
    tax_other = Total_License_Taxes + Total_Income_Taxes,
    util_other = Gas_Utility_Rev + Transit_Utility_Rev,
    state_trans = State_IGR_Transit_Sub + State_IGR_Highways,
    # Remember which rows had no state general support value before filling it
    misstategen = is.na(State_IGR_Gen_Sup),
    # If missing state general transfers replace with 0
    State_IGR_Gen_Sup = replace_na(State_IGR_Gen_Sup, 0),
    state_other = Total_State_IG_Revenue - State_IGR_Education -
      State_IGR_Gen_Sup - State_IGR_Public_Welf - state_trans,
    ch_other = Total_General_Charges - Chg_Air_Transportation - Chg_Sewerage -
      Chg_Solid_Waste_Mgmt - Chg_Hospitals,
    utch_other = util_other + ch_other,
    misc_other = Misc_General_Revenue - Fines_and_Forfeits - Interest_Revenue -
      Special_Assessments + Liquor_Stores_Revenue + Total_Insur_Trust_Rev,
    misc_tot = Misc_General_Revenue + Liquor_Stores_Revenue + Total_Insur_Trust_Rev,
    ch_plus_util = Total_General_Charges + Total_Utility_Revenue
  )

# Format variables for regression. For every listed column X four new columns
# are added:
#   pct_X  = X / Total_Revenue
#   lpct_X = log((X + 1) / Total_Revenue), or 0 when every Total_Revenue is 0
#   pc_X   = 1000 * X / Population
#   ln_X   = log(X / Population) when X > 0, log(1 / Population) when X <= 0
# The lambdas must stay inline inside across() because they refer to the
# Total_Revenue and Population columns of the data.
fisc <- fisc %>%
  mutate(
    across(
      c(
        Total_Revenue, Total_Rev_Own_Sources, Gen_Rev_Own_Sources,
        Total_Taxes, Property_Tax, Tot_Sales___Gr_Rec_Tax, Tot_Local_IG_Rev,
        Total_IG_Revenue, Total_Fed_IG_Revenue, Total_State_IG_Revenue,
        Tot_Chgs_and_Misc_Rev, Misc_General_Revenue, Fines_and_Forfeits,
        starts_with('Fed_IGR'), starts_with('State_IGR'),
        Total_General_Charges, Net_Lottery_Revenue, Interest_on_Gen_Debt,
        Total_Debt_Outstanding, Total_Long_Term_Debt_Out, Total_Interest_on_Debt,
        Water_Utility_Revenue, Gas_Utility_Rev, Electric_Utility_Rev,
        Transit_Utility_Rev,
        Total_Select_Sales_Tax, Total_Gen_Sales_Tax, Total_License_Taxes,
        Total_Income_Taxes, Misc_General_Rev_NEC, Prop_Sale_Total,
        Special_Assessments, Interest_Revenue, Rents_and_Royalties,
        Chg_Total_Education, Chg_Hospitals, Chg_Highways, Chg_Total_Nat_Res,
        Chg_Parking, Chg_Sewerage, Chg_Solid_Waste_Mgmt, Chg_Water_Transport,
        Chg_All_Other_NEC, Chg_Air_Transportation, Chg_Housing___Comm_Dev,
        Chg_Parks___Recreation,
        fed_other, State_IGR_Gen_Sup, state_trans, state_other, ch_other,
        misc_other, Total_Utility_Revenue, ch_plus_util, tax_other, utch_other,
        util_other, Total_Insur_Trust_Rev, Liquor_Stores_Revenue, misc_tot
      ),
      .fns = list(
        pct = ~ .x / Total_Revenue,
        lpct = ~ if (all(Total_Revenue == 0)) 0 else log((.x + 1) / Total_Revenue),
        pc = ~ (1000 * .x) / Population,
        ln = ~ if_else(.x > 0, log((.x) / Population), log((1) / Population))
      ),
      .names = '{.fn}_{.col}'
    ),
    # State + county code, pasted together
    stcnty = paste0(st, FIPS_County)
  )


# Get total revenue, own-source revenue and intergovernmental (IG) revenue,
# summed separately over general-purpose governments, school districts and
# special districts.
summarise(
  fisc,
  across(
    c(Total_Revenue, Total_Rev_Own_Sources, Total_IG_Revenue,
      Total_Fed_IG_Revenue, Total_State_IG_Revenue),
    sum
  )
)

summarise(
  fsch,
  across(
    c(Total_Revenue, Total_Rev_Own_Sources, Total_IG_Revenue,
      Total_Fed_IG_Revenue, Total_State_IG_Revenue),
    sum
  )
)

summarise(
  fsp,
  across(
    c(Total_Revenue, Total_Rev_Own_Sources, Total_IG_Revenue,
      Total_Fed_IG_Revenue, Total_State_IG_Revenue),
    sum
  )
)


# Manually clean some geographic identifiers so the fiscal rows line up with
# the FU files. `futype_join` / `fuint_join` are the join keys; the original
# `futype` / `fuint` are left untouched.
fisc <- mutate(
  fisc,

  # Four MA (st 25) township records are joined as places
  futype_join = case_when(
    st == '25' &
      fuint %in% c('017_24960', '005_46598', '021_55955', '023_08085') ~ 'Place',
    .default = futype
  ),

  fuint_join = case_when(
    # CT CSDs: keep characters 5-9 only
    st == '09' & futype == 'CSD' ~ substr(fuint, 5, 9),
    # MA townships re-typed as places: same truncation ...
    st == '25' &
      fuint %in% c('017_24960', '005_46598', '021_55955') ~ substr(fuint, 5, 9),
    # ... except this one, which gets a hard-coded code
    st == '25' & fuint == '023_08085' ~ '08130',
    # One MA code is remapped to another
    st == '25' & fuint == '27100' ~ '27060',
    .default = fuint
  )
)

# Manually fix consolidated counties: each listed place record is re-keyed so
# that it joins as a County with the given three-digit county code.
#
# The recodes live in one lookup table and are applied in a single pass.
# Matching uses match(), which never yields a TRUE/NA mask, so rows with a
# missing `st` or `fuint_join` are simply left alone instead of stopping the
# script with a tibble "NA as row index" error.
consolidated_govs <- tribble(
  ~st,  ~place_code, ~county_code,
  '18', '36003',     '097', # Indianapolis
  '15', '17000',     '003', # Honolulu
  '11', '50000',     '001', # DC
  '47', '52004',     '037', # Nashville
  '21', '48003',     '111', # Louisville
  '22', '05000',     '055', # Baton Rouge
  '22', '40735',     '033', # Lafayette
  '13', '04204',     '245', # Richmond County GA
  '13', '03436',     '059', # Clarke County GA
  '22', '36255',     '109', # Houma/Terrebonne parish LA
  '30', '11390',     '093'  # Butte MT
)

# Position of each fisc row in the lookup table (NA when it is not listed)
consol_idx <- match(
  paste(fisc$st, fisc$fuint_join),
  paste(consolidated_govs$st, consolidated_govs$place_code)
)
is_consol <- !is.na(consol_idx)

fisc$futype_join[is_consol] <- 'County'
fisc$fuint_join[is_consol] <- consolidated_govs$county_code[consol_idx[is_consol]]

# Spot check one of the re-typed MA records
filter(fisc, st == '25', fuint == '017_24960') %>%
  select(Name, futype, FIPS_County, Population, fuint_join, fuint) %>%
  print(n = 50)


# Merge fiscal data onto the locality FUs. The fiscal table's own fuint/futype
# are dropped first; matching is on state, join id and (cleaned) type.
fufisc <- left_join(
  fudta,
  select(fisc, -c(fuint, futype)),
  by = join_by(st, fuint_join, futype == futype_join)
)

sum(is.na(fufisc$Name)) #108

# Counties: same merge, but the county code `cty` is matched to fuint_join
ctfisc <- left_join(
  fucnty,
  select(fisc, -c(fuint, futype)),
  by = join_by(st, cty == fuint_join, futype == futype_join)
)

sum(is.na(ctfisc$Name))  # 90 missing. These are mostly consolidated, or else NE counties with no legal existence
ctfisc %>%
  filter(is.na(Name)) %>%
  select(bigname, fuintname, clpop, fuint) %>%
  arrange(desc(clpop))


# We have all but 108, and the top 10 remaining by population all appear to be genuinely missing from the fiscal data, as far as we can tell.

# Run regressions predicting different spending percentages and levels
# Regress each on total value, percent value, and both

# Build the value-per-capita regressors on the locality data:
#   pct_valpc  = FU value per capita relative to bigpc (uses the raw value)
#   splittotpc = floored at 1 when non-positive so the log is defined
#   ln_valpc   = log of the floored value
#   hi_rel / lo_rel = relative value at least 3x / at most one third
# The floor is NA-safe: a missing or NaN splittotpc (e.g. 0 / 0) stays missing
# rather than producing an NA row index, which tibble rejects in assignment.
fufisc <- fufisc %>%
  mutate(
    pct_valpc = splittotpc / bigpc,
    splittotpc = if_else(!is.na(splittotpc) & splittotpc <= 0, 1, splittotpc),
    ln_valpc = log(splittotpc),
    hi_rel = pct_valpc >= 3,
    lo_rel = pct_valpc <= 1 / 3
  )


# Outcome variable lists. select() is used so that every name is checked
# against fufisc and the starts_with() helpers are expanded; only the
# resulting column names are kept.

# Full list of revenue / debt items
varints <- names(select(
  fufisc,
  Total_Revenue, Total_Rev_Own_Sources, Gen_Rev_Own_Sources,
  Total_Taxes, Property_Tax, Tot_Sales___Gr_Rec_Tax, Total_Select_Sales_Tax,
  Total_Gen_Sales_Tax, Total_License_Taxes, Total_Income_Taxes,
  Total_IG_Revenue, Total_Fed_IG_Revenue, starts_with('Fed_IGR'),
  Total_State_IG_Revenue, starts_with('State_IGR'), tax_other, Tot_Local_IG_Rev,
  Tot_Chgs_and_Misc_Rev, Total_General_Charges, Chg_Total_Education,
  Chg_Hospitals, Chg_Highways, Chg_Total_Nat_Res, Chg_Parking, Chg_Sewerage,
  Chg_Solid_Waste_Mgmt, Chg_Water_Transport, Chg_All_Other_NEC,
  Chg_Air_Transportation, Chg_Housing___Comm_Dev, Chg_Parks___Recreation,
  Total_Utility_Revenue, Water_Utility_Revenue, Gas_Utility_Rev,
  Electric_Utility_Rev, Transit_Utility_Rev,
  Misc_General_Revenue, Fines_and_Forfeits, Net_Lottery_Revenue,
  Misc_General_Rev_NEC, Prop_Sale_Total, Special_Assessments, Interest_Revenue,
  Rents_and_Royalties,
  Interest_on_Gen_Debt, Total_Debt_Outstanding, Total_Long_Term_Debt_Out,
  Total_Interest_on_Debt, misc_tot,
  fed_other, State_IGR_Gen_Sup, state_trans, state_other, ch_other, misc_other,
  ch_plus_util, utch_other, util_other
))

# Shorter list that the regressions below iterate over
varsm <- names(select(
  fufisc,
  Total_Revenue, Total_Rev_Own_Sources,
  Total_Taxes, Property_Tax, Tot_Sales___Gr_Rec_Tax, tax_other,
  Total_IG_Revenue, Total_Fed_IG_Revenue, Fed_IGR_Hous_Com_Dev,
  Fed_IGR_Transit_Sub, fed_other,
  Total_State_IG_Revenue, State_IGR_Education, State_IGR_Gen_Sup,
  State_IGR_Public_Welf, state_trans, state_other, Tot_Local_IG_Rev,
  Total_General_Charges, Chg_Sewerage, Chg_Air_Transportation,
  Chg_Solid_Waste_Mgmt, Chg_Hospitals, ch_other, Total_Utility_Revenue,
  Water_Utility_Revenue, Electric_Utility_Rev, util_other,
  misc_tot, Fines_and_Forfeits, Special_Assessments, Interest_Revenue, misc_other
))

# Locality sample: no county FUs, bigint present, and total revenue that is
# neither missing nor zero.
fufisc <- fufisc %>%
  filter(
    futype != 'County',
    !is.na(bigint),
    !is.na(Total_Revenue),
    Total_Revenue != 0
  )

# County sample: same bigint and total-revenue requirements
ctfisc <- ctfisc %>%
  filter(
    !is.na(bigint),
    !is.na(Total_Revenue),
    Total_Revenue != 0
  )

nrow(fufisc) #14886
nrow(ctfisc) #1728

# Regress each revenue outcome (share of total revenue and logged per-capita
# level) on logged property value per capita. The same specification is run
# on three samples, so it is written once as helpers instead of three pasted
# loops.

#' Fit one weighted fixed-effects regression of an outcome on a regressor
#'
#' Runs `felm(outint ~ indint | bigname + st)` weighted by `clpop`, after
#' dropping rows whose outcome is missing or not finite.
#'
#' @param dta Data frame containing `outcome_col`, `regressor_col`, `bigname`,
#'   `st` and `clpop`.
#' @param outcome_col Name of the outcome column (string).
#' @param regressor_col Name of the regressor column (string).
#' @return List with the slope (`coef`), its p-value (`p`) and the number of
#'   observations used (`n`).
fit_valpc_reg <- function(dta, outcome_col, regressor_col = 'ln_valpc') {
  stopifnot(outcome_col %in% names(dta), regressor_col %in% names(dta))

  # Work on a local copy so no scratch columns leak into the caller's data
  fdta <- dta
  fdta$outint <- dta[[outcome_col]]
  fdta$indint <- dta[[regressor_col]]

  # Outcomes are infinite when Population is 0; missing values are dropped too
  fdta <- filter(fdta, is.finite(outint))

  fit <- felm(outint ~ indint | bigname + st, data = fdta, weights = fdta$clpop)

  list(
    coef = unname(fit$coefficients[1, 1]),
    p = unname(fit$pval[[1]]),
    n = fit$N
  )
}

#' Run the pct and ln regressions for every variable and write the results
#'
#' @param dta Data frame with, for each entry of `vars`, the raw column plus
#'   its `pct_` and `ln_` versions, and the columns `fit_valpc_reg()` needs.
#' @param vars Character vector of outcome variable names.
#' @param out_path CSV path the coefficient table is written to.
#' @return (Invisibly) the coefficient table: one row per variable with columns
#'   var, totrev, coefpct, ppct, npct, coefln, pln, nln. Numeric columns stay
#'   numeric, so the CSV carries full precision rather than values that were
#'   coerced to text.
run_valpc_regs <- function(dta, vars, out_path) {
  rows <- lapply(vars, function(varint) {
    pct <- fit_valpc_reg(dta, paste0('pct_', varint))
    ln <- fit_valpc_reg(dta, paste0('ln_', varint))
    tibble(
      var = varint,
      totrev = sum(dta[[varint]], na.rm = TRUE),
      coefpct = pct$coef, ppct = pct$p, npct = pct$n,
      coefln = ln$coef, pln = ln$p, nln = ln$n
    )
  })
  coefs <- bind_rows(rows)
  write_csv(coefs, out_path)
  invisible(coefs)
}

# All localities
coefsall <- run_valpc_regs(fufisc, varsm, 'output/fisc/fisc_reg_all_locality.csv')

# Do separate run dropping places missing state general support transfers (which are often missing)
fufisc <- filter(fufisc, !misstategen)
coefsall <- run_valpc_regs(fufisc, varsm, 'output/fisc/fisc_reg_all_locality_table_nomissgen.csv')

# Now do counties. Here the regressor is log(valpc + 1).
ctfisc$pct_valpc <- ctfisc$valpc / ctfisc$bigpc
ctfisc$valpc10k <- ctfisc$valpc / 10000
ctfisc$ln_valpc <- log(ctfisc$valpc + 1)

coefsall <- run_valpc_regs(ctfisc, varsm, 'output/fisc/fisc_reg_all_county.csv')


