# Compute rank order information theory measure of economic segregation of property values across tracts

library(tidyverse)
library(segregation)
library(Hmisc)
library(sf)
library(tigris)

# Make sure the output folder exists. `recursive = TRUE` also creates the
# parent `output/` folder when it is missing, and `showWarnings = FALSE`
# keeps re-runs quiet when the folder is already there.
dir.create('output/econseg', recursive = TRUE, showWarnings = FALSE)

# Bring in the CBSA delineation list and reduce it to zero-padded county,
# CBSA and CSA codes plus a flag marking micropolitan areas.
msas = read.csv('nonproprietary_data/msa_crosswalks/list1_2020.csv') %>%
  mutate(
    st = str_pad(FIPS.State.Code, 2, pad = '0'),
    cnty = str_pad(FIPS.County.Code, 3, pad = '0'),
    cbsa = str_pad(CBSA.Code, 5, pad = '0'),
    csa = str_pad(CSA.Code, 3, pad = '0'),
    micro = Metropolitan.Micropolitan.Statistical.Area == 'Micropolitan Statistical Area'
  ) %>%
  select(st, cnty, cbsa, csa, micro) %>%
  filter(
    # Drop rows whose CBSA code does not parse as a number
    !is.na(as.numeric(cbsa)),
    # Hard-coded CBSA codes that are left out of the analysis
    !(cbsa %in% c('00000','27420','33380','42300')),
    # Drop state FIPS 72 (presumably Puerto Rico -- confirm)
    st != '72'
  )

# Distinct CBSA codes to iterate over
cbsas = unique(msas$cbsa)

# Loop through CBSAs and compute the rank-order information theory measure
# for each CBSA and value type ('all' properties, 'res' residential only).
#
# For every percentile threshold the tract-level counts are dichotomized
# (pct_val >= threshold vs. below); Theil's H and the overall entropy of that
# split are computed; the CBSA-level measure is the entropy-weighted mean of
# H across thresholds.
#
# Outputs:
#   output/econseg/cbsa_econseg_<cbsa>_<valtype>.csv  per-percentile ent / H
#   output/econseg/econseg_cbsas.csv                  one row per CBSA x type

# One-row results per CBSA x value type; bound together after the loop
# instead of growing a matrix with rbind()
cbseg_rows = list()

for (cbint in cbsas) {

  # Loop through all and residential
  for (valtype in c('all', 'res')) {

    dta = read_csv(paste0('intermediate_data/cbsa_properties/tractdta/',valtype,'_tract_',cbint,'.csv'))
    dta = filter(dta, cnt > 0)
    dta = mutate(dta, geoid = paste0(st, cnty, tract))

    # Nothing to measure across tracts when there is at most one tract
    if (n_distinct(dta$geoid) <= 1) next

    # Residential runs rank on the residential percentile column
    if (valtype == 'res') dta$pct_val = dta$pct_resval

    # Overall rank order seg is the sum of seg at each pct, weighted by the
    # overall entropy at that pct.
    # Loop through each pct value and dichotomize, then compute segregation.
    # Get percentile values (percentile 1 is left out as a threshold)
    pcts = read_csv(paste0('intermediate_data/cbsa_properties/pctiles/pcts_',valtype,'_',cbint,'.csv'))
    pctsint = setdiff(pcts$pct, 1)

    pct_rows = list()
    for (pctint in pctsint) {

      # `size` gives how many consecutive percentile labels (pctint,
      # pctint + 1, ...) are emitted for this threshold. seq_len() below
      # avoids the `1:0` trap, so a size of 0 contributes no rows.
      nreps = pcts$size[pcts$pct == pctint][1]
      if (nreps < 1) next

      # Dichotomize at this threshold and total the counts by tract and side.
      # The result does not depend on the repetition index, so it is
      # computed once per threshold rather than once per repetition.
      pdta = dta %>%
        mutate(segment = pct_val >= pctint) %>%
        group_by(geoid, segment) %>%
        summarise(cnt = sum(cnt), .groups = 'drop')

      # Aggregation must not lose or duplicate any counts
      stopifnot(sum(pdta$cnt) == sum(dta$cnt))

      # Theil's H for the above/below split across tracts
      seg = mutual_total(pdta, group = 'segment', unit = 'geoid', weight = 'cnt')
      theilh = seg$est[seg$stat == 'H']

      # Overall entropy of the split; used as the weight for this threshold
      ent = entropy(pdta, group = 'segment', weight = 'cnt')

      pct_rows[[length(pct_rows) + 1]] = tibble(
        pct = pctint - 1 + seq_len(nreps),
        ent = ent,
        theilh = theilh
      )
    }

    # Without any threshold rows there is nothing to export or average
    if (length(pct_rows) == 0) {
      warning('No percentile thresholds for CBSA ', cbint, ' (', valtype,
              '); skipping', call. = FALSE)
      next
    }

    # Export measure by percentile for each CBSA
    pctdta = bind_rows(pct_rows)
    write_csv(pctdta, paste0('output/econseg/cbsa_econseg_',cbint,'_',valtype,'.csv'))

    # Entropy-weighted mean of H across percentiles
    econseg = wtd.mean(pctdta$theilh, pctdta$ent)

    # econseg stays numeric here (it is no longer coerced to character)
    cbseg_rows[[length(cbseg_rows) + 1]] = tibble(
      cbsa = cbint,
      valtype = valtype,
      econseg = econseg
    )
  }
}

# Export overall segregation for all cbsas. This runs once, after every CBSA
# has been processed, so the summary file is written a single time and an
# empty result still yields a file with the expected header.
cbseg = bind_rows(cbseg_rows)
if (nrow(cbseg) == 0) {
  cbseg = tibble(cbsa = character(), valtype = character(), econseg = numeric())
}

write_csv(cbseg,'output/econseg/econseg_cbsas.csv')
