# Make shapefiles for the fiscal units
# This is difficult to run on a desktop or laptop; running it on a server is recommended

library(tidyverse)
library(sf)


# Load the fiscal-unit / block crosswalk. The loops further down use an object
# named exp_block that is not created anywhere else in the visible script, so
# it is expected to come from this file; fail right away if it is missing
# rather than after the per-state processing has already started.
load(file = 'intermediate_data/fiscal_units/fu_block_crosswalk.Rdata')
stopifnot(exists('exp_block'))


# Make output directories
# NOTE(review): the loops below write under 'ser_replication/fiscal_units/...',
# not under these 'intermediate_data' paths -- confirm which root is intended.
for (out_dir in c('intermediate_data/fiscal_units/shapefiles',
                  'intermediate_data/fiscal_units/shapefiles_national')) {
  # Only create what is missing so that re-running the script does not warn
  # about directories that already exist; genuine failures still warn.
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }
}

## Create shapefiles

# Start from block-level shapefiles at the state level downloaded from NHGIS
# Loop through states one by one. For each state, import block-level data, merge on the block info
# Then aggregate to fiscal units
# Then export the state, and also append it to a national shapefile
# Loop through CZs, CSAs, and CBSAs as metro area definitions

# The NHGIS shapefile names contain the state abbreviation, so read a lookup
# table of states. FIPS codes are zero-padded to two characters, the codes
# 78, 66 and 60 are dropped (per the original note, the data only cover the
# states + PR), and the table is sorted by FIPS code.
dropped_fips <- c('78', '66', '60')

stabbr <- read.csv('nonproprietary_data/states/stabbrev.csv') %>%
  mutate(fips = str_pad(fips, width = 2, pad = '0')) %>%
  filter(!fips %in% dropped_fips) %>%
  arrange(fips)


# Build fiscal-unit shapefiles for every combination of big unit
# (cbsa / csa / cz) and small-unit type (loc2 / loc / school).
# For each combination, every state's block shapefile is read, joined to
# exp_block on GISJOIN, aggregated to fiscal units, written to a per-state
# shapefile, and appended to a running national shapefile.

# Create `path` (and any missing parent directories) unless it already exists.
# Plain dir.create() is non-recursive, so it only warns and creates nothing
# when a parent is missing, and it also warns when the directory is already
# there; a missing directory would otherwise surface later as a failed
# st_write().
ensure_dir <- function(path) {
  if (!dir.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
  invisible(path)
}

# Roots of the per-state and national output trees
shp_root <- 'ser_replication/fiscal_units/shapefiles'
nat_root <- 'ser_replication/fiscal_units/shapefiles_national'

# Loop through big units
for (bigint in c('cbsa', 'csa', 'cz')) {

  # [[ ]] always yields a plain vector, whether exp_block is a data.frame
  # or a tibble ([, name] on a tibble would give a one-column tibble)
  exp_block$bigunit <- exp_block[[bigint]]

  # Loop through loc types
  for (locint in c('loc2', 'loc', 'school')) {

    # Common stem shared by every directory and file name of this combination
    tag <- paste0('shp_', bigint, '_', locint)

    state_root <- file.path(shp_root, tag)
    ensure_dir(state_root)

    # The national output location does not depend on the state, so set it
    # up once here instead of on every state iteration
    nat_dir <- file.path(nat_root, paste0(tag, '_national'))
    ensure_dir(nat_dir)
    nat_path <- file.path(nat_dir, paste0(tag, '_national.shp'))

    exp_block$smallunit <- exp_block[[locint]]
    exp_block$smallname <- exp_block[[paste0(locint, '_name')]]
    exp_block$smalltype <- exp_block[[paste0(locint, '_type')]]

    # Set up blank national shapefile
    shp_nat <- NULL

    # Now loop through states
    for (i in seq_len(nrow(stabbr))) {

      fip_int <- stabbr[i, 'fips']
      abbr_int <- stabbr[i, 'st']

      shppath <- paste0(
        'nonproprietary_data/nhgis/nhgis0055_shape/nhgis0055_shapefile_tl2020_',
        fip_int, '0_block_2020/', abbr_int, '_block_2020.shp'
      )

      shp <- st_read(shppath)
      shp_joined <- left_join(shp, exp_block, by = 'GISJOIN')

      # Blocks without a match in exp_block get NA from the left join. They
      # can never satisfy `smallunit == su`, so they contribute no rows;
      # drop the NA up front instead of aggregating an empty selection.
      smuns <- unique(shp_joined$smallunit)
      smuns <- smuns[!is.na(smuns)]

      if (length(smuns) == 0) {
        warning('No fiscal units matched for state ', fip_int, '; skipping',
                call. = FALSE)
        rm(shp, shp_joined)
        next
      }

      # Aggregate one small unit at a time, collecting the pieces in a
      # preallocated list and binding once at the end; calling rbind() inside
      # the loop re-copies everything accumulated so far on each iteration.
      pieces <- vector('list', length(smuns))
      k <- 0L
      for (su in smuns) {
        k <- k + 1L
        print(su) # progress indicator

        # This is the operation that seems to take the longest. On sf objects
        # summarise() also combines the geometries of each group.
        pieces[[k]] <- shp_joined %>%
          filter(smallunit == su) %>%
          group_by(st, STATE, bigunit, smallunit, smallname, smalltype) %>%
          summarise(
            pop = sum(pop),
            # Area in square km now instead of square meters for smaller field
            area = sum(AREALAND) / 1000000
          ) %>%
          ungroup() # do not carry per-piece grouping metadata into the bind
      }
      shp_fu <- do.call(rbind, pieces)

      # Per-state output: <state_root>/<tag>_<fips>/<tag>_<fips>.shp
      state_tag <- paste0(tag, '_', fip_int)
      state_dir <- file.path(state_root, state_tag)
      ensure_dir(state_dir)
      st_write(shp_fu, file.path(state_dir, paste0(state_tag, '.shp')),
               delete_layer = TRUE)

      # Append to national
      shp_nat <- rbind(shp_nat, shp_fu)

      # Save as we go just to be careful: the national file is rewritten
      # after every state so a crash loses at most the current state
      st_write(shp_nat, nat_path, delete_layer = TRUE)

      # Remove objects to try to preserve memory
      rm(shp, shp_joined, shp_fu, pieces)

    } # state
  } # loc type
} # big units

