# Aggregating individual property data to block level for merging
# This requires proprietary CoreLogic data to run. It is included in the replication package for transparency and reference

# The decision of which value column to use is based on the Lincoln Institute Significant Features of the Property Tax database 
# and various checks, as described in Supplementary Appendix 1

library(tidyverse); options(tibble.width = Inf)

# Create the output folders used below.
# recursive = TRUE also creates a missing parent folder; showWarnings = FALSE
# keeps re-runs quiet when the folders already exist.
dir.create('intermediate_data/state_blocks', showWarnings = FALSE, recursive = TRUE)
dir.create('intermediate_data/check_duplicates', showWarnings = FALSE, recursive = TRUE)
dir.create('intermediate_data/properties', showWarnings = FALSE, recursive = TRUE)

# Get house price index for Delaware
delprice <- read_csv('nonproprietary_data/fred/DESTHPI.csv')

# Return the DESTHPI value for a single year.
# Stops if the year is absent, duplicated, or missing, because the values are
# used as scalars when inflating Delaware assessed values further down.
de_hpi_for_year <- function(yr) {
  hpi <- as.numeric(delprice$DESTHPI[which(delprice$year == yr)])
  stopifnot(length(hpi) == 1, !is.na(hpi))
  hpi
}

dp1983 <- de_hpi_for_year(1983)
dp1975 <- de_hpi_for_year(1975)
dp1987 <- de_hpi_for_year(1987)
dp2019 <- de_hpi_for_year(2019)


# Equalization rate (EQR) tables. The state code is left-padded to two
# characters so it can be joined against STATE in the property data.

# County-level EQRs (PA)
eqr_cnty <- read_csv('nonproprietary_data/state_eqr_data/state_eqr_clean_cnty.csv')
eqr_cnty <- mutate(eqr_cnty, st = str_pad(st, 2, pad = '0'))

# loc2-level EQRs (NY)
eqr_loc2 <- read_csv('nonproprietary_data/state_eqr_data/state_eqr_clean_loc2.csv')
eqr_loc2 <- mutate(eqr_loc2, st = str_pad(st, 2, pad = '0'))


# Block to municipality (loc2) lookup, used to attach municipal EQRs.
# Only the block id (renamed to GEOID) and loc2 are kept; the full crosswalk is
# dropped from memory afterwards.
load('intermediate_data/fiscal_units/fu_block_crosswalk.Rdata')
bxw <- select(exp_block, GEOID = fips, loc2)
rm(exp_block)

# Set up outputs
# Two-digit state codes to process: every code from 01 to 56 except the
# ones absent from the original list (03, 07, 14, 43, 52).
states <- sprintf('%02d', setdiff(1:56, c(3L, 7L, 14L, 43L, 52L)))


# Accumulator for the block-level rows of all states
fullcountry <- NULL

for(st in states){
  
  # Reset the per-state accumulators: block aggregates, duplicate-check ids, property rows
  stout <- stdup <- stprop <- NULL
  
# Each state's data is broken into 14 chunks to be able to easily load. Process each chunk individually
  
for(i in 1:14){
  
# Read chunk i of state st and left-pad the geographic identifiers with zeros
# to their fixed widths (state 2, county 3, tract 6, block 4, full GEOID 15).
dta <- read_csv(paste0('um_corelogic/state_exempt/smallst_', st, '/st_', st, '_', i, '_exempt.csv')) %>%
  mutate(
    STATE = str_pad(STATE, 2, pad = '0'),
    COUNTY = str_pad(COUNTY, 3, pad = '0'),
    TRACT = str_pad(TRACT, 6, pad = '0'),
    BLOCK = str_pad(BLOCK, 4, pad = '0'),
    GEOID = str_pad(GEOID, 15, pad = '0')
  )

# Attach equalization rates. The order matters: the county rate arrives as
# `eqr` and is only renamed to `eqr_cnty` (via the join suffix) once the
# municipal table, which also carries an `eqr` column, is joined.

# County-level rates (PA): every Pennsylvania row must find a rate
dta <- dta %>%
  left_join(eqr_cnty, by = c('STATE' = 'st', 'COUNTY' = 'cnty'))
stopifnot(!is.na(dta$eqr) | dta$STATE != '42')

# Municipal rates (NY): first map each block to its loc2, then join on state x loc2.
# Every row in NY counties 087 and 119 must find a municipal rate.
dta <- dta %>%
  left_join(bxw, by = 'GEOID') %>%
  left_join(eqr_loc2, by = c('STATE' = 'st', 'loc2' = 'fuint'), suffix = c('_cnty', '_loc2'))

stopifnot(!is.na(dta$eqr_loc2) | !(dta$STATE == '36' & dta$COUNTY %in% c('087','119')))
  
# Make a clean, better version of the appraised value, i.e. government's best guess of market value
# Details of the treatment of each state are provided in Supplementary Appendix 1
#
# Columns added to dta:
#   res, ag, oth      - property-class indicators (oth = neither res nor ag)
#   val_raw           - state-specific estimate of market value
#   val_clean         - val_raw, set to 0 for exempt properties
#   val_*_clean       - val_clean restricted to one property class
#   res_over1m_prop   - residential property with value of at least $1m
#   taxrt, tax30      - tax / value, and flag for non-missing rates above 30%
#   taxcapped         - tax bill, capped at 30% of value, 0 for exempt properties
#
# In val_raw, case_when() applies the FIRST rule whose condition is TRUE; a row for
# which no rule is TRUE gets NA. Conditions written with `==` / `!=` are NA (not TRUE)
# when the compared column is missing, whereas `%in%` never returns NA.

dta <- mutate(dta, 
             
             # Residential indicator 
             res = case_when(
               ((!is.na(PROPERTY_INDICATOR_CODE) & PROPERTY_INDICATOR_CODE %in% c("10", "11", "21", "22")) | (!is.na(LAND_USE_CODE) & (LAND_USE_CODE  < 200 & !(LAND_USE_CODE %in% c("127", "130", "142", "164"))))) ~ TRUE,
               !((!is.na(PROPERTY_INDICATOR_CODE) & PROPERTY_INDICATOR_CODE %in% c("10", "11", "21", "22")) | (!is.na(LAND_USE_CODE) & (LAND_USE_CODE  < 200 & !(LAND_USE_CODE %in% c("127", "130", "142", "164"))))) ~ FALSE
             ),
             
             # Agriculture indicator
             ag = case_when(
               ((!is.na(PROPERTY_INDICATOR_CODE) & PROPERTY_INDICATOR_CODE == '70') | (!is.na(LAND_USE_CODE) & ((LAND_USE_CODE >= 500 & LAND_USE_CODE < 600) | LAND_USE_CODE %in% c('430')))) ~ TRUE,
               !((!is.na(PROPERTY_INDICATOR_CODE) & PROPERTY_INDICATOR_CODE == '70') | (!is.na(LAND_USE_CODE) & ((LAND_USE_CODE >= 500 & LAND_USE_CODE < 600) | LAND_USE_CODE %in% c('430')))) ~ FALSE
             ),
             
             # And remaining indicator
             oth = res == FALSE & ag == FALSE,
             
             val_raw = case_when(
               STATE %in% c('02','04','05','08','11',
                            '12','13','15','16','18',
                            '19','20','24','26',
                            '27','30','31','32','34',
                            '38','39','40','41',
                            '45','47','48','49','54','56') ~ MARKET_TOTAL_VALUE, # These states use market always
               STATE %in% c('06','23','25','33','50','51','55') ~ ASSESSED_TOTAL_VALUE, # These states use raw assessed always. Per Bob DeBoer don't use sales ratios for this purpose
               
               # In Alabama use market except for Colbert county where it's missing and use appraised instead
               # Market works for both residential and non-residential 
               STATE == '01' & COUNTY != '033' ~ MARKET_TOTAL_VALUE,
               STATE == '01' & COUNTY == '033' ~ APPRAISED_TOTAL_VALUE,
               
               # CT - Hartford is different from the rest of state
               # CT data match assessed value on website, which is 70% of market value except for non-apartment residential in Hartford
               # https://app.lincolninst.edu/classification/connecticut-classification-hartford-connecticut-2019
               # NOTE(review): the original comments give the Hartford non-apartment residential ratio as 37.5%, but the code divides by 0.3675 - confirm which is intended
               # NOTE(review): loc2 is compared to '37000' here but to '009_50500' for Rhode Island below - confirm the loc2 format for CT
               STATE == '09' & loc2 != '37000' ~ ASSESSED_TOTAL_VALUE / 0.7, # Rest of state: 70%
               STATE == '09' & loc2 == '37000' & res == FALSE ~ ASSESSED_TOTAL_VALUE / 0.7, # Hartford non-residential: 70%
               STATE == '09' & loc2 == '37000' & res == TRUE & PROPERTY_INDICATOR_CODE == '22' ~ ASSESSED_TOTAL_VALUE / 0.7, # Hartford apartments: 70%
               STATE == '09' & loc2 == '37000' & res == TRUE & PROPERTY_INDICATOR_CODE != '22' ~ ASSESSED_TOTAL_VALUE / 0.3675, # Hartford non-apartment residential
               
               # Delaware has two issues: Sussex assessed at just 50% of value, and all 3 counties use prices from the 1970s and 1980s (per Lincoln Institute)
               # Use Delaware house price index from FRED to inflate to current dollars
               STATE == '10' & COUNTY %in% c('001') ~ ASSESSED_TOTAL_VALUE * dp2019 / dp1987, # In Kent, it's 100% of market value in 1987
               STATE == '10' & COUNTY %in% c('003') ~ ASSESSED_TOTAL_VALUE * dp2019 / dp1983, # In New Castle, it's 100% of market value in 1983
               STATE == '10' & COUNTY %in% c('005') ~ ASSESSED_TOTAL_VALUE / 0.5 * dp2019 / dp1975, # In Sussex, it's 50% of market value in 1974. FRED data only go to 1975 so using that. 
               
               # In Illinois use Market except for the property types handled individually below
               # (other, residential, hotels, amusement, agriculture, vacant land).
               # '32' (amusement) must be in this exclusion list: without it this catch-all matched first and the amusement rules further down were never reached
               STATE == '17' & !(PROPERTY_INDICATOR_CODE %in% c('0','10','23','32','70','80')) ~ MARKET_TOTAL_VALUE,
               STATE == '17' & PROPERTY_INDICATOR_CODE == '23' & COUNTY == '031' ~ MARKET_TOTAL_VALUE, # Hotels in Cook County have market value observed
               STATE == '17' & PROPERTY_INDICATOR_CODE == '23' & COUNTY != '031' ~ ASSESSED_TOTAL_VALUE / 0.3333, # Hotels in rest of state, where everything assessed at 33.33%
               STATE == '17' & PROPERTY_INDICATOR_CODE == '0' & COUNTY == '031' ~ MARKET_TOTAL_VALUE, # Other in Cook County: market value doesn't appear to have any missing, so use it (previously no rule matched these rows and they were set to NA)
               STATE == '17' & PROPERTY_INDICATOR_CODE == '0' & COUNTY != '031' ~ ASSESSED_TOTAL_VALUE / 0.3333, # Other in rest of state
               STATE == '17' & PROPERTY_INDICATOR_CODE == '70' & COUNTY == '031' ~ ASSESSED_TOTAL_VALUE / 0.1, # Agricultural in Cook County all missing, use assessed ratio of 10%
               STATE == '17' & PROPERTY_INDICATOR_CODE == '70' & COUNTY != '031' ~ ASSESSED_TOTAL_VALUE / 0.3333, # Farms in rest of state, where everything assessed at 33.33%
               STATE == '17' & PROPERTY_INDICATOR_CODE == '80' & COUNTY == '031' ~ ASSESSED_TOTAL_VALUE / 0.1, # Vacant in Cook County all missing, use assessed ratio of 10%
               STATE == '17' & PROPERTY_INDICATOR_CODE == '80' & COUNTY != '031' & !is.na(MARKET_TOTAL_VALUE)  ~ MARKET_TOTAL_VALUE, # Vacant in rest of state is not all missing; use market where present
               STATE == '17' & PROPERTY_INDICATOR_CODE == '80' & COUNTY != '031' & is.na(MARKET_TOTAL_VALUE) ~ ASSESSED_TOTAL_VALUE / 0.3333, # Where missing, use assessed / 33.33%
               STATE == '17' & PROPERTY_INDICATOR_CODE == '32' & COUNTY == '031' ~ ASSESSED_TOTAL_VALUE / 0.25, # Amusement in Cook County all missing, assuming are income generating and therefore 25%
               STATE == '17' & PROPERTY_INDICATOR_CODE == '32' & COUNTY != '031' & !is.na(MARKET_TOTAL_VALUE) ~ MARKET_TOTAL_VALUE, # Amusement in rest of state sometimes but not always missing
               STATE == '17' & PROPERTY_INDICATOR_CODE == '32' & COUNTY != '031' & is.na(MARKET_TOTAL_VALUE) ~ ASSESSED_TOTAL_VALUE / 0.3333, # Where missing, assume assessment ratio of 33.33% applies
               STATE == '17' & PROPERTY_INDICATOR_CODE == '10' & COUNTY == '031' & !is.na(MARKET_TOTAL_VALUE) ~ MARKET_TOTAL_VALUE, # Residential properties mostly have market value observed but not always. When not observed use assessed value / 10% for cook and 33.33% in rest of state
               STATE == '17' & PROPERTY_INDICATOR_CODE == '10' & COUNTY == '031' & is.na(MARKET_TOTAL_VALUE) ~ ASSESSED_TOTAL_VALUE / 0.1, 
               STATE == '17' & PROPERTY_INDICATOR_CODE == '10' & COUNTY != '031' & !is.na(MARKET_TOTAL_VALUE) ~ MARKET_TOTAL_VALUE, # Residential properties mostly have market value observed but not always. When not observed use assessed value / 10% for cook and 33.33% in rest of state
               STATE == '17' & PROPERTY_INDICATOR_CODE == '10' & COUNTY != '031' & is.na(MARKET_TOTAL_VALUE) ~ ASSESSED_TOTAL_VALUE / 0.3333, 
               
               # In Kentucky, use market except for agriculture, where use market when present and assessed where not (which is in some entire counties)
               # %in% (rather than !=) so that properties with a missing indicator code are treated as non-agricultural instead of falling through to NA
               STATE == '21' & !(PROPERTY_INDICATOR_CODE %in% c('70')) ~ MARKET_TOTAL_VALUE,
               STATE == '21' & PROPERTY_INDICATOR_CODE == '70' & !is.na(MARKET_TOTAL_VALUE) ~ MARKET_TOTAL_VALUE,
               STATE == '21' & PROPERTY_INDICATOR_CODE == '70' & is.na(MARKET_TOTAL_VALUE) ~ ASSESSED_TOTAL_VALUE,

               # In Louisiana use market except for Bossier and Iberia parishes, where it's missing. 
               # According to Lincoln, residential and land are 10%, public service is 25%, other property is 15%
               # To decide which properties are which, looked in rest of state and computed the assessment ratio by PRIND x LAND USE;
               # the rules below assign a ratio per property indicator code x land use code.
               # NOTE(review): an earlier comment here said 10% would be used everywhere "for now"; the code does not do that, so that note appears stale - confirm
               STATE == '22' & !(COUNTY %in% c('015','045')) ~ MARKET_TOTAL_VALUE,
               STATE == '22' & (COUNTY %in% c('015','045') & res == TRUE) ~ ASSESSED_TOTAL_VALUE / 0.1,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '20' & LAND_USE_CODE %in% c(251,248,237,211)) ~ ASSESSED_TOTAL_VALUE / 0.15,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '20' & !(LAND_USE_CODE %in% c(251,248,237,211))) ~ ASSESSED_TOTAL_VALUE / 0.1,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '23' & LAND_USE_CODE %in% c(127,142)) ~ ASSESSED_TOTAL_VALUE / 0.15,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '23' & !(LAND_USE_CODE %in% c(127,142))) ~ ASSESSED_TOTAL_VALUE / 0.1,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE %in% c('24','25','26','27','29','30','51','52')) ~ ASSESSED_TOTAL_VALUE / 0.15,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE %in% c('70','80','0')) ~ ASSESSED_TOTAL_VALUE / 0.1,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '28' & LAND_USE_CODE %in% c(336,364)) ~ ASSESSED_TOTAL_VALUE / 0.15,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '28' & !(LAND_USE_CODE %in% c(336,364))) ~ ASSESSED_TOTAL_VALUE / 0.1,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '31' & LAND_USE_CODE %in% c(226,255)) ~ ASSESSED_TOTAL_VALUE / 0.15,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '31' & !(LAND_USE_CODE %in% c(226,255))) ~ ASSESSED_TOTAL_VALUE / 0.1,               
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '32' & LAND_USE_CODE %in% c(700,755)) ~ ASSESSED_TOTAL_VALUE / 0.1,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '32' & !(LAND_USE_CODE %in% c(700,755))) ~ ASSESSED_TOTAL_VALUE / 0.15,               
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '50' & LAND_USE_CODE %in% c(300)) ~ ASSESSED_TOTAL_VALUE / 0.1,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '50' & !(LAND_USE_CODE %in% c(300))) ~ ASSESSED_TOTAL_VALUE / 0.15,   
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '53' & LAND_USE_CODE %in% c(801)) ~ ASSESSED_TOTAL_VALUE / 0.15,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '53' & LAND_USE_CODE %in% c(806)) ~ ASSESSED_TOTAL_VALUE / 0.25,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '53' & !(LAND_USE_CODE %in% c(801,806))) ~ ASSESSED_TOTAL_VALUE / 0.11,                 
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '54' & LAND_USE_CODE %in% c(818,872,886)) ~ ASSESSED_TOTAL_VALUE / 0.25,
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & PROPERTY_INDICATOR_CODE == '54' & !(LAND_USE_CODE %in% c(818,872,886))) ~ ASSESSED_TOTAL_VALUE / 0.15,                 
               
               STATE == '22' & (COUNTY %in% c('015','045') & res == FALSE & is.na(PROPERTY_INDICATOR_CODE)) ~ ASSESSED_TOTAL_VALUE / 0.1,
               
               # In Mississippi market and appraised match when not missing. Use Market where present and appraised where not
               STATE == '28' & !is.na(MARKET_TOTAL_VALUE) ~ MARKET_TOTAL_VALUE,
               STATE == '28' & is.na(MARKET_TOTAL_VALUE) ~ APPRAISED_TOTAL_VALUE,
               
               # In Missouri, use market value except in Wright, Knox, Lawrence, Moniteau, Clay, Holt counties where it's missing. There use assessed value / assessment ratio for property indicator code, derived by looking at averages for non-missing parts of the state which largely match Lincoln Institute assessment ratios
               # 19% for residential, per Lincoln institute and checked online (Clay county was only one with working website)
               # Two questionable ones are PRIND 30 = hospitals, which are 19% at median but some have 32% and seems like they should be classified there, and missing where I'm using 19% because thats the average but lots of variation in data
               # Instead of using counties, this assignment is made for all places missing market value, which includes about 60 properties besides these counties
               STATE == '29' & !is.na(MARKET_TOTAL_VALUE) ~ MARKET_TOTAL_VALUE,
               STATE == '29' & is.na(MARKET_TOTAL_VALUE) & PROPERTY_INDICATOR_CODE %in% c('0','10','11','21','22','80') ~ ASSESSED_TOTAL_VALUE / 0.19,
               STATE == '29' & is.na(MARKET_TOTAL_VALUE) & PROPERTY_INDICATOR_CODE %in% c('70') ~ ASSESSED_TOTAL_VALUE / 0.12,
               STATE == '29' & is.na(MARKET_TOTAL_VALUE) & PROPERTY_INDICATOR_CODE %in% c('20','23','24','25','26','27','28','29','30','31','32','50','51','52','53','54') ~ ASSESSED_TOTAL_VALUE / 0.32,
               STATE == '29' & is.na(MARKET_TOTAL_VALUE) & is.na(PROPERTY_INDICATOR_CODE) ~ ASSESSED_TOTAL_VALUE / 0.19, # Missing indicator code: use 19%, the average noted above
               
               # In New Mexico use market except for Otero where market missing and assessed is actually market (according to county website) and De Baca where market missing and can't check
               STATE == '35' & !(COUNTY %in% c('011','035')) ~ MARKET_TOTAL_VALUE,
               STATE == '35' & (COUNTY %in% c('011','035')) ~ ASSESSED_TOTAL_VALUE,
               
               # In New York, after investigation of Westchester and Rockland counties, use assessed value / eqr for properties that seem not to be correct
               # In many towns in Westchester and Rockland market value is populated but incorrect (e.g. White Plains, Sleepy Hollow, Pleasantville)
               STATE == '36' & !(COUNTY %in% c('087','119')) ~ MARKET_TOTAL_VALUE, # NY Westchester and Rockland missing a lot
               STATE == '36' & COUNTY %in% c('087','119') & is.na(MARKET_TOTAL_VALUE) ~ ASSESSED_TOTAL_VALUE / (eqr_loc2), # When missing, use assessed / equalization rate from NYS
               STATE == '36' & COUNTY %in% c('087','119') & !is.na(MARKET_TOTAL_VALUE) & !is.na(TAX_AMOUNT) & MARKET_TOTAL_VALUE < 10 * TAX_AMOUNT ~ ASSESSED_TOTAL_VALUE / (eqr_loc2), # When market value present but very low relative to taxes paid, use assessed / eqr instead. Per https://www.tax.ny.gov/research/property/reports/fvtaxrates/overall_county_13.htm, highest property tax rate in state is about 5% in NYC
               STATE == '36' & COUNTY %in% c('087','119') & !is.na(MARKET_TOTAL_VALUE)  & is.na(TAX_AMOUNT) ~ MARKET_TOTAL_VALUE, # If market total value present and tax amount not, use market total value
               STATE == '36' & COUNTY %in% c('087','119') & !is.na(MARKET_TOTAL_VALUE) & !is.na(TAX_AMOUNT) & MARKET_TOTAL_VALUE >= 10 * TAX_AMOUNT ~ MARKET_TOTAL_VALUE, # When market value present and seems correct relative to tax rate, use that
               
               # In North Carolina use market except in Pender and Hertford counties where it's missing and assessed (and appraised too) looks like 100%
               STATE == '37' & !(COUNTY %in% c('091','141')) ~ MARKET_TOTAL_VALUE,
               STATE == '37' & COUNTY %in% c('091','141') ~ ASSESSED_TOTAL_VALUE,
               
               # In Pennsylvania apply common level ratios
               STATE == '42' ~ ASSESSED_TOTAL_VALUE / eqr_cnty ,
               
               # In Rhode Island, New Shoreham assessed at 80% until 2023 https://www.newshorehamri.gov/325/Tax-Assessment
               STATE == '44' & loc2 == '009_50500' ~ ASSESSED_TOTAL_VALUE / 0.8,
               STATE == '44' & loc2 != '009_50500' ~ ASSESSED_TOTAL_VALUE,

               # In South Dakota use market where present and assessed where not
               STATE == '46' & !is.na(MARKET_TOTAL_VALUE) ~ MARKET_TOTAL_VALUE,
               STATE == '46' & is.na(MARKET_TOTAL_VALUE) ~ ASSESSED_TOTAL_VALUE, 
               
               # In Washington, use Market except in Cowlitz county where only assessed is present
               STATE == '53' & COUNTY != '015' ~ MARKET_TOTAL_VALUE,
               STATE == '53' & COUNTY == '015' ~ ASSESSED_TOTAL_VALUE
               
             ),
              
             # Clean value column: exempt properties contribute 0 (a missing val_raw stays missing)
             val_clean = val_raw * (exempt == FALSE),
             
             # Instead of using the res value, make residential indicator and apply it to total value
             val_res_clean = res * val_clean,
             val_ag_clean = ag * val_clean,
             val_nonres_clean = (res == FALSE) * val_clean,
             val_oth_clean = oth * val_clean,
             res_over1m_prop = val_res_clean >= 1000000,
             
             # Get tax rate
             taxrt = (TAX_AMOUNT / val_clean) * (exempt == FALSE) ,
             # Flag rates above 30%; a missing rate is never flagged, so tax30 itself is never NA
             tax30 = taxrt > .3 & !is.na(taxrt),
             # Tax bill with flagged properties capped at 30% of value; exempt properties pay 0
             taxcapped = case_when(
               tax30 == FALSE & exempt == FALSE ~ TAX_AMOUNT,
               tax30 == TRUE & exempt == FALSE ~ 0.3 * val_clean,
               exempt == TRUE ~ 0
             )
)

stopifnot(dta$res + dta$ag + dta$oth == 1) # Indicators should sum to 1, no missings

# Now aggregate to the block level
# One row per importa x block. Value and tax sums drop missing values (na.rm);
# the mis* columns record how many properties were missing in each case.
dblock <- dta %>% group_by(importa,STATE,COUNTY, TRACT,BLOCK,GEOID) %>% summarise(
  value_raw = sum(val_raw, na.rm = TRUE),
  value_tot = sum(val_clean, na.rm = TRUE),
  value_res = sum(val_res_clean, na.rm = TRUE),
  value_ag = sum(val_ag_clean, na.rm = TRUE),
  value_oth = sum(val_oth_clean, na.rm = TRUE),
  value_nonres = sum(val_nonres_clean, na.rm = TRUE),
  
  # Taxes as reported, exempt properties excluded
  tax_tot = sum(TAX_AMOUNT * (exempt == FALSE), na.rm = TRUE),
  tax_res = sum(TAX_AMOUNT * res * (exempt == FALSE), na.rm = TRUE),
  tax_ag = sum(TAX_AMOUNT * ag * (exempt == FALSE), na.rm = TRUE),
  tax_oth = sum(TAX_AMOUNT * oth * (exempt == FALSE), na.rm = TRUE), 
  tax_nonres = sum(TAX_AMOUNT * (res == FALSE) * (exempt == FALSE), na.rm = TRUE),
  
  # Taxes capped at 30% of value
  ctax_tot = sum(taxcapped, na.rm = TRUE),
  ctax_res = sum(taxcapped * res, na.rm = TRUE),
  ctax_ag = sum(taxcapped * ag, na.rm = TRUE),
  ctax_oth = sum(taxcapped * oth, na.rm = TRUE),
  ctax_nonres = sum(taxcapped * (res == FALSE), na.rm = TRUE),
  
  # Number of residential properties valued at $1m or more.
  # na.rm is required: without it a single property with a missing value makes the
  # whole block NA, which the later na.rm sum across chunks silently turns into 0.
  res_over1m = sum(res_over1m_prop, na.rm = TRUE),
  
  # Counts of total properties
  nprop_tot = length(GEOID),
  nprop_exempt = sum(exempt),
  nprop_res = sum(res * (exempt == FALSE)),
  nprop_ag = sum(ag * (exempt == FALSE)),
  nprop_oth = sum(oth * (exempt == FALSE)),

  # Counts of properties missing values
  misval_raw = sum(is.na(val_raw)),
  misval_tot = sum(is.na(val_clean)),
  misval_res = sum(is.na(val_clean) * res),
  misval_ag = sum(is.na(val_clean) * ag),
  misval_oth = sum(is.na(val_clean) * oth),
  
  # Counts of properties missing taxes
  mistax_tot = sum(is.na(TAX_AMOUNT)),
  mistax_res = sum(is.na(TAX_AMOUNT) * res),
  mistax_ag = sum(is.na(TAX_AMOUNT) * ag),
  mistax_oth = sum(is.na(TAX_AMOUNT) * oth),
  
  # Count of properties missing tax rates (value OR taxes)
  mistaxrt_tot = sum(is.na(taxrt)),
  mistaxrt_res = sum(is.na(taxrt) * res),
  mistaxrt_ag = sum(is.na(taxrt) * ag),
  mistaxrt_oth = sum(is.na(taxrt) * oth),
  
  # Counts of properties with high tax rates but not missing.
  # tax30 is never NA, so the by-class counts multiply the flag itself by the class
  # indicator (the previous is.na(tax30) version always produced 0).
  tax30_tot = sum(tax30),
  tax30_res = sum(tax30 * res),
  tax30_ag = sum(tax30 * ag),
  tax30_oth = sum(tax30 * oth)

  
) %>% ungroup()


# Append this chunk's block-level aggregates to the state total
stout <- bind_rows(stout, dblock)

# Identifier columns kept so duplicates can be checked later
stduprow <- select(dta, OID_, id, CLIP = CLIP.x, COMPOSITE_PROPERTY_LINKAGE_KEY, TARGET_FID)
stdup <- bind_rows(stdup, stduprow)

# Property-level rows: cleaned values, class indicators, taxes and codes
propout <- select(
  dta,
  CLIP = CLIP.x, val_raw, val_clean, res, ag, oth,
  TAX_AMOUNT, taxcapped, TAX_YEAR, ASSESSED_YEAR, GEOID,
  PROPERTY_INDICATOR_CODE, LAND_USE_CODE, exempt
)
stprop <- bind_rows(stprop, propout)

}
  
# Write out the id columns for this state so duplicates can be checked
write_csv(stdup, paste0('intermediate_data/check_duplicates/idvars_', st, '.csv'))


# A block can appear in more than one chunk (and under more than one importa value),
# so drop importa and sum every remaining column within block before exporting
stblock <- stout %>%
  select(-importa) %>%
  group_by(STATE, COUNTY, TRACT, BLOCK, GEOID) %>%
  summarise_all(sum, na.rm = TRUE) %>%
  ungroup()

write_csv(stblock, paste0('intermediate_data/state_blocks/st_block_', st, '.csv'))

# Add this state to the national block-level table
fullcountry <- bind_rows(fullcountry, stblock)

# Save this state's property-level rows
save(stprop, file = paste0('intermediate_data/properties/properties_', st, '.Rdata'))

}

# Write the block-level table for all states combined
write_csv(fullcountry, file.path('intermediate_data', 'state_blocks', 'st_block_all.csv'))


  