#Program 3: Takes output from raceineq_prep and creates figures for paper and for each sample
# These include:
  # Figure 1: Stylized depiction of inequality and group disparities
  # Figure 2: Mean and median black-white family and household income over time
  # Figure 3: 3 part graph of ratio, rank, income relative to nation at final rank (made for each sample individually as well)
  # Figure 4: Robustness graphs of rank and income at final rank for 8 robustness scenarios 
  # Figure 5: Rank and income at final rank at all deciles of the black income distribution (for each sample)
  # Figure 6: Counterfactual scenarios (for each sample)
# Additionally, creates the following figures not included in the paper:
  # Ratio of income at black final rank to income at white final rank over time (for each sample).
  # All parts of Figure 3 computed for whites (for each sample)
  # All parts of Figure 3 computed using mean rather than median for blacks and whites (for each sample; note that unlike the median the mean isn't consistent between dollars and ranks)
  # Graphs of over- and under-representation of blacks and whites in each national decile in 1968 and 2016
  # Auxiliary robustness graph comparing the main sample to the full range of alternative samples, including most combinations of income measure x sample construction
# All figures used in the paper are exported to the main output folder. The graphs for each sample are exported to sample-specific subfolders

## Make Figure 1: Stylized graph of dispersion among groups, which doesn't require input data ##

# Ten made-up incomes in period 1; going into period 2 each one grows by a
# further 5 percentage points (5%, 10%, ..., 50%). Group membership alternates
# irregularly between 'A' and 'B'.
stylerow1 <- 5:14
stylerow2 <- stylerow1 * (1 + 1:10/20)
stylerace <- c('A','B','A','A','B','A','B','B','A','B')

# cbind() coerces all three vectors to text, so the two income columns are
# converted back to numbers afterwards.
styledf <- data.frame(cbind(stylerow1, stylerow2, stylerace))
for (income_col in c('stylerow1','stylerow2')) {
  styledf[[income_col]] <- as.numeric(as.character(styledf[[income_col]]))
}

# Per-group mean and median in each period. A third row stores row 1 divided by
# row 2 (group A over group B, given the groups come back in sorted order).
stylesum <- styledf %>%
  group_by(stylerace) %>%
  summarise(meanp1 = mean(stylerow1),
            meanp2 = mean(stylerow2),
            med1 = median(stylerow1),
            med2 = median(stylerow2)) %>%
  data.frame()
group_ratio <- stylesum[1,2:5] / stylesum[2,2:5]
stylesum[3,] <- c('Ratio', group_ratio)

# x-axis categories: one label per period, carrying that period's median ratio
styledf$p1 <- paste('Period 1\nMedian Ratio:', percent(round(stylesum[3,'med1'], 2)))
styledf$p2 <- paste('Period 2\nMedian Ratio:', percent(round(stylesum[3,'med2'], 2)))

# Rows whose period-1 income is 8 or 11 get the solid connecting lines that the
# in-plot caption describes as the group medians.
median_rows <- filter(styledf, stylerow1 %in% c(8,11))

gplt <- ggplot(styledf) +
  geom_segment(aes(x = p1, xend = p2, y = stylerow1, yend = stylerow2),
               lty = 3, alpha = .5) +
  geom_segment(data = median_rows,
               aes(x = p1, xend = p2, y = stylerow1, yend = stylerow2),
               lty = 1, alpha = 1) +
  geom_point(aes(x = 1, y = stylerow1, color = stylerace)) +
  geom_point(aes(x = 2, y = stylerow2, color = stylerace)) +
  geom_text(x = .43, y = -2, label = 'Dark lines connect group medians',
            hjust = 0, size = 3, alpha = .2) +
  theme_bw() +
  geom_hline(yintercept = 0, alpha = .4, lty = 1) +
  theme(legend.position = 'bottom') +
  scale_color_grey(name = 'Group', start = .75, end = .25) +
  scale_y_continuous(limits = c(-2,23), name = 'Income', labels = dollar) +
  scale_x_discrete(name = '') +
  ggtitle('Stylized illustration of inequality and group disparities')

pdf('output/f1_stylized.pdf', height = 5)
print(gplt)
dev.off()

## Now bring in output from prep program ##

fdta <- read.csv('output/raceineq.csv')
ndecs <- read.csv('output/natdecs.csv')
npctiles <- read.csv('output/natpctiles.csv')

# Sample code -> lower-case description used inside figure titles.
# Keeping each code next to its description keeps the two derived vectors
# (snames, tnames) aligned position by position.
sample_labels <- c(
  fambase           = 'family income',
  fambase_adult     = 'family income, adults only',
  fambase_prime     = 'family income, adults age 25-54 only',
  fambase_bw        = 'family income, blacks/whites only',
  fambase_exp       = 'family income, any black ancestry',
  fambase_native    = 'family income, native-born citizens only',
  fambase_native_bw = 'family income, native-born citizens only, blacks/whites only',
  fambase_native_bp = 'family income, native-born citizens\nof native-born parents only',
  fambase_inst      = 'family income, adjusting for\ninstitutionalized population',
  fambase_inst_bw   = 'family income, adjusting for\ninstitutionalized population, blacks/whites only',
  famcalc           = 'calculated family income',
  famcalc_adult     = 'calculated family income, adults only',
  famcalc_bw        = 'calculated family income, blacks/whites only',
  famcalc_exp       = 'calculated family income, any black ancestry',
  famnorm           = 'normalized family income',
  famnorm_adult     = 'normalized family income, adults only',
  famnorm_bw        = 'normalized family income, blacks/whites only',
  famnorm_exp       = 'normalized family income, any black ancestry',
  famsqrt           = 'sqrt-normalized family income',
  famsqrt_adult     = 'sqrt-normalized family income, adults only',
  famsqrt_bw        = 'sqrt-normalized family income, blacks/whites only',
  famsqrt_exp       = 'sqrt-normalized family income, any black ancestry',
  hhbase            = 'household income',
  hhbase_adult      = 'household income, adults only',
  hhbase_bw         = 'household income, blacks/whites only',
  hhbase_exp        = 'household income, any black ancestry',
  hhcalc            = 'calculated household income',
  hhcalc_adult      = 'calculated household income, adults only',
  hhcalc_bw         = 'calculated household income, blacks/whites only',
  hhcalc_exp        = 'calculated household income, any black ancestry',
  hhnorm            = 'normalized household income',
  hhnorm_adult      = 'normalized household income, adults only',
  hhnorm_bw         = 'normalized household income, blacks/whites only',
  hhnorm_exp        = 'normalized household income, any black ancestry',
  hhsqrt            = 'sqrt-normalized household income',
  hhsqrt_adult      = 'sqrt-normalized household income, adults only',
  hhsqrt_bw         = 'sqrt-normalized household income, blacks/whites only',
  hhsqrt_exp        = 'sqrt-normalized household income, any black ancestry'
)

# Parallel vectors used by the rest of the script
snames <- names(sample_labels)
tnames <- unname(sample_labels)

# Title-cased sample description, used as the legend label in multi-sample figures
fdta$cleanname <- factor(fdta$samp, levels = snames, labels = str_to_title(tnames))


## Figure 2: Mean and median family and household income ratios over time ##

# Restrict to the two headline samples and reshape the two ratio columns
# (rat_pct5, rat_mean) to long format: one row per sample, year and statistic.
mainsamps <- c('fambase','hhbase')
main_rows <- filter(fdta, samp %in% mainsamps)
main_cols <- select(main_rows, cleanname, yr, rat_pct5, rat_mean)
pdta <- melt(main_cols, id.vars = c('cleanname','yr'))

# Legend text for the two statistics
pdta$variable <- factor(pdta$variable,
                        levels = c('rat_pct5','rat_mean'),
                        labels = c('Median','Mean'))

# Colour separates median from mean; line type separates the two samples
gplt <- ggplot(pdta, aes(x = yr, y = value, color = variable, lty = cleanname)) +
  geom_line() +
  scale_color_grey(name = '') +
  scale_linetype_discrete(name = '') +
  scale_x_continuous(name = '') +
  scale_y_continuous(limits = c(0,1), labels = percent,
                     name = 'Ratio of black to white income') +
  theme_bw() +
  theme(legend.position = 'bottom') +
  ggtitle('Average black-white income ratios over time')

pdf('output/f2_ratiostime.pdf', height = 5)
print(gplt)
dev.off()


## Figure 4: Robustness ##

# Samples shown in the robustness figures
robust <- c('hhbase','famnorm','famsqrt','fambase_adult','fambase_bw','fambase_exp','fambase_inst','fambase_native_bp')
id_cols <- c('cleanname','samp','yr')

# Auxiliary robustness figure of median b-w ratio
pdta <- fdta %>%
  filter(samp %in% robust) %>%
  select(cleanname,samp,yr,rat_pct5) %>%
  melt(id.vars = id_cols)

pdf('output/f4x_robustness_medratio.pdf')
gplt <- ggplot(pdta, aes(x = yr, y = value, lty = cleanname)) +
  geom_line() +
  scale_linetype_discrete(name = '') +
  scale_x_continuous(name = '') +
  scale_y_continuous(limits = c(0,1), labels = percent, name = 'Ratio of black to white median income') +
  theme_bw() +
  theme(legend.position = 'bottom') +
  guides(lty = guide_legend(nrow = 4)) +
  ggtitle('Black-white median income ratio over time, alternative\nincome definitions and sample constructions')
print(gplt)
dev.off()

# Figure 4A: Robustness graph of median black income rank #
pdta <- fdta %>%
  filter(samp %in% robust) %>%
  select(cleanname,samp,yr,black_rpct5) %>%
  melt(id.vars = id_cols)

pdf('output/f4a_robustness_medians.pdf')
gplt <- ggplot(pdta, aes(x = yr, y = value, lty = cleanname)) +
  geom_line() +
  scale_linetype_discrete(name = '') +
  scale_x_continuous(name = '') +
  scale_y_continuous(limits = c(0,50), labels = comma, name = 'National income percentile ') +
  theme_bw() +
  theme(legend.position = 'bottom') +
  guides(lty = guide_legend(nrow = 4)) +
  ggtitle('A: Median black income rank over time, alternative\nincome definitions and sample constructions')
print(gplt)
dev.off()

# Figure 4B: Robustness graph of income at final black rank over time

# Final (2016) median black rank for each robustness sample.
# The rank is rounded to a whole percentile and stored as text so that it
# matches the text key built from the 'pctNN' column names below; an unrounded
# rank would match nothing and silently drop the sample from the figure.
robustfin <- pdta %>% filter(yr == 2016)
robustfin$pctile <- as.character(round(robustfin$value))

# Long table of national percentiles (one row per sample, year and melted
# column), keyed by the column name with the 'pct' prefix removed
psamp <- melt(npctiles, id.vars = c('samp','yr','natmean'))
psamp$pctile <- gsub('pct','',psamp$variable)

# Income at each sample's final rank in every year, relative to the national mean
pdta <- merge(robustfin[,c('cleanname','samp','pctile')], psamp, by = c('samp','pctile'))
pdta$medrat <- pdta$value / pdta$natmean

pdf('output/f4b_robustness_finrank.pdf')
gplt <- ggplot(pdta, aes(x = yr, y = medrat, lty = cleanname)) +
  geom_line() +
  scale_linetype_discrete(name = '') +
  scale_x_continuous(name = '') +
  scale_y_continuous(limits = c(0,1), labels = percent, name = 'Income relative to national mean') +
  theme_bw() +
  theme(legend.position = 'bottom') +
  guides(lty = guide_legend(nrow = 4)) +
  ggtitle('B: Ratio of income at 2016 median black percentile to national mean over time,\nalternative income definitions and sample constructions')
print(gplt)
dev.off()

## Extra robustness figure: all samples on one graph
# Every sample is drawn as a faint line; the primary sample ('fambase') is
# drawn again on top at full opacity.
id_cols <- c('cleanname','samp','yr')

# Auxiliary robustness figure of median b-w ratio
pdta <- fdta %>% select(cleanname,samp,yr,rat_pct5) %>% melt(id.vars = id_cols)
mdta <- filter(pdta, samp == 'fambase')

pdf('output/fX1_fullrobust_medratio.pdf', height = 5)
gplt <- ggplot(pdta, aes(x = yr, y = value, group = cleanname)) +
  geom_line(alpha = .05) +
  geom_line(data = mdta, aes(x = yr, y = value)) +
  scale_x_continuous(name = '') +
  scale_y_continuous(limits = c(0,1), labels = percent, name = 'Ratio of black to white median income') +
  theme_bw() +
  theme(legend.position = 'none') +
  ggtitle('Black-white median income ratio over time, full set of robustness specifications.\nDark line is primary sample')
print(gplt)
dev.off()

# Auxiliary robustness figure of median black rank over time
pdta <- fdta %>% select(cleanname,samp,yr,black_rpct5) %>% melt(id.vars = id_cols)
mdta <- filter(pdta, samp == 'fambase')

pdf('output/fX2_fullrobust_rank.pdf', height = 5)
gplt <- ggplot(pdta, aes(x = yr, y = value, group = cleanname)) +
  geom_line(alpha = .05) +
  geom_line(data = mdta, aes(x = yr, y = value)) +
  scale_x_continuous(name = '') +
  scale_y_continuous(limits = c(0,50), labels = comma, name = 'National income percentile ') +
  theme_bw() +
  theme(legend.position = 'none') +
  ggtitle('Median black income rank over time, full set of robustness specifications.\nDark line is primary sample')
print(gplt)
dev.off()

# Auxiliary robustness figure of income at final black rank relative to nation over time

# Final (2016) median black rank for every sample.
# Rounded to a whole percentile and stored as text so it matches the text key
# derived from the 'pctNN' column names; an unrounded rank would match nothing
# and the sample would silently vanish from the figure.
robustfin <- pdta %>% filter(yr == 2016)
robustfin$pctile <- as.character(round(robustfin$value))

# Long table of national percentiles, keyed by column name without the 'pct' prefix
psamp <- melt(npctiles, id.vars = c('samp','yr','natmean'))
psamp$pctile <- gsub('pct','',psamp$variable)

# Income at each sample's final rank in every year, relative to the national mean
pdta <- merge(robustfin[,c('cleanname','samp','pctile')], psamp, by = c('samp','pctile'))
pdta$medrat <- pdta$value / pdta$natmean
mdta <- filter(pdta, samp == 'fambase')

pdf('output/fX3_fullrobust_finrank.pdf', height = 5)
gplt <- ggplot(pdta, aes(x = yr, y = medrat, group = cleanname)) +
  geom_line(alpha = .05) +
  geom_line(data = mdta, aes(x = yr, y = medrat)) +
  scale_x_continuous(name = '') +
  scale_y_continuous(limits = c(0,1), labels = percent, name = 'Income relative to national mean') +
  theme_bw() +
  theme(legend.position = 'none') +
  ggtitle('Ratio of income at 2016 median black percentile to national mean over time,\nfull set of robustness specifications. Dark line is primary sample')
print(gplt)
dev.off()

## Now loop to make sample-specific graphs ##

# Print `plot` into a PDF at `path`. Further arguments (e.g. width) are passed
# to pdf(). The device is closed on exit, so a plot that fails to render does
# not leave a PDF device open.
save_plot_pdf <- function(plot, path, ...) {
  pdf(path, ...)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# Round `n` and append the English ordinal suffix:
# 1 -> '1st', 22 -> '22nd', 43 -> '43rd', 11 -> '11th', 27 -> '27th'.
ordinal_label <- function(n) {
  n <- round(n)
  suffix <- ifelse((n %% 100) %in% 11:13, 'th',
            ifelse(n %% 10 == 1, 'st',
            ifelse(n %% 10 == 2, 'nd',
            ifelse(n %% 10 == 3, 'rd', 'th'))))
  paste0(n, suffix)
}

for (s in seq_along(snames)) {
  sname <- snames[s]
  tname <- tnames[s]
  print(sname)

  # Figures for the primary sample are additionally copied to the main output folder
  is_main <- sname == 'fambase'

  #Make output directory (quietly, so re-runs do not warn that it already exists)
  samp_dir <- paste0('output/samp_', sname)
  dir.create(samp_dir, showWarnings = FALSE, recursive = TRUE)
  samp_prefix <- paste0(samp_dir, '/f_', sname, '_')

  #Select sample
  dta <- filter(fdta, samp == sname)
  pctiles <- filter(npctiles, samp == sname)
  decs <- filter(ndecs, samp == sname)

  #Loop through median and mean
  for (stat in c('pct5','mean')) {
    dta$ratio <- dta[, paste0('rat_', stat)]
    dta$brank <- dta[, paste0('black_r', stat)]
    dta$wrank <- dta[, paste0('white_r', stat)]

    #Label used in titles and file names: 'median' stands in for 'pct5'
    v <- if (stat == 'pct5') 'median' else stat

    #Get final black and white rank, rounded to a whole national percentile
    brankfin <- dta %>% filter(yr == 2016) %>% select(brank) %>% unlist() %>% round()
    wrankfin <- dta %>% filter(yr == 2016) %>% select(wrank) %>% unlist() %>% round()
    brank_lab <- ordinal_label(brankfin)
    wrank_lab <- ordinal_label(wrankfin)

    #Get dollar value at that rank and ratio relative to national mean over time
    pctiles$bfin <- pctiles[, paste0('pct', brankfin)]
    pctiles$wfin <- pctiles[, paste0('pct', wrankfin)]
    pctiles$bfinwfin <- pctiles$bfin / pctiles$wfin
    pctiles$brelnat <- pctiles$bfin / pctiles$natmean
    pctiles$wrelnat <- pctiles$wfin / pctiles$natmean

    #Figure 3A: Ratio of black to white average income
    gplt <- ggplot(dta, aes(x = yr, y = ratio)) +
      geom_line() +
      scale_x_continuous(name = '') +
      scale_y_continuous(limits = c(0,1), labels = percent, name = paste('Ratio of',v,'black to',v,'white',tname)) +
      theme_bw() +
      ggtitle(paste('A: Ratio of',v,'black to',v,'white',tname,'over time','\n'))
    save_plot_pdf(gplt, paste0(samp_prefix, 'ratio_', v, '.pdf'), width = 6)

    #Export main sample into overall output folder
    if (is_main && v == 'median') {
      save_plot_pdf(gplt, paste0('output/f3a_ratio_', sname, '_', v, '.pdf'), width = 6)
    }

    #Figure 3B: Average black rank over time
    gplt <- ggplot(dta, aes(x = yr, y = brank)) +
      geom_line() +
      scale_x_continuous(name = '') +
      scale_y_continuous(limits = c(0,50), name = 'National income percentile') +
      theme_bw() +
      ggtitle(paste('B:',str_to_title(v),'black income rank over time,',tname,'\n'))
    save_plot_pdf(gplt, paste0(samp_prefix, 'rank_b_', v, '.pdf'), width = 6)

    #Export main sample into overall output folder
    if (is_main && v == 'median') {
      save_plot_pdf(gplt, paste0('output/f3b_rank_b_', sname, '_', v, '.pdf'), width = 6)
    }

    #Auxiliary figure: Average white rank over time
    gplt <- ggplot(dta, aes(x = yr, y = wrank)) +
      geom_line() +
      scale_x_continuous(name = '') +
      scale_y_continuous(limits = c(0,65), name = '') +
      theme_bw() +
      ggtitle(paste(str_to_title(v),'white rank over time,',tname))
    save_plot_pdf(gplt, paste0(samp_prefix, 'rank_w_', v, '.pdf'), width = 6)

    #Figure 3C: Ratio of income at final black percentile to national mean over time
    gplt <- ggplot(pctiles, aes(x = yr, y = brelnat)) +
      geom_line() +
      scale_x_continuous(name = '') +
      scale_y_continuous(limits = c(0,1), name = 'Income relative to national mean', labels = percent) +
      theme_bw() +
      ggtitle(paste0('C: Ratio of income at ', brank_lab, ' percentile (', v, ' for blacks in 2016)\nto national mean income over time, ', tname))
    save_plot_pdf(gplt, paste0(samp_prefix, 'rfinnat_b_', v, '.pdf'), width = 6)

    #Export main sample into overall output folder
    if (is_main && v == 'median') {
      save_plot_pdf(gplt, paste0('output/f3c_rfinnat_b_', sname, '_', v, '.pdf'), width = 6)
    }

    #Auxiliary figure: Ratio of income at final white percentile to national mean over time
    gplt <- ggplot(pctiles, aes(x = yr, y = wrelnat)) +
      geom_line() +
      scale_x_continuous(name = '') +
      scale_y_continuous(limits = c(0,1), name = '', labels = percent) +
      theme_bw() +
      ggtitle(paste0('Ratio of income at ', wrank_lab, ' percentile (', v, ' for whites in 2016)\nto national mean income over time, ', tname))
    save_plot_pdf(gplt, paste0(samp_prefix, 'rfinnat_w_', v, '.pdf'), width = 6)

    #Auxiliary figure: Ratio of income at final black percentile to income at final white percentile over time
    gplt <- ggplot(pctiles, aes(x = yr, y = bfinwfin)) +
      geom_line() +
      scale_x_continuous(name = '') +
      scale_y_continuous(limits = c(0,1), name = '', labels = percent) +
      theme_bw() +
      ggtitle(paste0('Ratio of income at ', brank_lab, ' percentile (', v, ' for blacks in 2016)\nto income at ', wrank_lab, ' percentile (', v, ' for whites in 2016) over time\n', tname))
    save_plot_pdf(gplt, paste0(samp_prefix, 'rfinbfinw_', v, '.pdf'), width = 6)
  } #Mean/median

  #Auxiliary figure: racial over/under representation in deciles at beginning and end of sample
  pdta <- filter(decs, year %in% c(1968,2016))
  pdta$bratio <- pdta$pop_black / pdta$pop_tot
  pdta$wratio <- pdta$pop_white / pdta$pop_tot

  #Blacks (the trailing underscore in the file name is kept from the original script)
  gplt <- ggplot(pdta, aes(x = dec, y = bratio, lty = as.character(year))) +
    geom_line() +
    geom_point() +
    scale_linetype_discrete(name = '') +
    scale_y_continuous(limits = c(0,3), name = 'Concentration of blacks relative to nation', labels = percent) +
    scale_x_continuous(breaks = 1:10, minor_breaks = NULL, name = 'Income decile') +
    theme_bw() +
    theme(legend.position = 'bottom') +
    ggtitle(paste0('Over- and under-representation of African Americans by income decile,\n', tname, ', 1968 and 2016'))
  save_plot_pdf(gplt, paste0(samp_prefix, 'overrep_decs_b_.pdf'))

  #Whites
  gplt <- ggplot(pdta, aes(x = dec, y = wratio, lty = as.character(year))) +
    geom_line() +
    geom_point() +
    scale_linetype_discrete(name = '') +
    scale_y_continuous(limits = c(0,3), name = 'Concentration of whites relative to nation', labels = percent) +
    scale_x_continuous(breaks = 1:10, minor_breaks = NULL, name = 'Income decile') +
    theme_bw() +
    theme(legend.position = 'bottom') +
    ggtitle(paste0('Over- and under-representation of whites by income decile,\n', tname, ', 1968 and 2016'))
  save_plot_pdf(gplt, paste0(samp_prefix, 'overrep_decs_w.pdf'))

  #Auxiliary figure: National deciles over time as percentage of the national mean income
  deccols <- paste0('pct', 1:9*10)
  pdta <- pctiles[, c('yr','natmean',deccols)] %>% melt(id.vars = c('yr','natmean'))
  pdta$ratio <- pdta$value / pdta$natmean
  pdta$variable <- factor(pdta$variable, levels = deccols, labels = paste0(1:9*10, 'th percentile'))

  gplt <- ggplot(pdta, aes(x = yr, y = ratio, lty = variable)) +
    geom_line() +
    scale_linetype_discrete(name = '') +
    scale_y_continuous(limits = c(0,3), name = 'Income relative to national mean', labels = percent) +
    scale_x_continuous(name = '') +
    theme_bw() +
    ggtitle(paste0('Income at each decile relative to national mean over time,\n', tname))
  save_plot_pdf(gplt, paste0(samp_prefix, 'natdecs.pdf'))

  # The two Figure 5 exports below have always been written with a '_mean'
  # suffix, because the original script pasted in the value left over in the
  # mean/median loop variable. The figures themselves are decile-based; the
  # suffix is kept only so the output file names do not change.
  # NOTE(review): rename once nothing refers to the old file names.
  fig5_suffix <- 'mean'

  #Figure 5A: National rank of each decile of the black distribution over time
  rankcols <- paste0('black_rpct', 1:9)
  pdta <- dta[, c('yr',rankcols)] %>% melt(id.vars = 'yr')
  pdta$variable <- factor(pdta$variable, levels = rankcols, labels = paste0(1:9*10, 'th percentile'))

  gplt <- ggplot(pdta, aes(x = yr, y = value, lty = variable)) +
    geom_line() +
    scale_linetype_discrete(name = '') +
    scale_y_continuous(limits = c(0,100), name = 'National income percentile') +
    scale_x_continuous(name = '') +
    theme_bw() +
    ggtitle(paste0('A: National rank at each decile of black income distribution over time,\n', tname))
  save_plot_pdf(gplt, paste0(samp_prefix, 'decranks_b.pdf'))

  #Export main sample into overall output folder
  if (is_main) {
    save_plot_pdf(gplt, paste0('output/f5a_decranks_b_', sname, '_', fig5_suffix, '.pdf'))
  }

  #Figure 5B: Income at final rank of each black decile relative to national mean over time
  # 2016 national rank of each black decile, rounded to a whole percentile.
  # The same rounded value is used for the column lookup and the legend label.
  decpcts <- filter(pdta, yr == 2016)
  dec_ranks <- round(decpcts$value)
  dec_labels <- paste0(1:9*10, 'th/', ordinal_label(dec_ranks), ' percentile')

  # Built one decile at a time so that two deciles sharing the same national
  # percentile still get separate lines.
  pdta <- do.call(rbind, lapply(seq_along(dec_ranks), function(i) {
    data.frame(yr = pctiles$yr,
               natmean = pctiles$natmean,
               variable = dec_labels[i],
               value = pctiles[[paste0('pct', dec_ranks[i])]],
               stringsAsFactors = FALSE)
  }))
  pdta$ratio <- pdta$value / pdta$natmean

  #Graph
  gplt <- ggplot(pdta, aes(x = yr, y = ratio, lty = variable)) +
    geom_line() +
    scale_linetype_discrete(name = '') +
    scale_y_continuous(limits = c(0,1.8), name = 'Income relative to national mean', labels = percent) +
    scale_x_continuous(name = '') +
    theme_bw() +
    ggtitle(paste0('B: Income at 2016 ranks of each decile of the black income distribution\nrelative to national mean over time, ', tname))
  save_plot_pdf(gplt, paste0(samp_prefix, 'decrelnat_b.pdf'))

  #Export main sample into overall output folder
  if (is_main) {
    save_plot_pdf(gplt, paste0('output/f5b_decrelnat_b_', sname, '_', fig5_suffix, '.pdf'))
  }

  #Auxiliary figure: black-white ratios at each decile over time
  pdta <- dta[, c('yr', grep('rat_pct', names(fdta), value = TRUE))] %>% melt(id.vars = 'yr')
  pdta$variable <- factor(pdta$variable, levels = paste0('rat_pct', 1:9), labels = paste0(1:9*10, 'th percentile'))

  gplt <- ggplot(pdta, aes(x = yr, y = value, lty = variable)) +
    geom_line() +
    scale_linetype_discrete(name = '') +
    scale_y_continuous(limits = c(0,1.1*max(pdta$value)), name = 'Ratio of black to white incomes', labels = percent) +
    scale_x_continuous(name = '') +
    theme_bw() +
    ggtitle(paste0('Ratio of black to white incomes at various income percentiles,\n', tname))
  save_plot_pdf(gplt, paste0(samp_prefix, 'decratio.pdf'))


  #Figure 6A: Counterfactual median ratio with steady income distribution
  #Get ranks of median black and white in each year, rounded to a whole
  #percentile so they can be matched against the percentile table below
  ranktime <- dta %>% select(yr,black_rpct5,white_rpct5) %>% melt(id.vars = 'yr')
  names(ranktime) <- c('yr','race','pctile')
  ranktime$pctile <- round(ranktime$pctile)

  #Get income percentiles in 1968 (only the pctNN columns, so every row has a numeric key)
  pct_cols <- grep('^pct[0-9]+$', names(pctiles), value = TRUE)
  pct68 <- filter(pctiles, yr == 1968)[, c('samp',pct_cols)] %>% melt(id.vars = 'samp')
  pct68$pctile <- as.numeric(gsub('pct','',pct68$variable))

  #1968 income at each year's median rank, one column per race, then the ratio
  pdta <- merge(ranktime, pct68, by = 'pctile', all.x = TRUE)
  pdta <- pdta %>% select(yr,race,value) %>% dcast(yr ~ race, value.var = 'value')
  pdta$ratio <- pdta$black_rpct5 / pdta$white_rpct5

  gplt <- ggplot(pdta, aes(x = yr, y = ratio)) +
    geom_line() +
    scale_x_continuous(name = '') +
    scale_y_continuous(limits = c(0,.8), labels = percent, name = paste('Ratio of median black to median white',tname)) + #1.1*max(pdta$ratio)
    theme_bw() +
    ggtitle(paste('A: Counterfactual ratio of black to white median income over time\nwith steady 1968 income distribution,',tname))
  save_plot_pdf(gplt, paste0(samp_prefix, 'cf_oldinc.pdf'))

  #Export main sample into overall output folder
  if (is_main) {
    save_plot_pdf(gplt, paste0('output/f6a_cf_oldinc_', sname, '.pdf'))
  }


  #Figure 6B: Counterfactual median income ratio with steady 1968 ranks
  #Get median ranks in 1968
  brank68 <- dta %>% filter(yr == 1968) %>% select(black_rpct5) %>% unlist() %>% round()
  wrank68 <- dta %>% filter(yr == 1968) %>% select(white_rpct5) %>% unlist() %>% round()

  #Get those percentiles in every year, and compute ratio
  pdta <- data.frame(yr = pctiles$yr,
                     ratio = pctiles[[paste0('pct', brank68)]] / pctiles[[paste0('pct', wrank68)]])

  gplt <- ggplot(pdta, aes(x = yr, y = ratio)) +
    geom_line() +
    scale_x_continuous(name = '') +
    scale_y_continuous(limits = c(0,.8), labels = percent, name = paste('Ratio of median black to median white',tname)) + #1.1*max(pdta$ratio)
    theme_bw() +
    ggtitle(paste('B: Counterfactual ratio of black to white median income over time\nwith steady 1968 ranks,',tname))
  save_plot_pdf(gplt, paste0(samp_prefix, 'cf_oldrank.pdf'))

  #Export main sample into overall output folder
  if (is_main) {
    save_plot_pdf(gplt, paste0('output/f6b_cf_oldrank_', sname, '.pdf'))
  }

} #sample
