This is a file from the Wikimedia Commons

File:IncomeInequality7.svg

From Wikipedia, the free encyclopedia
Jump to navigation Jump to search
Original file(SVG file, nominally 630 × 630 pixels, file size: 116 KB)

Summary[edit]

Description
Date
Source Own work
Author DavidMCEddy
R logo.svg
This W3C-unspecified chart was created with R:

R code

#  Charts based on Piketty data NOTE:  The Wikimedia comments display deletes leading '#'
#   which is a comment character in R.  
#   However, if you edit -> copy these comments, you should get 
#   a working R script with instructions on how to create this plot.  
###
###
### Create a plot of income inequality in the US
### merging US Census Table F-1
### Income Limits for Each Fifth and Top 5 Percent of Families
###      (All Races)
###
### with Piketty & Saez of "tax units" = families
###    There are slight systematic differences
### between these two sources, but the differences are modest
### relative to the overall image esp. of the top earners
###
###

##
## 1.  Census F-1:
##     Go to
##http://www.census.gov/hhes/www/income/data/historical/inequality/index.html
## Select "Table F-1 All Races".  This should download
##     'F01AR_2010.xls' # put in the working directory
(F01_ <- dir(pattern='^F01AR_'))
(F01.xls <- grep('\\.xls$', F01_, value=TRUE)[1])
# Confirm that this is what you want.

library(gdata)
dim(F01 <- read.xls(F01.xls, stringsAsFactors=FALSE))

(id <- grep('Dollars', F01[[1]]))
nyrs <- diff(id)-1
dim(F01adj <- read.xls(F01.xls, stringsAsFactors=FALSE,
                   skip=id[2], nrows=nyrs))
names(F01adj) <- c('Year', 'Number.thousands',
                   paste('quintile', 1:4, sep=''), 'p95')

decrypt <- function(x){
# Delete commas (thousand separators) and footnote references
  x1 <- gsub(',', '', x)
  x2 <- strsplit(x1, ' ')
  x. <- sapply(x2, '[', 1)
  x.[x1==''] <- NA
  as.numeric(x.)
}

str(F01Adj <- as.data.frame(lapply(F01adj[nyrs:1, 1:7], decrypt)))
# check
matplot(F01Adj$Year, F01Adj[3:7], log='y', type='l')

##
## 2.  Piketty & Saez 2008 update
##     Go to http://elsa.berkeley.edu/~saez/
##     Download
## (Tables and Figures Updated to 2008 in Excel format, July 2010)
##  with "Income and Wealth Inequality"
## "Income Inequality in the United States, 1913-1998"
## with Thomas Piketty, Quarterly Journal of Economics, 118(1), 2003, 1-39
## It should download as 'TabFig2008.xls';  put in the working directory
##
(PikSaez_ <- dir(pattern='^TabFig20'))
(PikSaez.xls <- grep('\\.xls', PikSaez_, value=TRUE))

dim(PikS <- read.xls(PikSaez.xls, skip=3, sheet='Table A6',
                stringsAsFactors=FALSE))
#  should start with 1913
# which is on row 6 of the file, but read.xls thinks it's row 4
PikS[[1]] # check

(PikHeaders <- read.xls(PikSaez.xls, nrows=4, sheet='Table A6',
                       stringsAsFactors=FALSE))
(PikH <- as.character(PikHeaders[2, ]))
names(PikS) <- c('year', PikH[-1])

str(PikSaez <- as.data.frame(lapply(PikS[, c(1, 16:21)], decrypt)))

table(sel47. <- (PikSaez$year>1946))
str(PikS. <- rbind(PikSaez[sel47., ], NA, NA))

##
## 3.  GDP
##     Go to
##     http://www.measuringworth.org/usgdp/
##     Select all variables from 1790 to the present
##     Copy and paste into a spreadsheet,
##     save as usgdp.csv with 1 header row, data starts on row 2
##

usgdp <- 'usgdp.csv'
str(usgdp <- read.csv(usgdp))
# select years,
plot(GDP.Deflator...index.2005.100.~Year, usgdp, log='y')

str(GDP <- usgdp[usgdp$Year>1946,])

(n. <- nrow(GDP))
GDP[n.,]
GDPadj <- GDP$GDP.Deflator[n.]/100
str(GDP. <- data.frame(Year=GDP$Year,
                       realGDP.M=GDPadj*GDP$Real.GDP..,
                        GDP.Deflator=GDP$GDP.Deflator,
                        PopulationK=GDP$Population,
                        realGDPperCap=GDPadj*GDP$Real.GDP.per))

##
## 4.  Merge
##
# 4.1.  Median
# median approximated by the geometric mean of quintiles 3 and 4.
# This would be correct for a lognormal distribution,
# which is a reasonable approximation for many quantities
# involving money
F01. <- with(F01Adj, data.frame(Year, Number.thousands, quintile1,
                                quintile2,
                                median=sqrt(quintile2*quintile3),
                                quintile3, quintile4, p95))
str(F01.)
# 4.2.  Adjust PikSaez from 2008 dollars
#       to 2010 dollars used in the Census table F-1

(PSbad <- is.na(PikS.[2]))
(PSlast <- max(PikS.[!PSbad, 1]))

(PSlastRow <- which(F01[[1]]==PSlast)[1])
(F01lastRow <- which(F01[[1]]==F01.$Year[nyrs])[1])
(p95.last2 <- decrypt(F01[c(PSlastRow, F01lastRow), 7]))
(PSinflat <- p95.last2[2]/p95.last2[1])

# 4.3.  Combine

str(F01.PS <- cbind(F01., PSinflat*PikS.[-1], GDP.[-1]))
F01.PS$IRS.censusP95 <- with(F01.PS, P95/p95)
F01.PS$household <- with(F01.PS, PopulationK/Number.thousands)
F01.PS$GDPperHouse <- with(F01.PS, household*realGDPperCap)
F01.PS$mean.median <- with(F01.PS, GDPperHouse/median)
F01.PS[n.-2,]

##
## 5.  preliminary plots
##

#  5.1.  a few individual plots
plot(IRS.censusP95~Year, F01.PS, type='b')
# starts ~0.74, trends rapidly up to ~0.97,
# then drifts back to ~0.75

plot(household~Year, F01.PS, type='b')

# drifts around between ~3.71 and 4.01

plot(GDPperHouse~Year, F01.PS, type='b')

names(F01.PS)

##
## 6.  Combined plot
##
#  6.1.  Plot to a file?
svg('incomeInequality7.svg')

#  6.2.  Select columns to plot
#plotCols <- c(3:14, 21) # for all $ on the same plot
plotCols <- c(3, 5, 7:8, 11, 13:14)

#  6.3.  base plot
names(F01.PS)[plotCols]
(kcols <- length(plotCols))
(plotColors <- c(1:(kcols-1), kcols+1))
plotLty <- 1:kcols

op <- par(mar=c(5, 4, 4, 5)+0.1)

matplot(F01.PS$Year, F01.PS[plotCols]/1000,
        log='y', type='l', xlab='', ylab='', las=1,
        axes=FALSE, lwd=3, col=plotColors, lty=plotLty)
axis(1, at=seq(1950, 2010, 10),
     labels=c(1950, NA, 1970, NA, 1990, NA, 2010), cex.axis=1.5)
yat <- c(10, 50, 100, 500, 1000, 5000, 10000)
axis(2, yat, labels=c('$10K', '$50K', '$100K', '$500K',
             '$1M', '$5M', '$10M'), las=1, cex.axis=1.2)

#  6.4.  Label the lines
#names(F01.PS)
lineLbls <- c('Year', 'thousands', '20%', '40%', '50%', '60%', '80%',
    '95%', '90%', '95%', '99%', '99.5%', '99.9%', '99.99%')[plotCols]
sel75 <- (F01.PS$Year==1975)

text(1973.5, 1.1*F01.PS[sel75, plotCols]/1000, lineLbls, cex=1.2)

#*** Growth broadly shared 1947 - 1970, then began diverging
#*** The divergence has been most pronouced among the top 1%
#*** and especially the top 0.01%

#  6.5.  Growth rate by quantile 1947-1970 and 1970 - present

keyYears <- c(1947, 1970, 2008)
(iYears <- which(F01.PS$Year %in% keyYears))
(dYears <- diff(keyYears))
kk <- length(keyYears)
(lblYrs <- paste(keyYears[-kk], keyYears[-1], sep='-'))

(growth <- sapply(F01.PS[iYears,], function(x, labels=lblYrs){
    dxi <- exp(diff(log(x)))
    names(dxi) <- labels
    dxi
} ))

(gr <- round(100*(growth-1)))
#         Year NumberK quintile1 quintile2 median quintile3 quintile4
#1947.1970 1.2    40.3      89.0      91.0   91.2      91.3      85.3
#1970.2008 1.9    51.0      10.2      19.8   26.8      34.1      47.3
#           p95   P90   P95  P99 P99.5 P99.9 P99.99 realGDP.M GDP.Deflator
#1947.1970 76.3 113.3 107.0 59.7  47.4  30.6   33.4     140.4         77.0
#1970.2008 66.7  31.5  45.2 82.9  99.1 189.0  361.3     208.2        346.4
#          PopulationK realGDPperCap IRS.census household GDPperHouse
#1947.1970        42.3          68.9       17.4       1.5        71.4
#1970.2008        48.6         107.4      -12.9      -1.6       104.1
#          mean.median
#1947.1970       -10.3
#1970.2008        61.0

(grYr <- growth^(1/dYears))
(grYr. <- round(100*(grYr-1), 1))

#  6.6.  Regression line:  linear spline

names(F01.PS)
(varyg <- c(3:14, 21))
Varyg <- names(F01.PS)[varyg]
str(F01ps <- reshape(F01.PS[c(1, varyg)], idvar='Year', ids=F01.PS$Year,
                     times=Varyg, timevar='pctile',
                     varying=list(Varyg), direction='long'))
F01ps$pctile <- factor(F01ps$pctile)
F01ps$t1970p <- pmax(0, F01ps$Year-1970)

table(nas <- is.na(F01ps$quintile1))

(Fit <- lm(log(quintile1/1000)~pctile+Year+pctile*t1970p,
           F01ps[!nas, ]))
#          contrasts=contrasts(F01ps$pctile, contrasts=FALSE)))
str(Pred <- predict(Fit))

table(F01ps$pctile)

plotCols
names(F01.PS)[plotCols]

End <- numeric(kcols)
for(i in seq(length=kcols)){
  F01i <- F01ps[!nas, ]
  seli <- (as.character(F01i$pctile) == names(F01.PS)[plotCols[i]])
#  with(F01i[seli, ], lines(Year, exp(Pred[seli]), col=plotColors[i]))
  yri <- F01i$Year[seli]
  predi <- exp(Pred[seli])
  lines(yri, predi, col=plotColors[i])
  End[i] <- predi[length(predi)]
  sel70i <- (yri==1970)
  points(yri[sel70i], predi[sel70i], col=plotColors[i])
}

coef(Fit)

#  6.7.  label growth rates

#(gR <- growthRates[3, ])
#(gR. <- paste(format(gR*100, digits=1), '%', sep=''))
(lastYr. <- max(lastYrs)+3)
#text(lastYr., End, gR., xpd=NA)
text(lastYr., End, paste(gr[2, plotCols], '%', sep=''), xpd=NA)
text(lastYr.+7, End, paste(grYr.[2, plotCols], '%', sep=''), xpd=NA)

(emax <- max(End))

(lastRng <- dimnames(gr)[[1]][2])

(lastHead <- paste('Growth\n', lastRng, '\ncum   annual', sep=''))
#text(lastYr.+3.5, emax*2, lastHead, xpd=NA)

#  6.8.  Label the presidents

abline(v=c(1953, 1961, 1969, 1977, 1981, 1989, 1993, 2001, 2009))
names(F01.PS)
(m99.95 <- with(F01.PS, sqrt(P99.9*P99.99))/1000)

text(1949, 5000, 'Truman')
text(1956.8, 5000, 'Eisenhower', srt=90)
text(1963, 5000, 'Kennedy', srt=90)
text(1966.8, 5000, 'Johnson', srt=90)
text(1971, 5*m99.95[24], 'Nixon', srt=90)
text(1975, 5*m99.95[28], 'Ford', srt=90)
text(1978.5, 5*m99.95[32], 'Carter', srt=90)
text(1985.1, m99.95[38], 'Reagan' )
text(1991, 0.94*m99.95[44], 'GHW Bush', srt=90)
text(1997, m99.95[50], 'Clinton')
text(2005, 1.1*m99.95[58], 'GW Bush', srt=90)
text(2010, 1.2*m99.95[62], 'Obama', srt=90)

#  6.9.  Done
dev.off()


Licensing[edit]

I, the copyright holder of this work, hereby publish it under the following license:
w:en:Creative Commons

attribution share alike

This file is licensed under the Creative Commons Attribution-Share Alike 3.0 Unported license.
You are free:
  • to share – to copy, distribute and transmit the work
  • to remix – to adapt the work
Under the following conditions:
  • attribution – You must attribute the work in the manner specified by the author or licensor (but not in any way that suggests that they endorse you or your use of the work).
  • share alike – If you alter, transform, or build upon this work, you may distribute the resulting work only under the same or similar license to this one.

File history

Click on a date/time to view the file as it appeared at that time.

Date/TimeThumbnailDimensionsUserComment
current04:15, 10 June 2012Thumbnail for version as of 04:15, 10 June 2012630 × 630 (116 KB)DavidMCEddyupdate using more recent data
19:01, 14 February 2012Thumbnail for version as of 19:01, 14 February 2012630 × 630 (116 KB)DavidMCEddy
No pages on the English Wikipedia link to this file. (Pages on other projects are not listed.)

Metadata