This is a data demo for performing a PCA on multiple climate raster layers with rasterPCA() function.

Load libraries and grabbing dataset

#libraries related to maps
library(sp)
library(raster)
library(maps)
library(mapdata)
library(RStoolbox)# rasterPCA function

#packages for reading in data
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:raster':
## 
##     shift
#reading in dataset
dat<-fread("https://raw.githubusercontent.com/adnguyen/HelmsCahan_CBP-partA_2016/master/Script_Analyses/Sampling_sites_table.csv")

looking at the data

dat<-data.frame(dat)
str(dat)
## 'data.frame':    25 obs. of  7 variables:
##  $ n        : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Locale   : chr  "Notchview" "Smokey Mountains" "Molly Bog" "Kennebec Highlands" ...
##  $ State    : chr  "MA" "TN" "VT" "ME" ...
##  $ Site.ID  : chr  "NO" "GP" "MB" "KH" ...
##  $ Longitude: num  -73 -83.5 -72.6 -69.9 -82 ...
##  $ Latitude : num  42.5 35.6 44.5 44.6 35.9 ...
##  $ Tmax     : num  24.8 25 25.3 25.9 26 26 26 26.4 26.5 26.6 ...

grab bioclim climate variables and add to dataset

w <- getData('worldclim', var='bio', res=2.5) # downloading data
plot(w,1) # plot of mean annual temperature

#dbio1 <- extract(w, dat[,c("Longitude","Latitude")]) # by column
dbio1 <- extract(w, dat[,5:6]) #grabbing bioclim variables based on coordinates
full.dat <- data.frame(cbind(dat, dbio1[,1:19])) # combining orginal data set with bioclim variables

Plot eastern US unmanipulated climate data

plot(w, 1, xlim=c(-130, -65), ylim=c(25,50), axes=F, legend=F, col=colorRampPalette(c("lightblue","orange"))(255),main="",box=FALSE)
map("worldHires",c("USA","Canada"),add=TRUE)
map("state", c('florida', 'south carolina', 'north carolina', 'georgia', 'virginia', 'west virginia', 'maryland', 'delaware', 'new jersey', 'rhode island', 'new york', 'connecticut', 'massachusetts', 'pennyslvania', 'vermont', 'new hampshire', 'maine', 'alabama', 'tennessee', 'kentucky', 'ohio','iowa','illinois','arkansas','missouri','minnesota','wisconsin','michigan','louisiana','mississippi',"texas","arizona","illinois","california","oregon","utah","washington","kansas","new mexico","montana","idaho","wyoming","north dakota","south dakota","nebraska","oklahoma"), add = TRUE)

Using rasterPCA function

Climate is usually correlated and there are a lot of them. One way to reduce the complexity is to perform a PCA on those variables (19 of them). Also, PCA’ing the whole word will take a long time, so crop the desired region first.

#look at range of lat and lon
range(full.dat[,5]) #lon range
## [1] -83.94955 -68.51740
range(full.dat[,6]) # lat range
## [1] 33.55605 44.98180
#designating limits
lims<-c(-85,-65,30,50)

submap<-crop(w,lims)
plot(submap,1)#plot MAT
map("worldHires",c("USA","Canada"),add=TRUE)
map("state", c('florida', 'south carolina', 'north carolina', 'georgia', 'virginia', 'west virginia', 'maryland', 'delaware', 'new jersey', 'rhode island', 'new york', 'connecticut', 'massachusetts', 'pennyslvania', 'vermont', 'new hampshire', 'maine', 'alabama', 'tennessee', 'kentucky', 'ohio','iowa','illinois','arkansas','missouri','minnesota','wisconsin','michigan','louisiana','mississippi',"texas","arizona","illinois","california","oregon","utah","washington","kansas","new mexico","montana","idaho","wyoming","north dakota","south dakota","nebraska","oklahoma"), add = TRUE)

#now we can do pca
pcamap<-rasterPCA(submap,spca=TRUE)
#check loadings and eigenvalues
knitr::kable(round(pcamap$model$loadings[,1:3],3)) # top 3 loadings
Comp.1 Comp.2 Comp.3
bio1 -0.273 -0.181 0.114
bio2 -0.142 -0.174 -0.148
bio3 -0.271 -0.142 -0.011
bio4 0.276 0.062 -0.134
bio5 -0.248 -0.262 0.119
bio6 -0.270 -0.135 0.171
bio7 0.258 0.037 -0.188
bio8 -0.092 -0.415 -0.100
bio9 -0.258 0.014 0.030
bio10 -0.259 -0.228 0.114
bio11 -0.276 -0.145 0.130
bio12 -0.245 0.278 -0.165
bio13 -0.218 0.119 -0.397
bio14 -0.199 0.390 0.111
bio15 0.065 -0.294 -0.503
bio16 -0.212 0.131 -0.415
bio17 -0.223 0.353 0.084
bio18 -0.194 0.003 -0.445
bio19 -0.232 0.329 -0.018
#eigenvalues
summary(pcamap$model)
## Importance of components:
##                          Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     3.421141 1.6595794 1.5312287 0.97641761 0.83951692
## Proportion of Variance 0.616011 0.1449581 0.1234032 0.05017849 0.03709414
## Cumulative Proportion  0.616011 0.7609691 0.8843723 0.93455080 0.97164494
##                            Comp.6      Comp.7      Comp.8      Comp.9
## Standard deviation     0.45362675 0.390520722 0.270717128 0.198345152
## Proportion of Variance 0.01083038 0.008026654 0.003857251 0.002070568
## Cumulative Proportion  0.98247532 0.990501979 0.994359229 0.996429798
##                            Comp.10     Comp.11      Comp.12     Comp.13
## Standard deviation     0.151121876 0.141143325 0.1000572288 0.077941692
## Proportion of Variance 0.001201991 0.001048497 0.0005269184 0.000319732
## Cumulative Proportion  0.997631788 0.998680285 0.9992072035 0.999526936
##                             Comp.14      Comp.15      Comp.16      Comp.17
## Standard deviation     0.0659197158 0.0504049086 4.248602e-02 1.495290e-02
## Proportion of Variance 0.0002287057 0.0001337187 9.500327e-05 1.176785e-05
## Cumulative Proportion  0.9997556412 0.9998893599 9.999844e-01 9.999961e-01
##                             Comp.18      Comp.19
## Standard deviation     8.573811e-03 7.041924e-09
## Proportion of Variance 3.868959e-06 2.609931e-18
## Cumulative Proportion  1.000000e+00 1.000000e+00
#plot first pca1
plot(pcamap$map,1) #plot the first pca!
map("worldHires",c("USA","Canada"),add=TRUE)
map("state", c('florida', 'south carolina', 'north carolina', 'georgia', 'virginia', 'west virginia', 'maryland', 'delaware', 'new jersey', 'rhode island', 'new york', 'connecticut', 'massachusetts', 'pennyslvania', 'vermont', 'new hampshire', 'maine', 'alabama', 'tennessee', 'kentucky', 'ohio','iowa','illinois','arkansas','missouri','minnesota','wisconsin','michigan','louisiana','mississippi',"texas","arizona","illinois","california","oregon","utah","washington","kansas","new mexico","montana","idaho","wyoming","north dakota","south dakota","nebraska","oklahoma"), add = TRUE)

compare pca of climate of sites vs raster layer

datpca<-princomp(scale(full.dat[,8:26]))
knitr::kable(round(datpca$loadings[,1:3],3))# pca of sites
Comp.1 Comp.2 Comp.3
bio1 -0.244 0.266 0.037
bio2 -0.178 0.090 0.483
bio3 -0.288 0.091 0.062
bio4 0.298 -0.057 0.116
bio5 -0.158 0.360 0.203
bio6 -0.263 0.218 -0.072
bio7 0.283 -0.062 0.261
bio8 -0.015 0.308 -0.506
bio9 -0.234 0.094 0.264
bio10 -0.190 0.337 0.112
bio11 -0.268 0.215 -0.009
bio12 -0.249 -0.254 -0.094
bio13 -0.226 -0.245 -0.030
bio14 -0.202 -0.304 0.071
bio15 0.094 0.242 -0.217
bio16 -0.244 -0.232 -0.143
bio17 -0.225 -0.287 0.031
bio18 -0.243 -0.116 -0.437
bio19 -0.262 -0.199 0.148
#looks similar

Session Information

sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.12.2 (Sierra)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] data.table_1.9.6 RStoolbox_0.1.6  mapdata_2.2-6    maps_3.1.1      
## [5] raster_2.5-8     sp_1.2-3        
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.7        highr_0.6          nloptr_1.0.4      
##  [4] formatR_1.4        plyr_1.8.4         prettydoc_0.2.0   
##  [7] iterators_1.0.8    tools_3.3.1        digest_0.6.10     
## [10] lme4_1.1-12        evaluate_0.9       gtable_0.2.0      
## [13] nlme_3.1-128       lattice_0.20-33    mgcv_1.8-12       
## [16] Matrix_1.2-6       foreach_1.4.3      rgdal_1.1-10      
## [19] curl_2.1           parallel_3.3.1     yaml_2.1.13       
## [22] SparseM_1.74       stringr_1.1.0      knitr_1.14        
## [25] rgeos_0.3-21       MatrixModels_0.4-1 stats4_3.3.1      
## [28] rprojroot_1.2      grid_3.3.1         caret_6.0-73      
## [31] nnet_7.3-12        XML_3.98-1.4       rmarkdown_1.3     
## [34] minqa_1.2.4        ggplot2_2.1.0      reshape2_1.4.1    
## [37] car_2.1-4          magrittr_1.5       splines_3.3.1     
## [40] backports_1.0.5    scales_0.4.0       codetools_0.2-14  
## [43] ModelMetrics_1.1.0 htmltools_0.3.5    MASS_7.3-45       
## [46] pbkrtest_0.4-6     geosphere_1.5-5    colorspace_1.2-6  
## [49] quantreg_5.29      stringi_1.1.2      doParallel_1.0.10 
## [52] munsell_0.4.3      chron_2.3-47