The relationship ndvi vs. savanna tree density across pixel resolution

knitr::opts_chunk$set(fig.path = "figure/RmcgwireNewv20180417_")

Preliminaries

#rwd <- "/media/Iomega_HDD/mcgwire/Rmcgwire"
rastdir <- ".."
vectdir <- "../OTB"

require(rgdal)
require(raster)
#require(gplots)
require(ggplot2)
require(gridExtra)
require(colorRamps)
require(RStoolbox)
source("mcgwirecoarsen.R")

1. Input Data

1.1 TM images

30m resolution #### Multi-band brick object

Note we assume UTM N11 (California) but arbitrary coordinates

tmbrick <- brick(file.path(rastdir,"tm_all.tif"))
projection(tmbrick)
## [1] "+proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"

(actual location not revealed by the author? check original data)
discard thermal band (which had been moved to 7th position)

tm <- tmbrick[[1:6]] 
names(tm) <- paste("tm",c(1:5,7),sep="")
#plotRGB(tm,r=2,g=4,b=3,stretch="lin")
ggRGB(tm,r=5,g=4,b=2,stretch="lin") + 
  ggtitle("TM false color composite (5,4,2) at 30m resolution")

1.2 Percent of trees

10m resolution

trees10 <- raster(file.path(rastdir,"trees10v2.tif"))
trees10[is.na(trees10)] <- 0 #0 is not NA
ggR(trees10, geom_raster=TRUE) + 
  scale_fill_gradientn(name = "Tree density", colours = c("white",matlab.like2(9))) +
  ggtitle("Tree density at 10 m resolution")

2. Calculate ndvi and average percent tree cover within each cell grid at full (30m) resolution

2.1. Calculate NDVI “image”

We make a crude approximation here, as at the bands should be converted to Surface Reflectance.

ndvi <- (tm[[4]] - tm[[3]])/(tm[[4]] + tm[[3]])
names(ndvi) <- "ndvi"
#ggR(ndvi,stretch="lin")
ggR(ndvi, geom_raster=TRUE) + 
  scale_fill_gradientn(name = "NDVI", colours = c("white",matlab.like2(9))) +
  ggtitle("NDVI at 30m resolution")

writeRaster(ndvi,filename="ndvi30",format="GTiff",overwrite=TRUE)

2.2 Create Spatial Polygon Data Frame with tm and ndvi values for each cell

Note: SPolDF is the R object for a polygon vector layer in GIS We add the ndvi layer to the brick

tmn <- addLayer(tm,ndvi)
tmn #check
## class       : RasterStack 
## dimensions  : 202, 114, 23028, 7  (nrow, ncol, ncell, nlayers)
## resolution  : 30, 30  (x, y)
## extent      : 210, 3630, 210, 6270  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## names       :         tm1,         tm2,         tm3,         tm4,         tm5,         tm7,        ndvi 
## min values  :  2.00000000,  5.00000000,  7.00000000,  9.00000000,  4.00000000,  3.00000000, -0.02362205 
## max values  :  92.0000000,  64.0000000, 117.0000000, 118.0000000, 255.0000000, 175.0000000,   0.4444444

Polygonize

tmg <- rasterToPolygons(tmn*1.0, fun=NULL)
class(tmg) #check class
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
str(tmg, max.level=2)#investigate structure; max.level=2 to avoid recursive info
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
##   ..@ data       :'data.frame':  23028 obs. of  7 variables:
##   ..@ polygons   :List of 23028
##   .. .. [list output truncated]
##   ..@ plotOrder  : int [1:23028] 2 3 4 5 6 7 8 9 10 11 ...
##   ..@ bbox       : num [1:2, 1:2] 210 210 3630 6270
##   .. ..- attr(*, "dimnames")=List of 2
##   ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
tmg@data[1,] #check table
##   tm1 tm2 tm3 tm4 tm5 tm7      ndvi
## 1  38  26  44  73 107  49 0.2478632

We add a cell id

tmg@data <- cbind(cell=1:ncell(tm),tmg@data)
tmg@data[1:3,] #check table (print the first 3 rows)
##   cell tm1 tm2 tm3 tm4 tm5 tm7      ndvi
## 1    1  38  26  44  73 107  49 0.2478632
## 2    2  37  22  47  70 117  53 0.1965812
## 3    3  33  22  43  64 101  47 0.1962617
dim(tmg@data)
## [1] 23028     8

Write as Shapefile for QGIS

writeOGR(tmg,dsn="tmg",layer="tmg",driver="ESRI Shapefile",overwrite=TRUE)

2.3 Extract average percent tree for each 30m cell

The logical command would be

#trees30 <- extract(trees10,tmg,df=TRUE,fun=mean,na.rm=TRUE)

…but unfortunately is so slow for these many polygons that cannot be used in practice Using spatialEco::zonal.stats with this grid is also very slow:

#trees30 <- zonal.stats(x=tmg, y=trees10,stat=mean, trace = FALSE, plot = FALSE)

As adviced by RH, we rasterize and use zonal.
*(but note advive by Pierre Racine Pierre.Racine@sbf.ulaval.ca PostGIS WKT Raster was made to do exactly this kind of operation on large datasets. Give it a try! http://trac.osgeo.org/postgis/wiki/WKTRasterTutorial01)* We muste create a raster at 10m resolution with the cell ids of the TM raster at 30 m and use zonal() (raster over raster is much faster but requires both rasters to have identical geometry). to extract the tree density values at 30m resolution We first create a 30m grid raster

# tmgr <- rasterize(tmg,y=tm[[1]],field="cell")

or much faster:

tmgr <- tm[[1]]
tmgr[] <- 1:ncell(tmgr)

and increase resolution from 30m to 10m

tmgr10 <- disaggregate(tmgr,fact=3)
writeRaster(tmgr10,file="tmgr10",format="GTiff",datatype="INT2S",overwrite=TRUE)

For zonal, the 2 raster layers must have same extent and resolution, thus we crop the tree density layer to match the TM grid raster:

trees10_tm <- crop(trees10,tmgr)
writeRaster(trees10_tm,file="trees10_tm",format="GTiff",datatype="INT2S",overwrite=TRUE)

now ready for zonal:

trees30 <- zonal(trees10_tm,tmgr10,df=TRUE,fun=mean,na.rm=TRUE)

which produces a matrix with cell, average value

class(trees30)
## [1] "matrix"
trees30[1:3,]
##      zone    value
## [1,]    1 36.00000
## [2,]    2 41.77778
## [3,]    3 46.66667

Same nb. of rows than the table of the SPolDF calculated earlier

dim(trees30)
## [1] 23028     2
dim(tmg@data)
## [1] 23028     8

We add the column of ndvi values to the table

tmg@data <-cbind(tmg@data,percenttrees=trees30[,2])
tmg@data[1:3,]
##   cell tm1 tm2 tm3 tm4 tm5 tm7      ndvi percenttrees
## 1    1  38  26  44  73 107  49 0.2478632     36.00000
## 2    2  37  22  47  70 117  53 0.1965812     41.77778
## 3    3  33  22  43  64 101  47 0.1962617     46.66667

2.4 Relationship NDVI vs. Percent Tree Cover

“Ordinary” plot hides the frequency information:

ggplot(data=tmg@data,aes(x=percenttrees, y=ndvi)) +
  geom_point()

but a 2D histogram reveals the relationship

gg30 <- ggplot(data=tmg@data,aes(x=percenttrees, y=ndvi)) +
  geom_bin2d(bins=50) +
  scale_fill_gradientn(colours=c("grey70",matlab.like(9))) +
  geom_smooth(method='lm', col="black") +
  ggtitle("30m resolution")
gg30

treesndvi30lm <- lm(ndvi ~ percenttrees,data=tmg@data)
summary(treesndvi30lm)
## 
## Call:
## lm(formula = ndvi ~ percenttrees, data = tmg@data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20101 -0.03113 -0.00554  0.02588  0.35384 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.061e-02  4.851e-04   186.8   <2e-16 ***
## percenttrees 2.240e-03  2.064e-05   108.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04867 on 23026 degrees of freedom
## Multiple R-squared:  0.3384, Adjusted R-squared:  0.3384 
## F-statistic: 1.178e+04 on 1 and 23026 DF,  p-value: < 2.2e-16

The lm explains ~ 34% of the total variance on NDVI (i.e. Note that for percentree=0, NDVI has a certaing variability)

R2_30 <- summary(treesndvi30lm)$r.squared

3. Increasing coarseness

We integrate the above steps in one function and apply it at increasing coarsening levels (90m, 150m, 210m)

source("mcgwirecoarsen.R")

The inputs are the tm to be coarsened, the observed layer of percent tree cover and the coarsening factor The ouptut is a Spatial Polygons Data Frame with a table with the average TM values, ndvi and observed percent trre cover

tmg90 <- mcgwirecoarsen(tmb=tm,trees=trees10,fcoars=3)
tmg150 <- mcgwirecoarsen(tmb=tm,trees=trees10,fcoars=5)
tmg210 <- mcgwirecoarsen(tmb=tm,trees=trees10,fcoars=7)

Check one of the outputs:

tmg90@data[1:3,]
##   cell      tm1      tm2      tm3      tm4       tm5      tm7      ndvi
## 1    1 33.44444 22.77778 42.22222 69.00000 103.55556 47.55556 0.2407592
## 2    2 31.55556 21.55556 40.66667 66.11111  97.55556 43.77778 0.2382934
## 3    3 29.77778 21.22222 37.88889 66.11111  94.11111 41.77778 0.2713675
##   percenttrees
## 1     43.01235
## 2     41.72840
## 3     41.18519

Once we have the output data tables, we fit the linear models and plot:

gg90 <- ggplot(data=tmg90@data,aes(x=percenttrees, y=ndvi)) +
  geom_bin2d(bins=50) +
  scale_fill_gradientn(colours=c("grey70",matlab.like(9))) +
  geom_smooth(method='lm', col="black") +
  ggtitle("90m resolution")
treesndvi90lm <- lm(ndvi ~ percenttrees,data=tmg90@data)
R2_90 <- summary(treesndvi90lm)$r.squared

gg150 <- ggplot(data=tmg150@data,aes(x=percenttrees, y=ndvi)) +
  geom_bin2d(bins=50) +
  scale_fill_gradientn(colours=c("grey70",matlab.like(9))) +
  geom_smooth(method='lm', col="black") +
  ggtitle("150m resolution")
treesndvi150lm <- lm(ndvi ~ percenttrees,data=tmg150@data)
R2_150 <- summary(treesndvi150lm)$r.squared

gg210 <- ggplot(data=tmg210@data,aes(x=percenttrees, y=ndvi)) +
  geom_bin2d(bins=50) +
  scale_fill_gradientn(colours=c("grey70",matlab.like(9))) +
  geom_smooth(method='lm', col="black") +
  ggtitle("210m resolution")
treesndvi210lm <- lm(ndvi ~ percenttrees,data=tmg210@data)
R2_210 <- summary(treesndvi210lm)$r.squared
  • Linear models of NDVI vs. Percent Tree Coverincreasing coarseness“) *
grid.arrange(gg30, gg90, gg150, gg210, nrow = 2)

rm(gg30, gg90, gg150, gg210)

We observe an increase in R2 with increasing coarsening, in particular up to 150m

ggplot(data=data.frame(x=c(30,90,150,210),y=c(R2_30, R2_90, R2_150, R2_210))) +
  geom_line(aes(x,y)) +
  geom_point(aes(x,y)) +
  xlab("Resolution (m)") + ylab("R^2")

4. Compare data and predicted image at 150m resolution

We select 150m as the best compromise between R2 and coarseness We calculate and save as tiff the TM bands and NDVI at 150 m resolution

tm150 <- aggregate(tmn,fact=5,fun=mean,na.rm=TRUE)
ndvi150 <- (tm150[[4]] - tm150[[3]])/(tm150[[4]] + tm150[[3]])
writeRaster(tm150,file="tm150",format="GTiff",overwrite=TRUE)
writeRaster(ndvi150,file="ndvi150",format="GTiff",overwrite=TRUE)
ggR(ndvi150, geom_raster=TRUE) + 
  scale_fill_gradientn(name = "NDVI", colours = c("white",matlab.like2(9))) +
  ggtitle("NDVI at 150m resolution")

We calculate and save as tiff the observed layer of Percent tree cover at 150 m resolution

treesobsv150 <- aggregate(trees,fact=15,fun=mean,na.rm=TRUE)
writeRaster(treesobsv150,file="treesobsv150",format="GTiff",overwrite=TRUE)
ggobsv <- ggR(treesobsv150, geom_raster=TRUE) + 
  scale_fill_gradientn(name = "Tree Density", colours = c("white",matlab.like2(9))) +
  ggtitle("Observed Tree Density at 150m resolution")

Using the linear model, we calculate and save as tiff the predicted layer of Percent tree cover at 150 m resolution

coef150 <- coefficients(treesndvi150lm)
treespred150 <- (ndvi150-coef150[1])/coef150[2]
writeRaster(treespred150,file="treespred150",format="GTiff",overwrite=TRUE)
ggpred <- ggR(treespred150, geom_raster=TRUE) + 
  scale_fill_gradientn(name = "Tree Density", colours = c("white",matlab.like2(9))) +
  ggtitle("Predicted Tree Density at 150m resolution")
grid.arrange(ggobsv, ggpred, ncol=2)

And explore in QGIS

#