knitr::opts_chunk$set(fig.path = "figure/RmcgwireNewv20180417_")
#rwd <- "/media/Iomega_HDD/mcgwire/Rmcgwire"
rastdir <- ".."
vectdir <- "../OTB"
require(rgdal)
require(raster)
#require(gplots)
require(ggplot2)
require(gridExtra)
require(colorRamps)
require(RStoolbox)
source("mcgwirecoarsen.R")
30m resolution #### Multi-band brick object
Note we assume UTM N11 (California) but arbitrary coordinates
tmbrick <- brick(file.path(rastdir,"tm_all.tif"))
projection(tmbrick)
## [1] "+proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"
(actual location not revealed by the author? check original data)
discard thermal band (which had been moved to 7th position)
tm <- tmbrick[[1:6]]
names(tm) <- paste("tm",c(1:5,7),sep="")
#plotRGB(tm,r=2,g=4,b=3,stretch="lin")
ggRGB(tm,r=5,g=4,b=2,stretch="lin") +
ggtitle("TM false color composite (5,4,2) at 30m resolution")
10m resolution
trees10 <- raster(file.path(rastdir,"trees10v2.tif"))
trees10[is.na(trees10)] <- 0 #0 is not NA
ggR(trees10, geom_raster=TRUE) +
scale_fill_gradientn(name = "Tree density", colours = c("white",matlab.like2(9))) +
ggtitle("Tree density at 10 m resolution")
We make a crude approximation here, as at the bands should be converted to Surface Reflectance.
ndvi <- (tm[[4]] - tm[[3]])/(tm[[4]] + tm[[3]])
names(ndvi) <- "ndvi"
#ggR(ndvi,stretch="lin")
ggR(ndvi, geom_raster=TRUE) +
scale_fill_gradientn(name = "NDVI", colours = c("white",matlab.like2(9))) +
ggtitle("NDVI at 30m resolution")
writeRaster(ndvi,filename="ndvi30",format="GTiff",overwrite=TRUE)
Note: SPolDF is the R object for a polygon vector layer in GIS We add the ndvi layer to the brick
tmn <- addLayer(tm,ndvi)
tmn #check
## class : RasterStack
## dimensions : 202, 114, 23028, 7 (nrow, ncol, ncell, nlayers)
## resolution : 30, 30 (x, y)
## extent : 210, 3630, 210, 6270 (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=11 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
## names : tm1, tm2, tm3, tm4, tm5, tm7, ndvi
## min values : 2.00000000, 5.00000000, 7.00000000, 9.00000000, 4.00000000, 3.00000000, -0.02362205
## max values : 92.0000000, 64.0000000, 117.0000000, 118.0000000, 255.0000000, 175.0000000, 0.4444444
Polygonize
tmg <- rasterToPolygons(tmn*1.0, fun=NULL)
class(tmg) #check class
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
str(tmg, max.level=2)#investigate structure; max.level=2 to avoid recursive info
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 23028 obs. of 7 variables:
## ..@ polygons :List of 23028
## .. .. [list output truncated]
## ..@ plotOrder : int [1:23028] 2 3 4 5 6 7 8 9 10 11 ...
## ..@ bbox : num [1:2, 1:2] 210 210 3630 6270
## .. ..- attr(*, "dimnames")=List of 2
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
tmg@data[1,] #check table
## tm1 tm2 tm3 tm4 tm5 tm7 ndvi
## 1 38 26 44 73 107 49 0.2478632
We add a cell id
tmg@data <- cbind(cell=1:ncell(tm),tmg@data)
tmg@data[1:3,] #check table (print the first 3 rows)
## cell tm1 tm2 tm3 tm4 tm5 tm7 ndvi
## 1 1 38 26 44 73 107 49 0.2478632
## 2 2 37 22 47 70 117 53 0.1965812
## 3 3 33 22 43 64 101 47 0.1962617
dim(tmg@data)
## [1] 23028 8
Write as Shapefile for QGIS
writeOGR(tmg,dsn="tmg",layer="tmg",driver="ESRI Shapefile",overwrite=TRUE)
The logical command would be
#trees30 <- extract(trees10,tmg,df=TRUE,fun=mean,na.rm=TRUE)
…but unfortunately is so slow for these many polygons that cannot be used in practice Using spatialEco::zonal.stats with this grid is also very slow:
#trees30 <- zonal.stats(x=tmg, y=trees10,stat=mean, trace = FALSE, plot = FALSE)
As adviced by RH, we rasterize and use zonal.
*(but note advive by Pierre Racine Pierre.Racine@sbf.ulaval.ca PostGIS WKT Raster was made to do exactly this kind of operation on large datasets. Give it a try! http://trac.osgeo.org/postgis/wiki/WKTRasterTutorial01)* We muste create a raster at 10m resolution with the cell ids of the TM raster at 30 m and use zonal() (raster over raster is much faster but requires both rasters to have identical geometry). to extract the tree density values at 30m resolution We first create a 30m grid raster
# tmgr <- rasterize(tmg,y=tm[[1]],field="cell")
or much faster:
tmgr <- tm[[1]]
tmgr[] <- 1:ncell(tmgr)
and increase resolution from 30m to 10m
tmgr10 <- disaggregate(tmgr,fact=3)
writeRaster(tmgr10,file="tmgr10",format="GTiff",datatype="INT2S",overwrite=TRUE)
For zonal, the 2 raster layers must have same extent and resolution, thus we crop the tree density layer to match the TM grid raster:
trees10_tm <- crop(trees10,tmgr)
writeRaster(trees10_tm,file="trees10_tm",format="GTiff",datatype="INT2S",overwrite=TRUE)
now ready for zonal:
trees30 <- zonal(trees10_tm,tmgr10,df=TRUE,fun=mean,na.rm=TRUE)
which produces a matrix with cell, average value
class(trees30)
## [1] "matrix"
trees30[1:3,]
## zone value
## [1,] 1 36.00000
## [2,] 2 41.77778
## [3,] 3 46.66667
Same nb. of rows than the table of the SPolDF calculated earlier
dim(trees30)
## [1] 23028 2
dim(tmg@data)
## [1] 23028 8
We add the column of ndvi values to the table
tmg@data <-cbind(tmg@data,percenttrees=trees30[,2])
tmg@data[1:3,]
## cell tm1 tm2 tm3 tm4 tm5 tm7 ndvi percenttrees
## 1 1 38 26 44 73 107 49 0.2478632 36.00000
## 2 2 37 22 47 70 117 53 0.1965812 41.77778
## 3 3 33 22 43 64 101 47 0.1962617 46.66667
“Ordinary” plot hides the frequency information:
ggplot(data=tmg@data,aes(x=percenttrees, y=ndvi)) +
geom_point()
but a 2D histogram reveals the relationship
gg30 <- ggplot(data=tmg@data,aes(x=percenttrees, y=ndvi)) +
geom_bin2d(bins=50) +
scale_fill_gradientn(colours=c("grey70",matlab.like(9))) +
geom_smooth(method='lm', col="black") +
ggtitle("30m resolution")
gg30
treesndvi30lm <- lm(ndvi ~ percenttrees,data=tmg@data)
summary(treesndvi30lm)
##
## Call:
## lm(formula = ndvi ~ percenttrees, data = tmg@data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20101 -0.03113 -0.00554 0.02588 0.35384
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.061e-02 4.851e-04 186.8 <2e-16 ***
## percenttrees 2.240e-03 2.064e-05 108.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04867 on 23026 degrees of freedom
## Multiple R-squared: 0.3384, Adjusted R-squared: 0.3384
## F-statistic: 1.178e+04 on 1 and 23026 DF, p-value: < 2.2e-16
The lm explains ~ 34% of the total variance on NDVI (i.e. Note that for percentree=0, NDVI has a certaing variability)
R2_30 <- summary(treesndvi30lm)$r.squared
We integrate the above steps in one function and apply it at increasing coarsening levels (90m, 150m, 210m)
source("mcgwirecoarsen.R")
The inputs are the tm to be coarsened, the observed layer of percent tree cover and the coarsening factor The ouptut is a Spatial Polygons Data Frame with a table with the average TM values, ndvi and observed percent trre cover
tmg90 <- mcgwirecoarsen(tmb=tm,trees=trees10,fcoars=3)
tmg150 <- mcgwirecoarsen(tmb=tm,trees=trees10,fcoars=5)
tmg210 <- mcgwirecoarsen(tmb=tm,trees=trees10,fcoars=7)
Check one of the outputs:
tmg90@data[1:3,]
## cell tm1 tm2 tm3 tm4 tm5 tm7 ndvi
## 1 1 33.44444 22.77778 42.22222 69.00000 103.55556 47.55556 0.2407592
## 2 2 31.55556 21.55556 40.66667 66.11111 97.55556 43.77778 0.2382934
## 3 3 29.77778 21.22222 37.88889 66.11111 94.11111 41.77778 0.2713675
## percenttrees
## 1 43.01235
## 2 41.72840
## 3 41.18519
Once we have the output data tables, we fit the linear models and plot:
gg90 <- ggplot(data=tmg90@data,aes(x=percenttrees, y=ndvi)) +
geom_bin2d(bins=50) +
scale_fill_gradientn(colours=c("grey70",matlab.like(9))) +
geom_smooth(method='lm', col="black") +
ggtitle("90m resolution")
treesndvi90lm <- lm(ndvi ~ percenttrees,data=tmg90@data)
R2_90 <- summary(treesndvi90lm)$r.squared
gg150 <- ggplot(data=tmg150@data,aes(x=percenttrees, y=ndvi)) +
geom_bin2d(bins=50) +
scale_fill_gradientn(colours=c("grey70",matlab.like(9))) +
geom_smooth(method='lm', col="black") +
ggtitle("150m resolution")
treesndvi150lm <- lm(ndvi ~ percenttrees,data=tmg150@data)
R2_150 <- summary(treesndvi150lm)$r.squared
gg210 <- ggplot(data=tmg210@data,aes(x=percenttrees, y=ndvi)) +
geom_bin2d(bins=50) +
scale_fill_gradientn(colours=c("grey70",matlab.like(9))) +
geom_smooth(method='lm', col="black") +
ggtitle("210m resolution")
treesndvi210lm <- lm(ndvi ~ percenttrees,data=tmg210@data)
R2_210 <- summary(treesndvi210lm)$r.squared
grid.arrange(gg30, gg90, gg150, gg210, nrow = 2)
rm(gg30, gg90, gg150, gg210)
We observe an increase in R2 with increasing coarsening, in particular up to 150m
ggplot(data=data.frame(x=c(30,90,150,210),y=c(R2_30, R2_90, R2_150, R2_210))) +
geom_line(aes(x,y)) +
geom_point(aes(x,y)) +
xlab("Resolution (m)") + ylab("R^2")
We select 150m as the best compromise between R2 and coarseness We calculate and save as tiff the TM bands and NDVI at 150 m resolution
tm150 <- aggregate(tmn,fact=5,fun=mean,na.rm=TRUE)
ndvi150 <- (tm150[[4]] - tm150[[3]])/(tm150[[4]] + tm150[[3]])
writeRaster(tm150,file="tm150",format="GTiff",overwrite=TRUE)
writeRaster(ndvi150,file="ndvi150",format="GTiff",overwrite=TRUE)
ggR(ndvi150, geom_raster=TRUE) +
scale_fill_gradientn(name = "NDVI", colours = c("white",matlab.like2(9))) +
ggtitle("NDVI at 150m resolution")
We calculate and save as tiff the observed layer of Percent tree cover at 150 m resolution
treesobsv150 <- aggregate(trees,fact=15,fun=mean,na.rm=TRUE)
writeRaster(treesobsv150,file="treesobsv150",format="GTiff",overwrite=TRUE)
ggobsv <- ggR(treesobsv150, geom_raster=TRUE) +
scale_fill_gradientn(name = "Tree Density", colours = c("white",matlab.like2(9))) +
ggtitle("Observed Tree Density at 150m resolution")
Using the linear model, we calculate and save as tiff the predicted layer of Percent tree cover at 150 m resolution
coef150 <- coefficients(treesndvi150lm)
treespred150 <- (ndvi150-coef150[1])/coef150[2]
writeRaster(treespred150,file="treespred150",format="GTiff",overwrite=TRUE)
ggpred <- ggR(treespred150, geom_raster=TRUE) +
scale_fill_gradientn(name = "Tree Density", colours = c("white",matlab.like2(9))) +
ggtitle("Predicted Tree Density at 150m resolution")
grid.arrange(ggobsv, ggpred, ncol=2)
And explore in QGIS
#