Comparing GPS data to High-resolution DEM data of different sources in Deception Is.

knitr::opts_chunk$set(message = FALSE, warning = FALSE, 
                      fig.caption = TRUE, eval=TRUE)

require(raster)
require(rgdal)
require(ggplot2)
require(gridExtra)
require(RStoolbox)
require(reshape2)
require(lubridate)
require(broom)
require(tmap)

# require(plotly)
# require(scales)
# require(MASS)
# require(lmodel2)
# require(ggspatial)

dirdem1  <- "../GEODATA/DEMetc/fromSIMAC"
dirdem2  <- "../GEODATA/DEMetc/TanDEMX"
dirdem3  <- "../GEODATA/DEMetc/REMA"
dirtopo <- "../GEODATA/TOPO"
dirgps <- "../20180218Campaign/SimonGCP/GPS_measurements_SNOW_GLACIERS/SGGPS32720"
dirwtile <- "../GEODATA/DEMetc/TanDEMX/TanDEMXWrongTile_epsg32720_buf30"

1. Input data and preliminary operations

  • GPS data provided by simon.gascoin@cesbio.cnes.fr
    • see SGGPS2shp_log.R for converting original files to shp
  • SIMAC DEM (Ref. [1])
    • Original layer SIMAC/inf_vector/GRID/g2504028001 in SIMAC
    • See simacdem_Hmasl2HWGS84_log.R for changes in vertical reference:
    • simHwv1: Conversion to WGS84 vertical ellipsoid (assuming original masl == EGM96)
    • simHwv2: Conversion to WGS84 vertical ellipsoid (assuming original masl == GeoiDEC14 (Jigena et.al 2016. Antarctic Science 28(4), 277–292 (2016) (http://goo.gl/ZbxRZS)
  • REMA DEM (Ref. [2])
    • Original tile 45_04_8m_dem.tif
    • rema: Clipped and reprojected to epsg 32720 in QGIS
  • TanDEM-X (Ref. [3])
    • Original data TDM1_DEM__04_S64W062_DEM.tif and TDM1_DEM__04_S63W062_DEM.tif
    • tandemxNA: Clipped and mosaicked in QGIS. Region of wrong tile set to NA
  • Topographic map: Centro Geografico del Ejercito (Spain)

1.1 Read GPS positions, DEMs and Topo map

sgps <- readOGR(dsn=dirgps,layer="SGGPS32720",stringsAsFactors = FALSE,verbose=FALSE)
tandemxNA <- raster(file.path(dirdem2,"DecepAOI1_TDM1_DEMNA_epsg32720.tif"))

tandemxNA[tandemxNA==-9999] <- NA
#see tandemxNA_log.R (tandemx data with wrong tile masked out)
simHwv1 <- raster(file.path(dirdem1,"simacdemHWGS84vEGM96_epsg32720.tif"))
simHwv2 <- raster(file.path(dirdem1,"simacdemHWGS84vGeoiDEC14_epsg32720.tif"))
rema <- raster(file.path(dirdem3,"REMA_Decep_epsg32720.tif"))
topo <- brick(file.path(dirtopo,"2005_IslaDecepcion_TOPO.jpg"))
projection(topo) <- CRS("+init=epsg:32720")

1.2 Extract DEM values at GPS positions

Verify projections and extract DEM values at each GPS position:

projection(sgps)
projection(tandemx)
projection(simHwv1)
projection(simHwv2)
projection(rema)
projection(tandemxNA)
sgpsDEMsimac <- extract(stack(simHwv1,simHwv2),sgps,df=TRUE)
sgpsDEMrema <- extract(rema,sgps,df=TRUE)
sgpsDEMtandemx <- extract(tandemxNA,sgps,df=TRUE)
delme <- data.frame(sgpsDEMsimac$ID,coordinates(sgps),sgpsDEMsimac[,-1],sgpsDEMrema[,-1],sgpsDEMtandemx[,-1])
names(delme) <- c("ID","UTMX","UTMY","simHwv1","simHwv2","rema","tandemx")
#head(delme,3)

1.3 Include DEM values in GPS data table

sgpsDEM <- sgps
sgpsDEM@data <- data.frame(sgps@data,delme)
names(sgpsDEM)[3] <- "GPS_Height"
sgpsDEM@data$DateTime <- ymd_hms(sgpsDEM@data$DateTime)

Resulting dataset (show few rows)

knitr::kable(head(sgpsDEM@data[-(6:12)],3))
Date DateTime GPS_Height Q ns ratio ID UTMX UTMY simHwv1 simHwv2 rema tandemx
2018-02-24 2018-02-24 12:54:00 27.1269 1 8 12.6 1 617791.9 3014852 29.19249 28.03763 18.02536 21.72486
2018-02-24 2018-02-24 12:54:01 27.1317 1 8 12.0 2 617791.9 3014852 29.19249 28.03763 18.02536 21.72486
2018-02-24 2018-02-24 12:54:02 27.1321 1 8 11.8 3 617791.9 3014852 29.19249 28.03763 18.02536 21.72486

1.4 Tidy data

Arrange as “Long format” and discard any value <= 0

dfl <- melt(sgpsDEM@data,id.vars = c(1:5,13:16),measure.vars=17:20)
#head(dfl)
#dim(dfl)
names(dfl)[10:11] <- c("Source","DEM_Height")

Show few rows of dataset in “long” format:

knitr::kable(head(dfl,3))
Date DateTime GPS_Height Q ns ratio ID UTMX UTMY Source DEM_Height
2018-02-24 2018-02-24 12:54:00 27.1269 1 8 12.6 1 617791.9 3014852 simHwv1 29.19249
2018-02-24 2018-02-24 12:54:01 27.1317 1 8 12.0 2 617791.9 3014852 simHwv1 29.19249
2018-02-24 2018-02-24 12:54:02 27.1321 1 8 11.8 3 617791.9 3014852 simHwv1 29.19249

Note 1 GPS value is <0:

knitr::kable(dfl[dfl$GPS_Height<=0,])
Date DateTime GPS_Height Q ns ratio ID UTMX UTMY Source DEM_Height
98337 2018-03-10 2018-03-10 16:21:32 -21.4695 2 5 1 98337 617765.4 3014861 simHwv1 27.16510
196689 2018-03-10 2018-03-10 16:21:32 -21.4695 2 5 1 98337 617765.4 3014861 simHwv2 26.01107
295041 2018-03-10 2018-03-10 16:21:32 -21.4695 2 5 1 98337 617765.4 3014861 rema 15.22816
393393 2018-03-10 2018-03-10 16:21:32 -21.4695 2 5 1 98337 617765.4 3014861 tandemx 20.83290
dfl$GPS_Height[dfl$GPS_Height<=0] <- NA

and quite a number of DEM values are <= 0:

nrow(dfl[dfl$DEM_Height<=0,])
## [1] 22051
dfl$DEM_Height[dfl$DEM_Height<=0] <- NA

2. Comparison of DEM values to GPS height data

2.1 Lineal models of DEM vs. GPS height values

2.1.1 By DEM source, all trips considered

lmstats <- NULL
for (s in unique(dfl$Source)){
    delme <- lm(DEM_Height ~ GPS_Height, data=dfl[dfl$Source==s,])
    lmstats <- rbind(lmstats,data.frame(tidy(delme)[1,c(2,5)],tidy(delme)[2,c(2,5)],glance(delme)[c(1,5)]))
}
lmstats <- data.frame(unique(dfl$Source),lmstats)
colnames(lmstats) <- c("DEM","Intercept", "p.int", "Slope", "p.slope", "R2", "p.R2")
#print.data.frame(data.frame(DEM=lmstats$DEM,round(lmstats[,-1],3)))
knitr::kable(lmstats,digits=3)
DEM Intercept p.int Slope p.slope R2 p.R2
simHwv1 -2.691 0 1.005 0 0.991 0
simHwv2 -3.821 0 1.005 0 0.991 0
rema -13.813 0 1.050 0 0.964 0
tandemx -3.528 0 0.999 0 0.993 0
p <- ggplot(data=dfl) +
    geom_point(aes(x=GPS_Height,y=DEM_Height,color=Date),size=1) +
    geom_smooth(aes(x=GPS_Height,y=DEM_Height),method=lm,color="black",size=0.5) +
    geom_abline(aes(intercept=0, slope=1),linetype="dotdash", size=0.5) +
    xlab("GPS Height (m)") + ylab("DEM (m)") +
    scale_color_brewer(palette="Dark2")
p + facet_wrap(~Source, ncol=2) + 
    coord_fixed(ratio=1) +
    guides(color = guide_legend(override.aes = list(size=3))) +
    labs(title="Scatterplots and linear regressions of DEM values vs. GPS heights")

ggplot(data=dfl) +
    geom_histogram(aes(x=DEM_Height-GPS_Height),
                   fill="lightblue",binwidth=8,color="black",show.legend=FALSE) +
    xlab("DEM Height - GPS Height (m)") +
    facet_wrap(~Source, ncol=2, scales="fixed") +
    ggtitle("Histograms of the DEM - GPS height differences")

2.1.2 By DEM source and trip

lmstats2 <- tripdem <- NULL
dfl <- na.omit(dfl)
for (v in unique(dfl$Date)){
    sel <- dfl[dfl$Date==v,]
    for (s in unique(sel$Source)){
        delme <- lm(DEM_Height ~ GPS_Height, data=sel[sel$Source==s,])
        lmstats2 <- rbind(lmstats2,data.frame(tidy(delme)[1,c(2,5)],tidy(delme)[2,c(2,5)],glance(delme)[c(1,5)]))
    }
    tripdem <- rbind(tripdem,data.frame(v,unique(sel$Source)))
}
lmstats2 <- data.frame(tripdem,lmstats2)
colnames(lmstats2) <- c("Trip", "DEM","Intercept", "p.int", "Slope", "p.slope", "R2", "p.R2")
knitr::kable(lmstats2,digits=3)
Trip DEM Intercept p.int Slope p.slope R2 p.R2
2018-02-24 simHwv1 -0.103 0.097 1.013 0 0.996 0
2018-02-24 simHwv2 -1.216 0.000 1.013 0 0.996 0
2018-02-24 rema -11.110 0.000 1.040 0 0.987 0
2018-02-24 tandemx -4.423 0.000 0.999 0 0.994 0
2018-02-25 simHwv1 2.231 0.000 0.996 0 0.999 0
2018-02-25 simHwv2 1.101 0.000 0.996 0 0.999 0
2018-02-25 rema -14.573 0.000 1.131 0 0.842 0
2018-02-25 tandemx -4.482 0.000 0.992 0 0.999 0
2018-02-26 simHwv1 9.358 0.000 0.724 0 0.630 0
2018-02-26 simHwv2 8.190 0.000 0.724 0 0.630 0
2018-02-26 rema 0.132 0.729 0.697 0 0.595 0
2018-02-26 tandemx 12.315 0.000 0.650 0 0.506 0
2018-02-27 simHwv1 -13.857 0.000 1.032 0 0.949 0
2018-02-27 simHwv2 -15.093 0.000 1.032 0 0.949 0
2018-02-27 rema -8.816 0.000 1.013 0 0.990 0
2018-02-27 tandemx 3.333 0.000 0.996 0 0.991 0
2018-03-03 simHwv1 0.071 0.388 1.007 0 0.998 0
2018-03-03 simHwv2 -1.050 0.000 1.007 0 0.998 0
2018-03-03 rema -10.592 0.000 1.004 0 0.998 0
2018-03-03 tandemx -4.780 0.000 1.000 0 0.999 0
2018-03-07 simHwv1 0.692 0.000 0.998 0 0.999 0
2018-03-07 simHwv2 -0.694 0.000 0.999 0 0.999 0
2018-03-07 rema -10.046 0.000 1.002 0 1.000 0
2018-03-10 simHwv1 0.723 0.000 0.977 0 0.963 0
2018-03-10 simHwv2 -0.389 0.007 0.977 0 0.963 0
2018-03-10 rema -9.940 0.000 0.997 0 0.996 0
2018-03-10 tandemx -3.348 0.000 1.025 0 0.989 0

Note the lower R2 values and slopes departing from 1 in trip 20180226

p + theme(legend.position="none") +
    coord_fixed(ratio=1) +
    facet_grid(Date~Source) +
    labs(title="Scatterplots of DEM values vs. GPS heights by trip and DEM")

  • Some anomalies appear in one particular DEM only (e.g. REMA on 20180225), which indicates a problem with that particular DEM
  • Some other anomalies are consistent across DEMs, which indicates a problem with some GPS readings (e.g. horizontal segment on 20180226 and horizonatal peak on 20180227)

2.2 Differences DEM - GPS by trip

2.2.1 Scatterplots of DEM values vs. GPS height by trip

Note differences in dispersion among trips

ggplot(data=dfl) +
    geom_point(aes(x=GPS_Height,y=DEM_Height,color=Source),size=1,alpha=0.5) +
    #geom_smooth(aes(x=GPS_Height,y=DEM_Height),method=lm,color="black",size=0.5) +
    geom_abline(aes(intercept=0, slope=1),linetype="dotdash", size=0.5) +
    scale_color_brewer(palette="Set1") +
    xlab("GPS Height (m)") + ylab("DEM (m)") +
    facet_wrap(~Date, scales="free")+
    guides(color = guide_legend(override.aes = list(size=3))) +
    labs(title="Scatterplots of DEM values vs. GPS heights by trip")

2.2.2 Display Trips on topo map for reference

a <- ggspatial::df_spatial(sgps)
p <- ggRGB(topo,r=1,g=2,b=3,ggObj=TRUE)
p <- p + geom_point(data=a,aes(x,y,color=Date),size=1) + 
    xlab("UTMX") + ylab("UTMY") +
    scale_color_brewer(palette="Dark2") +
    guides(color = guide_legend(override.aes = list(size=3))) +
    ggtitle ("Simon Gascoin GPS Trips", subtitle= "(ICTJA-CSIC campaign 2018)")
p

2.2.3 Geographic display of differences as bubble plot

difs <- sgpsDEM@data[,17:20] - sgpsDEM@data$GPS_Height
names(difs) <- c("DsimHwv1", "DsimHwv2", "Drema", "Dtandemx")
sgpsDEM@data <- data.frame(sgpsDEM@data,difs)

Save as shapefile for ulterior exploration

writeOGR(sgpsDEM,dsn="./sgpsDEM",layer="sgpsDEM", driver="ESRI Shapefile",
         overwrite=TRUE)

Note zones of anomalies.This spatial distribution deserves further attention in interactive display

# #+ fig.height=16, fig.width=18,  fig.show='hold'
# #spplot(sgpsDEM,21:22)
# b1 <-bubble(sgpsDEM[!is.na(sgpsDEM@data$DsimHwv2),],zcol="DsimHwv1",
#             main="SIMAC DEM (ellipsoidal v1)")
# b2 <- bubble(sgpsDEM[!is.na(sgpsDEM@data$DsimHwv2),],zcol="DsimHwv2",
#              main="SIMAC DEM (ellipsoidal v2)")
# b3 <- bubble(sgpsDEM[!is.na(sgpsDEM@data$Drema),],zcol="Drema",
#              main = "REMA DEM")
# b4 <- bubble(sgpsDEM[!is.na(sgpsDEM@data$Dtandemx),],zcol="Dtandemx",
#              main="TanDEMX DEM")
# #grid.arrange(p,b2,b3,b4)
# grid.arrange(b1,b2,b3,b4,top="Differences to GPS readings (m)")
#rm(a,p,topo)
#Much better:
dfl$Dif <- dfl$DEM_Height - dfl$GPS_Height
dfl$sign <- ifelse(dfl$Dif<0,"<=0",">0")
dfl$absDif <- abs(dfl$Dif)
coordinates(dfl) <- ~UTMX+UTMY
projection(dfl) <- CRS("+init=epsg:32720")
writeOGR(dfl,dsn="GPSvsDEMsLF",layer="GPSvsDEMsLF",driver="ESRI Shapefile",overwrite=TRUE)
topocoarse <- aggregate(topo,fact=10)
tm_shape(topocoarse) +
    tm_rgb() +
    tm_shape(dfl) +
    tm_bubbles(size="absDif",style="fixed",col="sign",
               palette="-RdYlBu",border.alpha =0) +
    tm_facets(by="Source",ncol=2, free.coords=FALSE) +
    tm_layout(main.title="DEM values - GPS height values",
              legend.outside=TRUE, legend.outside.position="bottom",
              legend.stack="horizontal", outer.margins=0,legend.outside.size = .2)

3.Conclusions

  • SIMAC and TanDEM-X show very strong linear relations to GPS readings, with slopes very close to 1 R2 > 0.99 and offsets < 4 m below the GPS readings.
  • REMA vs. GPS relationship is also linear but deteriorated by a large discrepancy in the area of. Mt. Irizar, which should be investigated (difference in snow pack?). Otherwise, REMA looks the most linear. To note its much larger offset: REMA is 13.8 m below GPS readings.
  • Few DEM vs. GPS discrepancies are consistent among all DEMs and spatially contiguous, indicating problems with some GPS data (post-processing issues?)
  • In general, a comprehensive spatial (and interactive) analysis of the DEM - GPS discrepancies should be undertaken

4. References

  • [1] SIMAC
    • Torrecillas, Cristina, Manuel Berrocoso, and Alicia García-García. 2006. “The Multidisciplinary Scientific Information Support System (SIMAC) for Deception Island.” In Antarctica, edited by Dieter Karl Fütterer, Detlef Damaske, Georg Kleinschmidt, Hubert Miller, and Franz Tessensohn, 397–402. Berlin/Heidelberg: Springer-Verlag. https://doi.org/10.1007/3-540-32934-X_50.
  • [2] REMA
    • DEMs provided by the Byrd Polar and Climate Research Center and the Polar Geospatial Center under NSF-OPP awards 1543501, 1810976, 1542736, 1559691, 1043681, 1541332, 0753663, 1548562, 1238993 and NASA award NNX10AN61G. Computer time provided through a Blue Waters Innovation Initiative. DEMs produced using data from DigitalGlobe, Inc.
    • ’Howat, Ian; Morin, Paul; Porter, Claire; Noh, Myong-Jong, 2018, “The Reference Elevation Model of Antarctica”, https://doi.org/10.7910/DVN/SAIK8B, Harvard Dataverse, V1
  • [3] TanDEMX