Case Study: Satellite Image Classification

Image segmentation of urban surfaces: a random forest classification model is built from the spectral signatures of a satellite image.

Define paths

# Load the R package
library(kwb.ml)

### Define paths

# Path dictionary. Entries may reference each other with "<name>"
# placeholders; these are expanded by kwb.utils::resolve() below.
path_list <- list(
  local_dir = getwd(),
  cloud_dir = "projects/keys/WP2_SUW_pollution_Jinxi/_DataAnalysis/gis",
  site = "Jinxi",
  data = "WP2_SUW_pollution_<site>",
  gis = "<local_dir>/<data>/_DataAnalysis/gis",
  # Regular expressions used to select the files to download from the cloud
  pattern_image = "input_image\\.img$",
  # "\\..+$" = a literal dot followed by the file extension. The previous
  # pattern "\\.*" meant "zero or more dots" and therefore did not require
  # the extension separator at all.
  pattern_groundtruth = "input_groundtruth\\..+$",
  satellite_image = "<gis>/input_image.img",
  groundtruth = "<gis>/input_groundtruth.shp"
)

# Expand all "<name>" placeholders so that the paths can be used directly
paths <- kwb.utils::resolve(path_list)

Image Files

This tutorial needs one satellite image file, input_image.img, which is downloaded directly from the KWB Cloud.

Currently seven input files are required:

  • input_image.img (input: name of satellite image file)

  • input_groundtruth.shp|dbf|cpg|prj|qpj|shx (input: ground truth data, i.e. the six component files of one shapefile)

Cloud

You can download the required files from the KWB cloud if you are a registered user with access to the folder projects/keys/WP2_SUW_pollution_Jinxi/_DataAnalysis/gis

To do so, follow the steps below:

  1. Open RStudio and run usethis::edit_r_environ()

  2. In the opened window add the required environment variables

NEXTCLOUD_URL = "https://<replace-with-kwb-cloud-url>"
NEXTCLOUD_USER = "<your-kwb-cloud-username>" # your username
NEXTCLOUD_PASSWORD = "your-nextcloud-app-password" ### see details below

For creating <your-nextcloud-app-password>:

  3. Finally, restart RStudio and proceed with the code below:

# Combine both file name patterns into one regular expression ("a|b")
required_files <- paste0(
  c(paths$pattern_image, paths$pattern_groundtruth),
  collapse = "|"
)

# List the content of the cloud folder and keep only the satellite image
# and the ground truth files
cloud_files <- kwb.nextcloud::list_files(
  paths$cloud_dir,
  full_info = TRUE
)

# dplyr::filter() is called directly so that no pipe operator needs to be
# attached; ".data" refers to the data frame that is being filtered
ml_files <- dplyr::filter(
  cloud_files,
  stringr::str_detect(.data$file, pattern = required_files)
)

ml_files

# Create the local target folder (including parent folders) if required
if (!dir.exists(paths$gis)) {
  fs::dir_create(paths$gis, recurse = TRUE)
}

# Download the selected files into the local target folder
kwb.nextcloud::download_files(
  href = ml_files$href,
  target_dir = paths$gis
)

# Show what is now available locally
fs::dir_ls(paths$gis)

# Full paths to the two files that are used in the sections below
paths$satellite_image
paths$groundtruth

Machine Learning

Input Data

Satellite Image

library(raster)
library(rgdal)

## Read the satellite image as a multi-layer raster and show it as an
## RGB composite

satellite_image <- raster::stack(x = paths$satellite_image)
raster::plotRGB(satellite_image)

Classification Dataset

### Training dataset: read the ground truth shapefile
groundtruth <- raster::shapefile(paths$groundtruth)

### Number of pre-classified polygons for each category
### (attribute column "cover")
table(groundtruth$cover)

### Plot of the pre-classified surfaces (used to train the ML model)
raster::plot(
  groundtruth,
  col = topo.colors(n = 5)
)

Setup Model

Build a random forest machine learning model for classification of the following five urban surfaces: roofs, streets, pervious areas, shadow (accounting for classification artifacts due to limited satellite image resolution), and water areas.


# Build the classification model
#
# Input files are read from 'rawdir'; the file names passed as
# 'spectrSigName' and 'modelName' are presumably written to the same
# folder (the model file is loaded from there in the next step).
kwb.ml::buildClassMod(
  rawdir = paths$gis,
  image = "input_image.img",
  groundTruth = "input_groundtruth.shp",
  # Mapping of surface class names to numeric class codes
  groundTruthValues = list(
    roof = 1,
    street = 2,
    pervious = 3,
    shadow = 4,
    water = 5
  ),
  spectrSigName = "spectrSig.Rdata",
  modelName = "rForest.Rdata",
  overlayExists = FALSE,
  # Leave one core free, but always request at least one core:
  # parallel::detectCores() returns 1 on single-core machines and may
  # return NA if the number of cores cannot be determined.
  nCores = max(1L, parallel::detectCores() - 1L, na.rm = TRUE),
  # NOTE(review): presumably the tuning grids for 'mtry' and 'ntree' and
  # the cross-validation setup -- confirm against ?kwb.ml::buildClassMod
  mtryGrd = 1:2,
  ntreeGrd = seq(80, 150, by = 10),
  nfolds = 3,
  nodesize = 3,
  cvrepeats = 2
)

# Check model performance
#
# load() restores the objects stored in the model file into the current
# environment; the object 'model' used below is expected to come from there.
load(file = file.path(paths$gis, "rForest.Rdata"))

# Compare the predictions of the final model with the training outcome,
# reported as precision/recall statistics
caret::confusionMatrix(
  data = model$finalModel$predicted,
  reference = model$trainingData$.outcome,
  mode = "prec_recall"
)
                       
# Classify the satellite image with the trained model; 'predName' is the
# file name given for the classified image
kwb.ml::predictSurfClass(
  rawdir = paths$gis,
  modelName = "rForest.Rdata",
  image = "input_image.img",
  predName = "classified_image.img"
)