---
title: "Tutorial"
author: "Michael Rustler"
date: "`r Sys.time()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Tutorial}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
editor_options:
  chunk_output_type: console
---
```{r, include = FALSE}
# Default chunk options for all chunks of this document
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# TRUE only if the environment variable "CI" is exactly "true". Used as `eval`
# condition of the chunks that export files.
is_ghactions <- identical("true", Sys.getenv("CI"))
### Functions that are used in this document but not shown to the user
# Write `x` to a JSON file and print a markdown sentence linking to that file.
#
# x:        object to be written, passed on to write_pretty_json()
# filename: name of the JSON file to be written
# what:     description of the data, used as the start of the sentence
# format:   text describing the file format. It was formerly ignored in favour
#           of the hard-coded text "in JSON format", which is now the default,
#           so existing calls that omit `format` print the same sentence.
write_json_and_inform <- function(
    x, filename, what, format = "in JSON format"
) {
  write_pretty_json(x, filename)
  also_available(what, format = format, filename = filename)
}
# Write `x` to the file `filename` using jsonlite with `pretty = TRUE`.
write_pretty_json <- function(x, filename) {
  jsonlite::write_json(
    x,
    path = filename,
    pretty = TRUE
  )
}
# Print (with cat) a sentence of the form
# "<what> is also available <format> here: [<web address>](<relative path>)".
#
# what:     description of the data, start of the sentence
# format:   text describing the format, e.g. "in CSV format"
# filename: file name, appended to the web address and to "../"
also_available <- function(what, format, filename) {
  web_address <- paste0("https://kwb-r.github.io/wasserportal/", filename)
  relative_path <- paste0("../", filename)
  markdown_link <- paste0("[", web_address, "]", "(", relative_path, ")")

  cat(paste0(what, " is also available ", format, " here: ", markdown_link))
}
# Create a DT::datatable of `x` with `filter = "top"` and an optional caption.
top_filter_datatable <- function(x, caption = NULL) {
  DT::datatable(
    data = x,
    caption = caption,
    filter = "top"
  )
}
# Percentage of `x` in relation to `basis` (as calculated by
# kwb.utils::percentage), rounded to `digits` decimal places.
rounded_percentage <- function(x, basis, digits = 2L) {
  share <- kwb.utils::percentage(x, basis)
  round(share, digits = digits)
}
```
## Install the Package
Use the "remotes" package to install the package "wasserportal" directly from
KWB's GitHub site:
```{r eval = FALSE}
# If required, install the "remotes" package first:
# install.packages("remotes")
remotes::install_github(
  repo = "kwb-r/wasserportal",
  force = TRUE,
  upgrade = "never"
)
```
## Overview on Monitoring Stations and Parameters
Get information on monitoring stations and parameters that are available on the Wasserportal:
```{r get_stations}
# Request both result types: "list" and "crosstable"
stations <- wasserportal::get_stations(
  type = c("list", "crosstable")
)

# Show the structure of the result down to the second nesting level
str(stations, max.level = 2)
```
The data frame `stations$crosstable` informs about the parameters that are
measured at the different monitoring stations:
```{r datatable_stations_crosstable, echo = FALSE}
# Interactive table of the parameters measured at each monitoring station
top_filter_datatable(
  x = stations$crosstable,
  caption = "Data availability per monitoring station"
)
```
The parameter abbreviations that appear as column names in the above table have
the following meanings:
```{r get_parameters}
# Retrieve the available overview options from the package
parameters <- wasserportal::get_overview_options()

# Show the structure of the returned object
str(object = parameters)
```
```{r input_ghactions0, echo = FALSE, results = 'asis', eval = is_ghactions}
# Export the crosstable as JSON and print a markdown link to the written file
write_json_and_inform(
  x = stations$crosstable,
  filename = "stations_crosstable.json",
  what = "The table of data availability for each monitoring station"
)
```
## Provide Pipe Operator and Helper Functions
The code provided in the following requires the pipe operator `%>%` of the
"magrittr" package and some helper functions to be defined:
```{r define_helpers}
`%>%` <- magrittr::`%>%`
# Collapse the elements of `x` into one string, separated by comma and space.
comma_separated <- function(x) {
  separator <- ", "
  paste(x, collapse = separator)
}
# Build a two-line plot title from the names and values of `x`.
#
# x: named list or one-row data frame
#
# Returns a list with one element `text`: the first "name: value" pair on the
# first line and all remaining pairs, comma separated, on the second line. The
# result is passed to plotly::layout(title = ) in this document.
to_plotly_title <- function(x) {
  key_values <- paste(names(x), unname(unlist(x)), sep = ": ")

  # plotly renders "<br>" as a line break in text; a plain newline character
  # would not break the title into two lines
  list(text = sprintf(
    "%s<br>%s",
    key_values[1L],
    comma_separated(key_values[-1L])
  ))
}
# Start a ggplot with column `Datum` on the x axis and `Messwert` on the y axis.
#
# data: data frame with columns `Datum`, `Messwert` and the column named by
#       `col`
# col:  name (character string) of the column that determines the colour
#
# Returns a ggplot object without layers; geoms are added by the caller.
ggplot2_date_value <- function(data, col) {
  ggplot2::ggplot(data, mapping = ggplot2::aes(
    x = Datum,
    y = Messwert,
    # `col` holds a column *name*. A bare `col` inside aes() would map the
    # colour to that constant string instead of to the column's values, so
    # the column is looked up by name via the `.data` pronoun.
    colour = .data[[col]]
  )) +
    # Use the column name as title of the colour legend
    ggplot2::labs(colour = col)
}
```
## Groundwater Level Data
The data frame `stations$overview_list$groundwater.level` gives general
information on the groundwater monitoring stations:
```{r datatable_stations_list_groundwater_level, echo = FALSE}
# Interactive overview table of the groundwater level stations
top_filter_datatable(
  x = stations$overview_list$groundwater.level
)
```
### Master data
More information on the groundwater level stations (master data), such as the
coordinates of the wells, can be found if you follow the web link (URL) that is
given in column `stammdaten_link` of the above table. The "wasserportal" package
provides a function to retrieve information from these links:
```{r stations_gwl_table_master}
# Links to the master data of the groundwater level stations
urls <- stations$overview_list$groundwater.level$stammdaten_link

# Retrieve the master data that are available behind these links
stations_gwl_master <- wasserportal::get_wasserportal_masters_data(
  master_urls = urls
)
```
This is what the resulting table `stations_gwl_master` looks like:
```{r datatable_stations_gwl_master, echo = FALSE}
# Interactive table of the master data of the groundwater level stations
top_filter_datatable(x = stations_gwl_master)
```
```{r input_ghactions1, echo = FALSE, results = 'asis', eval = is_ghactions}
# Export the master data as JSON and print a markdown link to the written file
write_json_and_inform(
  what = "The master data of groundwater level stations",
  x = stations_gwl_master,
  filename = "stations_gwl_master.json"
)
```
### Trend Classification
Groundwater level trend classification (provided by SenWeb) is visualized below.

#### 1. Trend Classification Histogram
```{r stations_gwl_trend}
# Convert the date strings (day.month.year) to Date objects
gwl <- dplyr::mutate(
  stations$overview_list$groundwater.level,
  Datum = as.Date(Datum, format = "%d.%m.%Y")
)

# Classification labels in the order in which they are to appear in the plot
text_low_levels <- c("extrem niedrig", "sehr niedrig", "niedrig")
text_high_levels <- c("hoch", "sehr hoch", "extrem hoch")
levels_ordered <- c(text_low_levels, "normal", text_high_levels, "keine")

gwl$Klassifikation <- forcats::fct_relevel(gwl$Klassifikation, levels_ordered)

# Exclude the stations that are classified as "keine"
gwl_classified_only <- dplyr::filter(gwl, Klassifikation != "keine")

# Shares of the low and high classes among the classified stations
percental_share_low_levels <- rounded_percentage(
  x = sum(gwl_classified_only$Klassifikation %in% text_low_levels),
  basis = nrow(gwl_classified_only)
)

percental_share_high_levels <- rounded_percentage(
  x = sum(gwl_classified_only$Klassifikation %in% text_high_levels),
  basis = nrow(gwl_classified_only)
)

title_text <- sprintf(
  "GW level classification (n = %d out of %d have 'classification' data)",
  nrow(gwl_classified_only),
  nrow(gwl)
)

# Stacked bars: one bar per classification, split by `Grundwasserspannung`
g1 <- gwl_classified_only %>%
  dplyr::count(Klassifikation, Grundwasserspannung) %>%
  dplyr::mutate(percental_share = kwb.utils::percentage(n, nrow(gwl))) %>%
  ggplot2::ggplot(mapping = ggplot2::aes(
    x = Klassifikation,
    y = percental_share,
    fill = Grundwasserspannung
  )) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::labs(
    title = title_text,
    x = "Classification",
    y = "Percental share (%)"
  ) +
  ggplot2::theme_bw()

plotly::ggplotly(g1)
```
`r percental_share_low_levels`
percent of all considered
`r nrow(gwl_classified_only)`
groundwater level monitoring stations containing `classification` data
(out of `r nrow(gwl)` provided by SenWeb) indicate `below normal`
(`r comma_separated(text_low_levels)`)
groundwater levels. However, only
`r percental_share_high_levels`
percent indicate `above normal`
(`r comma_separated(text_high_levels)`)
groundwater levels.

#### 2. Trend Classification Map
```{r stations_gwl_trend_spatially}
# One colour per classification, in the order of `levels_ordered`
level_colors <- data.frame(
  Klassifikation = levels_ordered,
  classi_color = c(
    "darkred", "red", "orange",
    "green",
    "lightblue", "blue", "darkblue",
    "grey"
  )
)

# Names of the coordinate columns in the master data
rechtswert <- "Rechtswert_UTM_33_N"
hochwert <- "Hochwert_UTM_33_N"

# Add coordinates (from the master data) and colours to the classified
# stations, convert to a spatial object and transform from EPSG:25833 to
# EPSG:4326
gwl_classified_only_with_coords <- gwl_classified_only %>%
  dplyr::mutate(Messstellennummer = as.character(Messstellennummer)) %>%
  dplyr::inner_join(
    y = stations_gwl_master %>%
      tibble::as_tibble() %>%
      dplyr::select(
        Messstellennummer = "Nummer",
        dplyr::all_of(c(rechtswert, hochwert))
      ),
    by = "Messstellennummer"
  ) %>%
  dplyr::left_join(y = level_colors, by = "Klassifikation") %>%
  sf::st_as_sf(coords = c(rechtswert, hochwert), crs = 25833) %>%
  sf::st_transform(crs = 4326)
if (nrow(gwl_classified_only_with_coords) > 0) {

  # Create a vector of labels for each row in gwl_classified_only_with_coords
  labs <- wasserportal::columns_to_labels(
    data = gwl_classified_only_with_coords,
    columns = c(
      "Messstellennummer",
      "Grundwasserspannung",
      "Klassifikation",
      "Datum"
    ),
    # The labels are passed through htmltools::HTML() below, so each
    # "column: value" pair is wrapped in its own HTML paragraph
    fmt = "<p>%s: %s</p>",
    sep = ""
  )

  # Print Map
  gwlmap <- gwl_classified_only_with_coords %>%
    leaflet::leaflet() %>%
    leaflet::addTiles() %>%
    leaflet::addProviderTiles(leaflet::providers$CartoDB.Positron) %>%
    leaflet::addCircles(
      color = ~classi_color,
      label = lapply(labs, htmltools::HTML)
    ) %>%
    leaflet::addLegend(
      position = "topright",
      colors = level_colors$classi_color,
      labels = level_colors$Klassifikation,
      title = sprintf(
        "Classification (latest data: %s)",
        max(gwl_classified_only_with_coords$Datum)
      )
    )

  # Save the map as a separate HTML page as well
  htmlwidgets::saveWidget(
    gwlmap,
    "./map_gwl-trend.html",
    title = "GW level trend"
  )

  gwlmap
}
```

```{r input_ghactions_map, echo=FALSE, results='asis', eval=is_ghactions}
also_available(
  what = "GW level trend plot",
  format = "on a full html page",
  filename = "map_gwl-trend.html"
)
```

### Groundwater Levels: One Station

The following code downloads and plots groundwater level data for one monitoring
station:

```{r test_gwl_download_single, eval = FALSE}
station_gwl <- stations$overview_list$groundwater.level[1L, ]

gw_level <- wasserportal::read_wasserportal_raw_gw(
  station = station_gwl$Messstellennummer,
  stype = "gws"
  #, as_text = TRUE, dbg = TRUE
) %>%
  dplyr::mutate(Label = sprintf("%s (%s)", Parameter, Einheit))

head(gw_level)

g <- gw_level %>%
  ggplot2_date_value(col = "Label") +
  ggplot2::geom_line() +
  ggplot2::geom_point() +
  ggplot2::theme_bw()

plotly::ggplotly(g) %>%
  plotly::layout(title = to_plotly_title(station_gwl))
```

### Groundwater Levels: Multiple Stations

The following code downloads and plots groundwater level data for multiple
monitoring stations:

```{r test_gwl_download_multiple, eval = FALSE}
gw_level_multi <- data.table::rbindlist(lapply(
  stations$overview_list$groundwater.level$Messstellennummer,
  function(id) {
    kwb.utils::catAndRun(
      sprintf("Downloading Messstellennummer == '%s'", id),
      wasserportal::read_wasserportal_raw_gw(station = id, stype = "gws"),
      dbg = FALSE
    )
  }
))

readr::write_csv(gw_level_multi, file = "groundwater_level.csv")

# Plot 10 GW level
selected_stations <- stations$overview_list$groundwater.level$Messstellennummer[1:10]

g <- gw_level_multi %>%
  dplyr::filter(Messstellennummer %in% selected_stations) %>%
  dplyr::mutate(Messstellennummer = as.character(Messstellennummer)) %>%
  ggplot2_date_value(col = "Messstellennummer") +
  ggplot2::labs(title = "GW level (m above NN)") +
  ggplot2::geom_line() +
  ggplot2::geom_point() +
  ggplot2::theme_bw()

plotly::ggplotly(g)
```

```{r input_ghactions_gwl_download, echo = FALSE, results = 'asis', eval = is_ghactions}
also_available(
  what = "The data of all GW level stations",
  format = "in CSV format",
  filename = "groundwater_level.csv"
)
```

## Groundwater Quality Data

Overview data of GW quality stations can be requested as shown below:

```{r stations_gwq_table_overview}
stations_gwq <- wasserportal::get_wasserportal_stations_table(
  type = parameters$groundwater$quality
)
```

```{r datatable_stations_gwq, echo = FALSE}
top_filter_datatable(stations_gwq)
```

Master data of groundwater quality stations can be requested as shown below:

```{r stations_gwq_table_master}
stations_gwq_master <- wasserportal::get_wasserportal_masters_data(
  master_urls = stations_gwq$stammdaten_link
)
```

```{r input_ghactions2, echo = FALSE, results = 'asis', eval = is_ghactions}
write_json_and_inform(
  x = stations_gwq_master,
  filename = "stations_gwq_master.json",
  what = "The master data of groundwater quality stations"
)
```

### Groundwater Quality: One Station

The following code downloads and plots groundwater quality data for one
monitoring station:

```{r test_gwq_download_single, eval = FALSE}
station_gwq <- stations$overview_list$groundwater.quality[1L, ]

gw_quality <- wasserportal::read_wasserportal_raw_gw(
  station = station_gwq$Messstellennummer,
  stype = "gwq"
)

head(gw_quality)

unique(gw_quality$Parameter)

g <- gw_quality %>%
  dplyr::filter(Parameter == "Sulfat") %>%
  ggplot2_date_value(col = "Parameter") +
  ggplot2::geom_line() +
  ggplot2::geom_point() +
  ggplot2::theme_bw()

plotly::ggplotly(g) %>%
  plotly::layout(title = to_plotly_title(station_gwq))
```

### Groundwater Quality: Multiple Stations

The following code downloads and plots groundwater quality data for multiple
monitoring stations:

```{r test_gwq_download_multiple, eval = FALSE}
gw_quality_multi <- data.table::rbindlist(lapply(
  stations$overview_list$groundwater.quality$Messstellennummer,
  function(id) kwb.utils::catAndRun(
    sprintf("Downloading Messstellennummer == '%s'", id),
    wasserportal::read_wasserportal_raw_gw(station = id, stype = "gwq"),
    dbg = FALSE
  )
))

readr::write_csv(gw_quality_multi, "groundwater_quality.csv")

# Plot 10 GW quality
selected_stations <- stations$overview_list$groundwater.quality$Messstellennummer[1:10]

g <- gw_quality_multi %>%
  dplyr::filter(Messstellennummer %in% selected_stations) %>%
  dplyr::mutate(Messstellennummer = as.character(Messstellennummer)) %>%
  dplyr::filter(Parameter == "Sulfat") %>%
  ggplot2_date_value(col = "Messstellennummer") +
  ggplot2::labs(title = "GW quality (Sulfat)") +
  ggplot2::geom_line() +
  ggplot2::geom_point() +
  ggplot2::theme_bw()

plotly::ggplotly(g)
```

```{r input_ghactions_gwq_download, echo = FALSE, results = 'asis', eval = is_ghactions}
also_available(
  what = "The data of all GW quality stations",
  format = "in CSV format",
  filename = "groundwater_quality.csv"
)
```