---
title: "Inflow: Test Random Distributions"
author: "Michael Rustler"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Inflow: Test Random Distributions}
  %\VignetteEngine{knitr::rmarkdown}
  \usepackage[utf8]{inputenc}
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

# 1 Installation

```{r eval = TRUE}
# The installation commands are shown for reference only and are never run
# while the vignette is built.
if (FALSE) {
  Sys.setenv(GITHUB_PAT = "mysecret_access_token")
  install.packages("remotes", repos = "https://cloud.r-project.org")
  library(remotes)
  remotes::install_github("r-lib/remotes@18c7302637053faf21c5b025e1e9243962db1bdc")
  remotes::install_github("KWB-R/kwb.qmra")
  remotes::install_github("KWB-R/qmra.db")
}

## Load packages
library(kwb.qmra)
library(qmra.db)
library(ggplot2)
library(tibble)
library(sessioninfo)
library(stringi)
```

# 2 Import data from "qmra.db"

See: https://github.com/KWB-R/qmra.db/tree/master/inst/database

```{r}
# Read the inflow table and join its lookup tables.
#
# csv_dir: directory containing the exported CSV tables "tbl_inflow.csv",
#   "tbl_pathogen.csv", "tbl_pathogenGroup.csv", "tbl_reference.csv" and
#   "tbl_waterSource.csv". Defaults to the folder shipped with "qmra.db".
#
# Returns the inflow table left-joined with the pathogen, pathogen group,
# water source and reference tables, sorted by water source, pathogen group,
# pathogen and reference name, with a running "row_id" column.
create_inflow_metadata <- function(
  csv_dir = system.file("database/qmra-db_accdb", package = "qmra.db")
) {
  # system.file() returns "" if the package or the folder is missing. Fail
  # early instead of trying to read the tables from the file system root.
  if (!nzchar(csv_dir) || !dir.exists(csv_dir)) {
    stop("CSV directory does not exist: '", csv_dir, "'", call. = FALSE)
  }

  # All tables are read from the directory given by the caller
  read_table <- function(file_name) {
    readr::read_csv(file = file.path(csv_dir, file_name))
  }

  inflow <- read_table("tbl_inflow.csv")
  pathogen <- read_table("tbl_pathogen.csv")
  pathogen_group <- read_table("tbl_pathogenGroup.csv")
  references <- read_table("tbl_reference.csv")
  water_source <- read_table("tbl_waterSource.csv")

  # The joins use the columns that the tables have in common (natural joins)
  inflow %>%
    dplyr::left_join(pathogen) %>%
    dplyr::left_join(pathogen_group) %>%
    dplyr::left_join(water_source) %>%
    dplyr::left_join(references) %>%
    dplyr::arrange(
      .data$WaterSourceName,
      .data$PathogenGroup,
      .data$PathogenName,
      .data$ReferenceName
    ) %>%
    # seq_len() instead of 1:n() so that an empty table does not break
    dplyr::mutate(row_id = seq_len(dplyr::n()))
}

inflow_metadata <- create_inflow_metadata()
head(inflow_metadata)
```

# 3 Test

## 3.1 Define Checking Function

```{r}
# Simulate a random inflow distribution for each row of the inflow metadata
# and summarise it by its percentiles (0 %, 1 %, ..., 100 %).
#
# inflow_metadata: data frame with one row per inflow record. The columns
#   WaterSourceName, PathogenName, ReferenceName, min and max are used.
# type: distribution type passed to kwb.qmra::create_random_distribution().
#   For valid "type" parameter values see:
#   https://kwb-r.github.io/kwb.qmra/reference/create_random_distribution.html
# number_of_repeatings, number_of_events: passed on to
#   kwb.qmra::create_random_distribution()
# dbg: passed as argument "debug" to kwb.qmra::create_random_distribution()
#
# Returns a tibble with columns row_id (row number in inflow_metadata, as
# character), percentile (numeric, 0 to 100), value and distribution_type.
# Rows for which the simulation fails are skipped with a warning.
check_inflow_distribution <- function(inflow_metadata,
                                      type = "log10_norm",
                                      number_of_repeatings = 1000,
                                      number_of_events = 365,
                                      dbg = TRUE) {
  row_ids <- seq_len(nrow(inflow_metadata))

  percentiles_list <- lapply(row_ids, FUN = function(i) {
    sel_dat <- inflow_metadata[i, ]

    print(sprintf(
      "Water source: %s, Pathogen: %s, Reference: %s, Min: %f, Max: %f",
      stringi::stri_enc_toutf8(sel_dat$WaterSourceName),
      stringi::stri_enc_toutf8(sel_dat$PathogenName),
      stringi::stri_enc_toutf8(sel_dat$ReferenceName),
      sel_dat$min,
      sel_dat$max
    ))

    # Go ahead even if errors occur: both the simulation and the calculation
    # of the percentiles are guarded. A failing row gives NULL and a warning.
    tryCatch(
      {
        inflow <- kwb.qmra::create_random_distribution(
          type = type,
          number_of_repeatings = number_of_repeatings,
          number_of_events = number_of_events,
          min = sel_dat$min,
          max = sel_dat$max,
          debug = dbg
        )

        tibble::enframe(
          quantile(inflow$events$values, probs = seq(0, 1, by = 0.01))
        )
      },
      error = function(e) {
        warning(
          sprintf(
            "Skipping row %d (distribution type '%s'): %s",
            i, type, conditionMessage(e)
          ),
          call. = FALSE
        )
        NULL
      }
    )
  })

  # Name the results by row number before dropping failed rows so that
  # "row_id" always refers to the row in inflow_metadata
  names(percentiles_list) <- as.character(row_ids)
  percentiles_list <- Filter(Negate(is.null), percentiles_list)

  # Nothing could be simulated: return an empty result of the usual shape
  if (length(percentiles_list) == 0L) {
    return(tibble::tibble(
      row_id = character(),
      percentile = numeric(),
      value = numeric(),
      distribution_type = character()
    ))
  }

  dplyr::bind_rows(percentiles_list, .id = "row_id") %>%
    dplyr::rename(percentile = "name") %>%
    dplyr::mutate(
      # quantile() names its results "0%", "1%", ...: convert to numbers
      percentile = as.numeric(stringr::str_remove(.data$percentile, "%")),
      distribution_type = type
    )
}
```

## 3.2 Define Plot Function

```{r}
# Print one percentile plot per inflow record.
#
# inflow_stats: data frame with columns row_id, percentile, value and
#   distribution_type, as returned by check_inflow_distribution()
# inflow_metadata: data frame with columns row_id, PathogenName,
#   WaterSourceName, ReferenceName, ReferenceLink, min and max
#
# Called for its side effect (printing the plots). Returns NULL invisibly.
plot_percentiles <- function(inflow_stats, inflow_metadata) {
  # Compare the identifiers as text: they are character in inflow_stats but
  # may be integer in inflow_metadata
  stats_ids <- as.character(inflow_stats$row_id)
  metadata_ids <- as.character(inflow_metadata$row_id)

  # Loop over the identifiers themselves, not over their positions, so that
  # gaps in the sequence of identifiers do not lead to wrong or empty plots
  for (row_id in unique(stats_ids)) {
    sel_inflow_stats <- inflow_stats[stats_ids == row_id, ]
    metadata <- inflow_metadata[metadata_ids == row_id, ]

    if (nrow(metadata) == 0L) {
      warning("No metadata found for row_id ", row_id, call. = FALSE)
      next
    }

    gg <- ggplot(
      sel_inflow_stats,
      aes(x = percentile, y = value, colour = distribution_type)
    ) +
      ggplot2::scale_y_log10() +
      labs(
        title = sprintf(
          "%s (water source: %s):\nmin = %f, max = %f",
          stringi::stri_enc_toutf8(metadata$PathogenName),
          stringi::stri_enc_toutf8(metadata$WaterSourceName),
          metadata$min,
          metadata$max
        ),
        subtitle = sprintf(
          "Reference: [%s](%s)",
          stringi::stri_enc_toutf8(metadata$ReferenceName),
          stringi::stri_enc_toutf8(metadata$ReferenceLink)
        ),
        x = "Percentile",
        y = "Pathogens Per Litre"
      ) +
      geom_line() +
      theme_bw()

    # Explicit print() is required to draw a ggplot inside a loop
    print(gg)
  }

  invisible(NULL)
}
```

## 3.3 Run Checking Function And Plot

```{r eval = TRUE}
distribution_types <- c(
  "log10_norm", "log10_uniform", "lognorm", "norm", "uniform", "triangle"
)

inflow_stats_list <- lapply(distribution_types, function(type) {
  check_inflow_distribution(inflow_metadata, type = type)
})

inflow_stats_list_df <- dplyr::bind_rows(inflow_stats_list)

plot_percentiles(inflow_stats_list_df, inflow_metadata)
```

## 3.4 Export Distribution Plots to PDF

The plots shown above are also available in a PDF document, which can be
downloaded by clicking on the link:
**[inflow_test_random_distributions.pdf](../inflow_test_random_distributions.pdf)**

```{r eval = TRUE}
pdff <- kwb.utils::preparePdf("inflow_test_random_distributions.pdf")
plot_percentiles(inflow_stats_list_df, inflow_metadata)
dev.off()
# kwb.utils::finishAndShowPdf(pdff)
```

# 4 Session Info

For reproducibility

```{r echo = TRUE}
sessioninfo::session_info()
```