Tutorial

Installation

### Optionally: specify GitHub Personal Access Token (GITHUB_PAT)
### See here why this might be important for you:
### https://kwb-r.github.io/kwb.pkgbuild/articles/install.html#set-your-github_pat

# Sys.setenv(GITHUB_PAT = "mysecret_access_token")

# Install package "remotes" from CRAN
if (! require("remotes")) {
  install.packages("remotes", repos = "https://cloud.r-project.org")
}

# Install KWB package 'kwb.heatsine' from GitHub
remotes::install_github("KWB-R/kwb.heatsine")

Data prerequisites

The sinus fit optimisation works for daily time series of a surface water and a groundwater monitoring point, which follow the following requirements:

File naming

The time series must be provided in a .csv (comma separated value) file, which follows the naming convention:

temperature_<type>_<monitoring_id>.csv

where <type> needs to be exactly groundwater or surface-water. In case any other value for <type> is chosen the program will not work (e.g SURFACE WATER). The value of the <monitoring_id> will be used for labeling the monitoring points in the plots and can be almost any value but must not contain _ (underscore) or " " (space).

Data structure

The data structure of each .csv file is very simple as it contains only the two columns date (formatted: YYYY-MM-DD, e.g. 2020-06-23) and value (temperature data in degree Celsius) as shown exemplary below:

date,value
2020-06-23,12.3
2020-06-24,12.3

Please make sure that both columns date and value are all lowercase and correctly spelled as the program will not work if this is not the case.

Data preparation

Import

The R package includes the following temporal times series data for testing:

list.files(kwb.heatsine::extdata_file())
#> [1] "temperature_groundwater_Txxbxxxx6.csv"         
#> [2] "temperature_groundwater_Txxhxxx10.csv"         
#> [3] "temperature_groundwater_Txxsxxx20.csv"         
#> [4] "temperature_groundwater_Txxwxxx13.csv"         
#> [5] "temperature_groundwater_Txxxx3.csv"            
#> [6] "temperature_surface-water_Txxsxx-mxxxxsxxx.csv"

In this the example the files temperature_surface-water_Txxsxx-mxxxxsxxx.csv and temperature_groundwater_Txxxx3.csv are used for testing the functionality and imported by using the code below:

## Load the R package
library(kwb.heatsine)

load_temp <- function(base_name) {
  kwb.heatsine::load_temperature_from_csv(
    kwb.heatsine::extdata_file(base_name)
  )
}

data_sw <- load_temp("temperature_surface-water_Txxsxx-mxxxxsxxx.csv")
data_gw <- load_temp("temperature_groundwater_Txxxx3.csv")

Visualise

Plot time series for whole time period (moveover on data points to select)

Surface water

kwb.heatsine::plot_temperature_interactive(data_sw)

Groundwater

kwb.heatsine::plot_temperature_interactive(data_gw)

Select

Reduce time period to a full sinus period which is definde by the user input (date_end is optional, if not given it is set to ‘date_start’ + 365.25 days).

data_sw_selected <- kwb.heatsine::select_timeperiod(
  data_sw, 
  date_start = "2015-10-10",
  date_end = "2016-10-14"
)

data_gw_selected <- kwb.heatsine::select_timeperiod(
  data_gw,
  date_start = "2015-12-28",
  date_end = "2016-12-26"
)

Plot selected datasets:

Surface Water (selected)

kwb.heatsine::plot_temperature_interactive(df = data_sw_selected)

Grundwater (selected)

kwb.heatsine::plot_temperature_interactive(df = data_gw_selected)

Sinus optimisation

Background

According to Cross Validated (2013) the sinus fit can not only be written as y = y0 + amplitude * sin(x + phase_shift), but also in a linear form y = y0 + alpha * sin(x) + beta * cos(x). The latter approach is used within this R package as it enables the calculation of both amplitude (a) and phase_shift (x0) by using R’s build-in stats::lm() function as exemplary shown below:


### Generate synthetic temperature data following sine curve


period_length <- 365 # assumed period length
day_number <- seq_len(period_length) # daily sequence from 1 to period_length
temperature <- 26.9188 + 2.123641 * sin(2 * pi * day_number / period_length + 0.6049163)

x <- 2 * pi * day_number / period_length

### Print first data points:
tibble::tibble(day_number = day_number, temperature = temperature, x = x)
#> # A tibble: 365 × 3
#>    day_number temperature      x
#>         <int>       <dbl>  <dbl>
#>  1          1        28.2 0.0172
#>  2          2        28.2 0.0344
#>  3          3        28.2 0.0516
#>  4          4        28.2 0.0689
#>  5          5        28.3 0.0861
#>  6          6        28.3 0.103 
#>  7          7        28.3 0.120 
#>  8          8        28.4 0.138 
#>  9          9        28.4 0.155 
#> 10         10        28.4 0.172 
#> # ℹ 355 more rows

### Now check the if both formulas yield (almost) equal results

fit.lm2 <- lm(temperature ~ sin(x) + cos(x))

coeffs <- coef(fit.lm2)

y0 <- coeffs[1L] # y0
alpha <- coeffs[2L] # alpha = sin(phi)
beta <- coeffs[3L]  # beta = cos(phi)
a <- sqrt(alpha ^ 2 + beta ^ 2) # amplitude
x0 <- atan2(beta, alpha) # phase t

### Print parameters of sinus fit
tibble::tibble(
  y0 = y0,
  alpha = alpha,
  beta = beta,
  a = a,
  x0 = x0
)
#> # A tibble: 1 × 5
#>      y0 alpha  beta     a    x0
#>   <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1  26.9  1.75  1.21  2.12 0.605

x <- 2 * pi * day_number / period_length

par(mfrow = c(1, 2))

### Formula option 1: y = y0 + amplitude * sin(x + phase)
plot(
  x = day_number,
  y = y0 + a * sin(x + x0),
  type = "l",
  lty = 1,
  lwd = 3,
  col = "gray",
  main = "Overplotted Graphs",
  xlab = "day_number",
  ylab = "temperature"
)

### Formula option 2: y = y0 + alpha * sin(x) + beta * cos(x)

lines(
  x = day_number,
  y = y0 + alpha * sin(x) + beta * cos(x),
  lwd = 2,
  lty = 2,
  col = "red",
)

### Check numeric differences between both approaches
plot(
  x = day_number,
  y = y0 + a * sin(x + x0) - (y0 + alpha * sin(x) + beta * cos(x)),
  lwd = 3,
  col = "gray",
  main = "Difference",
  xlab = "day_number",
  ylab = "temperature"
)

As shown above the differences between both sinus calculation approaches are mostly zero but sometimes show a minimal offset (which is close to R’s floating point accuracy of 10^-15.6535598 , i.e. 2.220446^{-16} for the machine the calculation was carried out). As a consequence the second approach can and will be used for sinus fitting within this R package.

Run

Sinus fit operation is performed by minimizing the RMSE error between observed and simulated temperature curves.

limits <- c(100, 500) # minimum/maximum period length
tolerance <- 0.001 # the desired accuracy ()
debug <- TRUE

# Helper function to do the sinus optimisation
do_sinus_optimisation <- function(temp_df) {
  kwb.heatsine::optimise_sinus_variablePeriod(
    temp_df = temp_df,
    opt_limits = limits,
    opt_tolerance = tolerance,
    opt_debug = debug
  )
}

sinusfit_sw <- do_sinus_optimisation(temp_df = data_sw_selected)
#> Ran opt_func with period length 252.79 days: 6.27 'RMSE' for Temp
#> Ran opt_func with period length 347.21 days: 1.89 'RMSE' for Temp
#> Ran opt_func with period length 405.57 days: 1.78 'RMSE' for Temp
#> Ran opt_func with period length 379.63 days: 1.54 'RMSE' for Temp
#> Ran opt_func with period length 379.14 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 366.94 days: 1.55 'RMSE' for Temp
#> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 376.33 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 375.25 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 374.59 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 374.18 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.92 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.77 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.67 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.61 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.57 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.55 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.54 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.53 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.52 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp
#> Ran opt_func with period length 373.51 days: 1.53 'RMSE' for Temp
sinusfit_gw <- do_sinus_optimisation(temp_df = data_gw_selected)
#> Ran opt_func with period length 252.79 days: 2.21 'RMSE' for Temp
#> Ran opt_func with period length 347.21 days: 0.47 'RMSE' for Temp
#> Ran opt_func with period length 405.57 days: 0.52 'RMSE' for Temp
#> Ran opt_func with period length 373.00 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.86 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.43 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.27 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.16 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.10 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.06 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.04 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.02 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.01 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.01 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.00 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.00 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.00 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.00 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.00 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.00 days: 0.35 'RMSE' for Temp
#> Ran opt_func with period length 373.00 days: 0.35 'RMSE' for Temp

# Generate data frame with simulated and observed values
predictions <- kwb.heatsine::get_predictions(
  sinusfit_sw = sinusfit_sw, 
  sinusfit_gw = sinusfit_gw, 
  retardation_factor = 1.8
)

Analyse

Interactive Plot

kwb.heatsine::plot_prediction_interactive(predictions)

Table: Traveltimes

knitr::kable(predictions$traveltimes)
point_type surface-water (Txxsxx-mxxxxsxxx) groundwater (Txxxx3) traveltime_thermal_days retardation_factor traveltime_hydraulic_days
turning-point_1 2015-10-21 2016-01-11 82.12867 1.8 45.62704
min 2016-01-23 2016-04-14 82.00000 1.8 45.55556
turning-point_2 2016-04-25 2016-07-16 81.87133 1.8 45.48407
max 2016-07-28 2016-10-18 82.00000 1.8 45.55556
turning-point_3 2016-10-29 2017-01-19 81.87133 1.8 45.48407

Table: Goodness of Fit

While only RMSE is used for optimising the sinus fit, a number of goodness-of-fit parameters is also calculated and shown in the table below. For details on each of these parameters the use is referred to the function hydroGOF::gof() which is used for calculating all these values.

knitr::kable(predictions$gof)
type ME MAE MSE RMSE ubRMSE NRMSE % PBIAS % RSR rSD NSE mNSE rNSE wNSE wsNSE d dr md rd cp r R2 bR2 VE KGE KGElf KGEnp KGEkm
surface-water 0 1.24 2.33 1.53 1.53 20.1 0 0.20 0.98 0.96 0.82 -0.17 0.95 0.83 0.99 0.91 0.91 0.7 -10.49 0.98 0.96 0.95 0.90 0.97 0.62 0.95 0.98
groundwater 0 0.24 0.12 0.35 0.35 12.5 0 0.13 0.99 0.98 0.90 0.99 0.98 0.89 1.00 0.95 0.95 1.0 -23.85 0.99 0.98 0.98 0.98 0.99 0.98 0.99 0.99

Table: Optimisation Parameters

knitr::kable(predictions$paras)
type period_length alpha beta y0 a x0
surface-water 373.5143 -10.278478 2.0304625 12.72494 10.477113 2.946559
groundwater 372.9996 -3.742912 0.9567905 10.73679 3.863268 2.891325

Table: Data

# only first entries
knitr::kable(head(predictions$data))
type monitoring_id label date observed simulated residuals simulated_pi_lower simulated_pi_upper
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-10 13.50958 14.75541 -1.245822 11.72751 17.78331
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-11 12.88167 14.58222 -1.700557 11.55433 17.61012
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-12 12.17833 14.40852 -2.230183 11.38062 17.43642
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-13 11.68292 14.23433 -2.551416 11.20643 17.26223
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-14 11.23500 14.05972 -2.824722 11.03182 17.08762
surface-water Txxsxx-mxxxxsxxx surface-water (Txxsxx-mxxxxsxxx) 2015-10-15 11.14667 13.88473 -2.738067 10.85684 16.91263

Export

Export results to csv:

write_to_csv <- function(df, file_base) {
  readr::write_csv(df, path = file.path(
    kwb.utils::createDirectory("csv"), paste0(file_base, ".csv")
  ))
}

write_to_csv(predictions$data, "sinus-fit_predictions")
write_to_csv(predictions$paras, "sinus-fit_parameters")
write_to_csv(predictions$gof, "sinus-fit_goodness-of-fit")
write_to_csv(predictions$traveltimes, "sinus-fit_traveltimes")
write_to_csv(predictions$residuals, "sinus-fit_residuals")

## Export plots:

save_to_html <- function(p, file_base) {
  htmlwidgets::saveWidget(p, paste0(file_base, ".html"))
}

withr::with_dir(new = kwb.utils::createDirectory("plots"), code = {
  
  save_to_html(
    plot_temperature_interactive(data_sw), 
    "temperature_surface-water_time-series_full"
  )
  
  save_to_html(
    plot_temperature_interactive(data_sw_selected),
    "temperature_surface-water_time-series_selected"
  )
  
  save_to_html(
    plot_temperature_interactive(data_gw), 
    "temperature_groundwater_time-series_full"
  )
  
  save_to_html(
    plot_temperature_interactive(data_gw_selected),
    "temperature_groundwater_time-series_selected"
  )
  
  save_to_html(
    plot_prediction_interactive(predictions), 
    "temperature_prediction"
  )
  
  # save_to_html(
  #   plot_residuals_interactive(prediction_df, binwidth = 0.5),
  #   "temperature_prediction_residuals"
  # )
})  

Session Info

Plattform

name value
version R version 4.4.2 (2024-10-31)
os Ubuntu 24.04.1 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate C
ctype en_US.UTF-8
tz Etc/UTC
date 2024-12-28
pandoc 3.2.1 @ /usr/local/bin/ (via rmarkdown)

Packages

#>  package      * version date (UTC) lib source
#>  bit            4.5.0.1 2024-12-03 [2] RSPM (R 4.4.0)
#>  bit64          4.5.2   2024-09-22 [2] RSPM (R 4.4.0)
#>  bslib          0.8.0   2024-07-29 [2] RSPM (R 4.4.0)
#>  buildtools     1.0.0   2024-12-26 [3] local (/pkg)
#>  cachem         1.1.0   2024-05-16 [2] RSPM (R 4.4.0)
#>  class          7.3-22  2023-05-03 [2] RSPM (R 4.4.0)
#>  classInt       0.4-10  2023-09-05 [2] RSPM (R 4.4.0)
#>  cli            3.6.3   2024-06-21 [2] RSPM (R 4.4.0)
#>  colorspace     2.1-1   2024-07-26 [2] RSPM (R 4.4.0)
#>  crayon         1.5.3   2024-06-20 [2] RSPM (R 4.4.0)
#>  crosstalk      1.2.1   2023-11-23 [2] RSPM (R 4.4.0)
#>  data.table     1.16.4  2024-12-06 [2] RSPM (R 4.4.0)
#>  digest         0.6.37  2024-08-19 [2] RSPM (R 4.4.0)
#>  dplyr          1.1.4   2023-11-17 [2] RSPM (R 4.4.0)
#>  e1071          1.7-16  2024-09-16 [2] RSPM (R 4.4.0)
#>  evaluate       1.0.1   2024-10-10 [2] RSPM (R 4.4.0)
#>  farver         2.1.2   2024-05-13 [2] RSPM (R 4.4.0)
#>  fastmap        1.2.0   2024-05-15 [2] RSPM (R 4.4.0)
#>  forcats        1.0.0   2023-01-29 [2] RSPM (R 4.4.0)
#>  generics       0.1.3   2022-07-05 [2] RSPM (R 4.4.0)
#>  ggplot2        3.5.1   2024-04-23 [2] RSPM (R 4.4.0)
#>  glue           1.8.0   2024-09-30 [2] RSPM (R 4.4.0)
#>  gtable         0.3.6   2024-10-25 [2] RSPM (R 4.4.0)
#>  hms            1.1.3   2023-03-21 [2] RSPM (R 4.4.0)
#>  htmltools      0.5.8.1 2024-04-04 [2] RSPM (R 4.4.0)
#>  htmlwidgets    1.6.4   2023-12-06 [2] RSPM (R 4.4.0)
#>  httr           1.4.7   2023-08-15 [2] RSPM (R 4.4.0)
#>  hydroGOF       0.6-0.1 2024-11-04 [2] RSPM (R 4.4.0)
#>  hydroTSM       0.7-0.1 2024-11-04 [2] RSPM (R 4.4.0)
#>  jquerylib      0.1.4   2021-04-26 [2] RSPM (R 4.4.0)
#>  jsonlite       1.8.9   2024-09-20 [2] RSPM (R 4.4.0)
#>  KernSmooth     2.23-24 2024-05-17 [2] RSPM (R 4.4.0)
#>  knitr          1.49    2024-11-08 [2] RSPM (R 4.4.0)
#>  kwb.heatsine * 0.1.5   2024-12-28 [1] https://kwb-r.r-universe.dev (R 4.4.2)
#>  kwb.utils      0.15.0  2024-12-01 [2] https://kwb-r.r-universe.dev (R 4.4.2)
#>  labeling       0.4.3   2023-08-29 [2] RSPM (R 4.4.0)
#>  lattice        0.22-6  2024-03-20 [2] RSPM (R 4.4.0)
#>  lazyeval       0.2.2   2019-03-15 [2] RSPM (R 4.4.0)
#>  lifecycle      1.0.4   2023-11-07 [2] RSPM (R 4.4.0)
#>  lubridate      1.9.4   2024-12-08 [2] RSPM (R 4.4.0)
#>  magrittr       2.0.3   2022-03-30 [2] RSPM (R 4.4.0)
#>  maketools      1.3.1   2024-10-04 [3] RSPM (R 4.4.0)
#>  munsell        0.5.1   2024-04-01 [2] RSPM (R 4.4.0)
#>  pillar         1.10.0  2024-12-17 [2] RSPM (R 4.4.0)
#>  pkgconfig      2.0.3   2019-09-22 [2] RSPM (R 4.4.0)
#>  plotly         4.10.4  2024-01-13 [2] RSPM (R 4.4.0)
#>  proxy          0.4-27  2022-06-09 [2] RSPM (R 4.4.0)
#>  purrr          1.0.2   2023-08-10 [2] RSPM (R 4.4.0)
#>  R6             2.5.1   2021-08-19 [2] RSPM (R 4.4.0)
#>  readr          2.1.5   2024-01-10 [2] RSPM (R 4.4.0)
#>  rlang          1.1.4   2024-06-04 [2] RSPM (R 4.4.0)
#>  rmarkdown    * 2.29    2024-11-04 [2] RSPM (R 4.4.0)
#>  sass           0.4.9   2024-03-15 [2] RSPM (R 4.4.0)
#>  scales         1.3.0   2023-11-28 [2] RSPM (R 4.4.0)
#>  sessioninfo    1.2.2   2021-12-06 [2] RSPM (R 4.4.0)
#>  stringi        1.8.4   2024-05-06 [2] RSPM (R 4.4.0)
#>  stringr        1.5.1   2023-11-14 [2] RSPM (R 4.4.0)
#>  sys            3.4.3   2024-10-04 [2] RSPM (R 4.4.0)
#>  tibble         3.2.1   2023-03-20 [2] RSPM (R 4.4.0)
#>  tidyr          1.3.1   2024-01-24 [2] RSPM (R 4.4.0)
#>  tidyselect     1.2.1   2024-03-11 [2] RSPM (R 4.4.0)
#>  timechange     0.3.0   2024-01-18 [2] RSPM (R 4.4.0)
#>  tzdb           0.4.0   2023-05-12 [2] RSPM (R 4.4.0)
#>  vctrs          0.6.5   2023-12-01 [2] RSPM (R 4.4.0)
#>  viridisLite    0.4.2   2023-05-02 [2] RSPM (R 4.4.0)
#>  vroom          1.6.5   2023-12-05 [2] RSPM (R 4.4.0)
#>  withr          3.0.2   2024-10-28 [2] RSPM (R 4.4.0)
#>  xfun           0.49    2024-10-31 [2] RSPM (R 4.4.0)
#>  xts            0.14.1  2024-10-15 [2] RSPM (R 4.4.0)
#>  yaml           2.3.10  2024-07-26 [2] RSPM (R 4.4.0)
#>  zoo            1.8-12  2023-04-13 [2] RSPM (R 4.4.0)
#> 
#>  [1] /tmp/RtmpIfB5Lp/Rinst12a5d2f922c
#>  [2] /github/workspace/pkglib
#>  [3] /usr/local/lib/R/site-library
#>  [4] /usr/lib/R/site-library
#>  [5] /usr/lib/R/library

Pandoc

#>        pandoc_directory pandoc_version
#> 1 /usr/local/bin/pandoc          3.2.1