Package 'kwb.geosalz'

Title: R Package for Documenting Workflow Used in Project "geosalz"
Description: R Package for Documenting Workflow Used in Project "geosalz".
Authors: Michael Rustler [aut, cre], Hauke Sonnenberg [ctb], Christoph Sprenger [ctb], GeoSalz [fnd], Kompetenzzentrum Wasser Berlin gGmbH (KWB) [cph]
Maintainer: Michael Rustler <[email protected]>
License: MIT + file LICENSE
Version: 0.7.0
Built: 2024-10-26 05:12:47 UTC
Source: https://github.com/KWB-R/kwb.geosalz

Help Index


add_para_metadata

Description

add_para_metadata

Usage

add_para_metadata(df, lookup_para_path, parameters_path)

Arguments

df

df

lookup_para_path

lookup_para_path

parameters_path

parameters_path

Value

return "df" with added parameter metadata


add_site_metadata

Description

add_site_metadata

Usage

add_site_metadata(df, site_path)

Arguments

df

df

site_path

site_path

Value

data frame with added site metadata


Check if all strings are not empty

Description

Check if all strings are not empty

Usage

all_defined(x)

Arguments

x

vector of character

Value

TRUE or FALSE


Helper function: cat_green_bold_0

Description

Helper function: cat_green_bold_0

Usage

cat_green_bold_0(...)

Arguments

...

text passed to crayon::green()

Value

formatted text output


Helper function: cat_red_bold_0

Description

Helper function: cat_red_bold_0

Usage

cat_red_bold_0(...)

Arguments

...

text passed to crayon::red

Value

formatted text output


Helper function: column_pattern_gather_ignore

Description

Helper function: column_pattern_gather_ignore

Usage

column_pattern_gather_ignore(
  fields = c("Datum", "KN", "[iI]nterne Nr.", "Name der", "Ort", "Probe", "Prü",
    "Untersuchung", "Labor", "Jahr", "Galer", "Detail", "Meß", "Zeit", "Bezei", "Monat")
)

Arguments

fields

column names to be ignored for gathering (default: c("Datum", "KN", "[iI]nterne Nr.", "Name der", "Ort", "Probe", "Prü", "Untersuchung", "Labor", "Jahr", "Galer", "Detail", "Meß", "Zeit", "Bezei", "Monat"))

Value

vector with ignored columns for gathering


Helper function: column_pattern_gather_ignore_clean

Description

Helper function: column_pattern_gather_ignore_clean

Usage

column_pattern_gather_ignore_clean(
  fields = c("LabSampleCode", "Date", "Time", "Waterbody", "ExSiteCode", "Site")
)

Arguments

fields

column names to be ignored for gathering (default: c("LabSampleCode", "Date", "Time", "Waterbody", "ExSiteCode", "Site"))

Value

vector with ignored columns for gathering


Convert PhreeqC input to "wide" format

Description

Convert PhreeqC input to "wide" format

Usage

convert_phreeqc_input_to_wide(phreeqc_input)

Arguments

phreeqc_input

PhreeqC input as retrieved by get_phreeqc_data

Value

PhreeqC input in "wide" format


Convert to SF

Description

Convert to SF

Usage

convert_to_sf(
  df,
  crs_source = 25833,
  crs_target = 4326,
  col_coord_x = "Rechtswert_UTM_33_N",
  col_cood_y = "Hochwert_UTM_33_N"
)

Arguments

df

data frame or tibble with spatial data

crs_source

original CRS (default: 25833)

crs_target

target CRS (default: 4326)

col_coord_x

column name of the x coordinate, i.e. easting (default: "Rechtswert_UTM_33_N")

col_cood_y

column name of the y coordinate, i.e. northing (default: "Hochwert_UTM_33_N")

Value

data frame or tibble converted to sf

Examples

gwl_master <- jsonlite::fromJSON("https://kwb-r.github.io/wasserportal/stations_gwl_master.json")
convert_to_sf(gwl_master)

Convert xls to xlsx

Description

Convert xls to xlsx

Usage

convert_xls_as_xlsx(
  input_dir,
  export_dir = tempdir(),
  office_folder = safe_office_folder(),
  dbg = TRUE
)

Arguments

input_dir

input directory containing .xls files

export_dir

export directory (default: tempdir())

office_folder

office folder path (default: safe_office_folder)

dbg

debug (default: TRUE)


Helper function: convert_xls_to_xlsx

Description

Helper function: convert_xls_to_xlsx

Usage

convert_xls_to_xlsx(exe, xls, xlsx, i, n_files, dbg = TRUE)

Arguments

exe

exe

xls

xls

xlsx

xlsx

i

i

n_files

n_files

dbg

debug (default: TRUE)


Helper function: copy_lookup_para_file

Description

Helper function: copy_lookup_para_file

Usage

copy_lookup_para_file(
  from_dir,
  to_dir,
  overwrite = FALSE,
  recursive = TRUE,
  file_pattern = "^lookup_para\\.csv$"
)

Arguments

from_dir

input directory with xlsx files

to_dir

target directory where to copy the xlsx files

overwrite

should existing files be overwritten (TRUE) otherwise (FALSE) ? (default: FALSE)

recursive

if TRUE recursively find all xlsx files in the directory specified in parameter "from_dir" (default: TRUE)

file_pattern

pattern for identifying lookup_para file (default: "^lookup_para\\.csv$")


Helper function: copy_xlsx_files

Description

Helper function: copy_xlsx_files

Usage

copy_xlsx_files(
  from_dir,
  to_dir,
  overwrite = FALSE,
  recursive = TRUE,
  file_pattern = "[xX][lL][sS][xX]"
)

Arguments

from_dir

input directory with xlsx files

to_dir

target directory where to copy the xlsx files

overwrite

should existing files be overwritten (TRUE) otherwise (FALSE) ? (default: FALSE)

recursive

if TRUE recursively find all xlsx files in the directory specified in parameter "from_dir" (default: TRUE)

file_pattern

pattern for identifying xlsx files (default: "[xX][lL][sS][xX]")


Create EMSHOFF91 Import Data Frame

Description

Create EMSHOFF91 Import Data Frame

Usage

create_emshoff91_import(
  ods_dir,
  files_to_ignore = c("cl25", "clliste", "rupelauf", "salzlast")
)

Arguments

ods_dir

directory to ".ods" files created manually by importing original ".wq1" files into LibreOffice 7.0 on Ubuntu with encoding (Western Europe (DOS/OS2-437/US)) and exporting to ".ods" format

files_to_ignore

tidied names of files to ignore due to complex data input structure not yet covered by importer (default: c("cl25", "clliste", "rupelauf", "salzlast"))

Value

data frame with columns "ods_paths" (full paths to ".ods" files), "ods_files" (their "basenames") and "ods_names_clean" (tidied names used as identifier)

Examples

## Not run: 
ods_dir <- "<replace-with-path-to-files>/emshoff91/converted_ods"
emshoff91_import <- create_emshoff91_import(ods_dir)

## End(Not run)

Measurement Chains: Create an SFTP Connection

Description

Measurement Chains: Create an SFTP Connection

Usage

create_sftp_connection()

Value

sftp connection


Helper function: delete_registry

Description

Helper function: delete_registry

Usage

delete_registry(office_folder = safe_office_folder(), dbg = TRUE)

Arguments

office_folder

office folder path (default: safe_office_folder)

dbg

debug (default: TRUE)


Measurement Chains: download data

Description

Measurement Chains: download data

Usage

download_measurementchains_data(
  sftp_paths,
  target_directory = temp_dir(),
  sftp_connection = create_sftp_connection(),
  run_parallel = TRUE,
  debug = FALSE
)

Arguments

sftp_paths

character vector with paths to files to be downloaded. As retrieved by get_measurementchains_files column "sftp_path"

target_directory

target directory

sftp_connection

an SFTP connection as retrieved by create_sftp_connection

run_parallel

default: TRUE

debug

show debug messages (default: FALSE)

Value

tibble with columns file_id, sftp_path and local_path of csv files

Examples

## Not run: 
mc_files <- kwb.geosalz::get_measurementchains_files()
target_directory <- tempdir()
local_paths <- kwb.geosalz::download_measurementchains_data(
sftp_paths = mc_files$sftp_path,
target_directory)

## End(Not run)

Emshoff 91: list to data frame

Description

Emshoff 91: list to data frame

Usage

emshoff91_list_to_df(emshoff91_list)

Arguments

emshoff91_list

list as retrieved by read_multiple_emshoff91_ods

Value

tibble


Emshoff 91: remap values from imported tibble

Description

Emshoff 91: remap values from imported tibble

Usage

emshoff91_remap_values(
  emshoff91_df,
  remap_list = list(fi_mi = "fi_mi_m_nn", ku_sto = "kupp_st", lf = "el_lf", progr =
    "beprob_progr", strat = "stratigr", uv254 = "uv_ext"),
  delete_cols = TRUE
)

Arguments

emshoff91_df

tibble as retrieved by emshoff91_list_to_df

remap_list

list with values to be remapped. The names of the list are the target columns; the list values are the columns whose values should be mapped to them (default: list(fi_mi = "fi_mi_m_nn", ku_sto = "kupp_st", lf = "el_lf", progr = "beprob_progr", strat = "stratigr", uv254 = "uv_ext"))

delete_cols

should unneeded columns be deleted, i.e. the ones from which data were mapped (default: TRUE)

Value

data frame with remapped values; the columns from which these values were copied are deleted (default: TRUE)


Helper function: gather_and_join_1

Description

Helper function: gather_and_join_1

Usage

gather_and_join_1(tmp_data, columns_keep, metadata, dbg = FALSE)

Arguments

tmp_data

tmp_data

columns_keep

columns_keep

metadata

metadata

dbg

dbg (default: FALSE)

Value

gathered and joined data frame


Helper function: gather_and_join_2

Description

Helper function: gather_and_join_2

Usage

gather_and_join_2(tmp_content, columns_keep, header)

Arguments

tmp_content

tmp_content

columns_keep

columns_keep

header

header

Value

gathered and joined data frame


Helper function: get_excelcnv_exe

Description

Helper function: get_excelcnv_exe

Usage

get_excelcnv_exe(office_folder = safe_office_folder())

Arguments

office_folder

office folder path (default: safe_office_folder)

Value

path containing 'excelcnv.exe'


get_foerdermengen

Description

get_foerdermengen

Usage

get_foerdermengen(
  xlsx_path,
  sheet_name = "WW Q Rhow ",
  sheet_range = "A4:S127"
)

Arguments

xlsx_path

path to xlsx file with pumping rates

sheet_name

sheet_name (default: "WW Q Rhow ")

sheet_range

sheet_range (default: "A4:S127")

Value

data frame with annual pumping rates per waterworks


Get Abstraction of Friedrichshagen Well Galleries

Description

Get Abstraction of Friedrichshagen Well Galleries

Usage

get_foerdermengen_gal_fri(path)

Arguments

path

path to "2018-04-27 Rohwasser Bericht - Galeriefördermengen.xlsx"

Value

tidy data frame with abstraction rates for waterworks Friedrichshagen


Get Measurement Chain Data on KWB Cloud

Description

Get Measurement Chain Data on KWB Cloud

Usage

get_measurement_chain_data_on_cloud(dbg = TRUE)

Arguments

dbg

logical indicating whether or not to show debug messages

Value

data frame with the content of "mc_data.zip" in the GeoSalz project folder on the Nextcloud server. The SFTP paths to the files from which the data in "mc_data.zip" originate are returned in attribute "sftp_paths". If either of the files "mc_data.zip" or "mc_files.csv" does not exist, NULL is returned.


Measurement Chains: Get Tidied Files Metadata

Description

Measurement Chains: Get Tidied Files Metadata

Usage

get_measurementchains_files(
  sftp_connection = create_sftp_connection(),
  debug = FALSE
)

Arguments

sftp_connection

an SFTP connection as retrieved by create_sftp_connection

debug

show debug messages (default: FALSE)

Value

tibble with information on available files and tidied meta-information based on file naming

Examples

## Not run: 
mc_files <- kwb.geosalz::get_measurementchains_files()
str(mc_files)

## End(Not run)

Measurement Chains: Get Metadata

Description

Measurement Chains: Get Metadata

Usage

get_measurementchains_metadata(file = extdata_file("metadata_messketten.csv"))

Arguments

file

path to measurement chains metadata file. Default: kwb.geosalz:::extdata_file("metadata_messketten.csv")

Value

tibble with measurement chains metadata

Examples

mc_metadata <- kwb.geosalz::get_measurementchains_metadata()
str(mc_metadata)
mc_metadata

Measurement Chains: get statistics for data

Description

Measurement Chains: get statistics for data

Usage

get_measurmentchains_data_stats(mc_data)

Arguments

mc_data

tibble with measurement chains data as retrieved by read_measurementchains_data

Value

tibble with columns datetime min/max, q10 (10 % quantile), median, q90 (90 % quantile)


Helper function: get_meta_sheet_or_stop

Description

Helper function: get_meta_sheet_or_stop

Usage

get_meta_sheet_or_stop(sheets, pattern, file)

Arguments

sheets

sheets

pattern

pattern

file

file

Value

meta sheet name


Get Information on Pandoc

Description

Get Information on Pandoc

Usage

get_pandoc_info()

Value

data frame with columns pandoc_directory, pandoc_version if Pandoc is installed, otherwise a message is printed that pandoc is not installed.


get_parameters_meta

Description

get_parameters_meta

Usage

get_parameters_meta(xlsx_path, sheet_name = "nur Parameterliste")

Arguments

xlsx_path

path to EXCEL spreadsheet with parameter metadata

sheet_name

name of sheet containing metadata table (default: "nur Parameterliste")

Value

imported parameter metadata with cleaned columns names


Get PhreeqC data

Description

Get PhreeqC data

Usage

get_phreeqc_data(lab_bwb)

Arguments

lab_bwb

imported BWB lab data as retrieved by read_lab_bwb

Value

tibble with columns solution, par_name_phreeqc (which are not empty or NA) and numeric_value


Helper function: get_site_id

Description

Helper function: get_site_id

Usage

get_site_id(string, pattern = "^[0-9]{1,4}")

Arguments

string

vector with character strings

pattern

pattern used for identifying site_id (default: "^[0-9]{1,4}")

Value

extracted site_id's from input string


import_labor

Description

import_labor

Usage

import_labor(files, export_dir, func = read_bwb_header2)

Arguments

files

vector with full paths of xlsx input files

export_dir

export directory

func

function to be used (default: read_bwb_header2)

Value

list with length equal to number of input files


Order Measurement Chain Data

Description

Order Measurement Chain Data

Usage

order_measurement_chain_data(data)

Arguments

data

data frame as retrieved by read_measurementchains_data

Value

data, ordered by "parameter", "sensor_id", "datum_uhrzeit"


Plot measurementchain and well operation in combined plot

Description

Plot measurementchain and well operation in combined plot

Usage

plot_measurementchain_and_well_operation(
  mc_dat,
  well_op_data_meta,
  brunnen_nr = 9,
  para = "Leitfaehigkeit",
  y_label = "elektr. Leitfähigkeit (µS/cm)",
  date_min = as.Date("2023-05-10"),
  date_max = Sys.Date()
)

Arguments

mc_dat

mc_dat

well_op_data_meta

well_op_data_meta

brunnen_nr

well id (default: 9)

para

parameter (either: "Leitfaehigkeit" or "Temperatur")

y_label

y label (default: "elektr. Leitfähigkeit (µS/cm)")

date_min

minimum date for plotting (default: as.Date("2023-05-10"))

date_max

maximum date for plotting (default: Sys.Date())

Value

combined plot


Measurement Chains: plot

Description

Measurement Chains: plot

Usage

plot_measurementchains(mc_data, para = "Leitfaehigkeit")

Arguments

mc_data

as retrieved by read_measurementchains_data

para

parameter to plot, either "Leitfaehigkeit" or "Temperatur" (default: "Leitfaehigkeit")

Value

plot of selected measurement chain parameter


Prepare PhreeqC input

Description

Prepare PhreeqC input

Usage

prepare_phreeqc_input(lab_bwb_phreeqc, title = "")

Arguments

lab_bwb_phreeqc

selected BWB lab data as retrieved by get_phreeqc_data

title

user defined title (default: "")

Value

data frame with input structure for kwb.phreeqc


Import: read_bwb_data

Description

wrapper around read_bwb_header2 and read_bwb_header1_meta

Usage

read_bwb_data(
  files,
  meta_pattern = "META",
  keep_pattern = column_pattern_gather_ignore(),
  site_id_pattern = "^[0-9]{1,4}",
  dbg = TRUE
)

Arguments

files

file path(s) to EXCEL spreadsheet

meta_pattern

(default: "META")

keep_pattern

(default: column_pattern_gather_ignore)

site_id_pattern

(default: "^[0-9]{1,4}")

dbg

debug (default: TRUE)

Value

data.table with imported xls(x) files


Import: read_bwb_header1_meta

Description

Import: read_bwb_header1_meta

Usage

read_bwb_header1_meta(
  file,
  meta_pattern = "META",
  keep_pattern = column_pattern_gather_ignore(),
  dbg = FALSE
)

Arguments

file

path(s) to EXCEL spreadsheet

meta_pattern

meta_pattern (default: "META")

keep_pattern

keep_pattern (default: column_pattern_gather_ignore)

dbg

debug (default: FALSE)

Value

data.table with imported xls(x) files


Import: read_bwb_header2

Description

Import: read_bwb_header2

Usage

read_bwb_header2(
  file,
  skip = 2,
  keep_pattern = column_pattern_gather_ignore(),
  site_id_pattern = "^[0-9]{1,4}",
  dbg = TRUE
)

Arguments

file

file path(s) to EXCEL spreadsheet

skip

number of rows to skip in each sheet (default: 2)

keep_pattern

(default: column_pattern_gather_ignore())

site_id_pattern

(default: "^[0-9]{1,4}")

dbg

debug (default: TRUE)

Value

data.table with imported xls(x) files


Import: read_bwb_header4

Description

Import: read_bwb_header4

Usage

read_bwb_header4(
  file,
  skip = 4,
  keep_pattern = column_pattern_gather_ignore(),
  site_id_pattern = "^[0-9]{1,4}",
  dbg = TRUE
)

Arguments

file

file path(s) to EXCEL spreadsheet

skip

number of rows to skip in each sheet (default: 4)

keep_pattern

(default: column_pattern_gather_ignore())

site_id_pattern

(default: "^[0-9]{1,4}")

dbg

debug (default: TRUE)

Value

data.table with imported xls(x) files


Reads a Single EMSHOFF 91 ODS File into Tibble

Description

Reads a Single EMSHOFF 91 ODS File into Tibble

Usage

read_emshoff91_ods(emshoff91_import_selected)

Arguments

emshoff91_import_selected

a row as retrieved by create_emshoff91_import

Value

imports ods file into tibble


Read Isotopes

Description

Read Isotopes

Usage

read_isotopes(path)

Arguments

path

path to delimited text file with isotopes data (field separator: ";")

Value

imported isotopes data


Read Lab BWB

Description

Read Lab BWB

Usage

read_lab_bwb(path, sheet = "Analysen")

Arguments

path

path to file with lab BWB data

sheet

name of sheet containing analysis data. Default: "Analysen"

Value

cleaned data frame with master data and lab values for all samples but only for selected parameters (columns A-BA and HB-HC)


Read Master Data

Description

Read Master Data

Usage

read_master_data(path)

Arguments

path

path to file with master data (currently in file: lab BWB data)

Value

imported master data contained in sheet "Stammdaten"


Measurement Chains: read csv data from multiple files

Description

Measurement Chains: read csv data from multiple files

Usage

read_measurementchains_data(
  csv_files,
  datetime_installation = as_gmt_plus_one("2022-09-27 11:00:00"),
  run_parallel = TRUE,
  debug = FALSE
)

Arguments

csv_files

vector of paths as retrieved by download_measurementchains_data

datetime_installation

datetime of first logger installation in well K10. Used to filter out older measurement data! Default: kwb.geosalz:::as_gmt_plus_one("2022-09-27 11:00:00")

run_parallel

default: TRUE

debug

show debug messages (default: FALSE)

Value

data frame with imported data from csv files

Examples

## Not run: 
mc_files <- kwb.geosalz::get_measurementchains_files()
target_directory <- tempdir()
csv_files <- kwb.geosalz::download_measurementchains_data(
  sftp_paths = mc_files$sftp_path,
  target_directory
)
mc_data <- kwb.geosalz::read_measurementchains_data(csv_files)

## End(Not run)

Reads Multiple EMSHOFF 91 ODS Files into List

Description

Reads Multiple EMSHOFF 91 ODS Files into List

Usage

read_multiple_emshoff91_ods(emshoff91_import)

Arguments

emshoff91_import

a tibble as retrieved by create_emshoff91_import

Value

imports multiple ods files into a list of tibbles

Examples

## Not run: 
ods_dir <- "<replace-with-path-to-files>/emshoff91/emshoff91/converted_ods"
ods_dir <- "C:/users/mrustl/Downloads/emshoff91/emshoff91/converted_ods"
emshoff91_import <- create_emshoff91_import(ods_dir)
read_multiple_emshoff91_ods(emshoff91_import)

## End(Not run)

Replace n.a. (not available) and n.b. (not determined) from lab data with NA

Description

Replace n.a. (not available) and n.b. (not determined) from lab data with NA

Usage

replace_nanb_with_na(string)

Arguments

string

string

Value

string with NA instead of "n.a." or "n.b." (including 0-10 spaces between "n" and "a"/"b")

Examples

string <- c("19.2", "n.b.", "n. b.", "n.  b.", "n.a.", "n. a.", "n.  a.")
replace_nanb_with_na(string)

Helper function: safe_office_folder

Description

Helper function: safe_office_folder

Usage

safe_office_folder(office_path = "C:/Program Files (x86)/Microsoft Office")

Arguments

office_path

office folder path (default: "C:/Program Files (x86)/Microsoft Office")

Value

path of office folder (if existing)


Helper function: stop if duplicated sample ids are found

Description

Helper function: stop if duplicated sample ids are found

Usage

stop_if_duplicated_samples_found(df, col_sampleid, path, sheet = "")

Arguments

df

data frame with samples in wide format

col_sampleid

column name of sample id

path

path to file from which df was read (for information only)

sheet

optional in case EXCEL is used (default: "")

Value

error in case duplicated samples were found


Helper function: stop_on_missing_or_inform_on_extra_sheets

Description

Helper function: stop_on_missing_or_inform_on_extra_sheets

Usage

stop_on_missing_or_inform_on_extra_sheets(has_site_id, file, sheets)

Arguments

has_site_id

has_site_id

file

file

sheets

sheets


Helper function: to_full_metadata_2

Description

Helper function: to_full_metadata_2

Usage

to_full_metadata_2(header, file, sheet)

Arguments

header

header

file

file

sheet

sheet

Value

data frame with metadata for header2 (EXCEL) files


Helper function: to_full_metadata_4

Description

Helper function: to_full_metadata_4

Usage

to_full_metadata_4(header, file, sheet)

Arguments

header

header

file

file

sheet

sheet

Value

data frame with metadata for header4 (EXCEL) files


Measurement Chains: write csv data

Description

Measurement Chains: write csv data

Usage

write_measurementchains_data(
  mc_data,
  target_directory,
  to_zip = FALSE,
  debug = FALSE
)

Arguments

mc_data

measurement chains data as retrieved by read_measurementchains_data

target_directory

target directory

to_zip

should data be zipped? (default: FALSE), if TRUE only a temporary csv file is created which will be subsequently zipped and deleted

debug

print debug messages (default: FALSE)

Value

writes csv data to path