Package 'kwb.endnote' reference manual

Title:	Helper Functions for Analysing KWB Endnote Library (Exported as .xml)
Description:	Helper Functions For Analysing KWB Endnote Library (Exported As .XML).
Authors:	Michael Rustler [aut, cre] , Hauke Sonnenberg [ctb] , FAKIN [fnd], Kompetenzzentrum Wasser Berlin gGmbH (KWB) [cph]
Maintainer:	Michael Rustler <[email protected]>
License:	MIT + file LICENSE
Version:	0.2.0
Built:	2025-03-14 03:45:46 UTC
Source:	https://github.com/KWB-R/kwb.endnote

Helper function: add fileinfo attributes

Description

Helper function: add fileinfo attributes

Usage

add_file_info_attributes(obj, path)
add_file_info_attributes(obj, path)

Arguments

`obj`	object to write attributes to
`path`	path to file

Value

object with file info attributes

Check two Dataframes for Differences

Description

Check two Dataframes for Differences

Usage

check_for_differences(df_x, df_y, dbg = TRUE)
check_for_differences(df_x, df_y, dbg = TRUE)

Arguments

`df_x`	data frame as retrieved by create_references_df() or clean_references_df()
`df_y`	data frame as retrieved by create_references_df() or clean_references_df()
`dbg`	should debug messages be printed (default: TRUE)

Value

a dataframe containing only the differences between df_x and df_y

Examples

## Not run: 
############################################################################
### Option 1
### Check differences between two different versions of "KWB_documents.xml"
############################################################################

old_xml <- extdata_file("2020-05-25_KWB-documents.xml")
new_xml <- extdata_file("2020-06-17_KWB-documents.xml")
old_list <- kwb.endnote::create_endnote_list(old_xml)
new_list <- kwb.endnote::create_endnote_list(new_xml)
old_df <- kwb.endnote::create_references_df(old_list)
new_df <- kwb.endnote::create_references_df(new_list)
diffs_df_oldnew <- check_for_differences(old_df, new_df)
head(diffs_df_oldnew)

############################################################################
### Option 2:
### Check differences between "as-is" import and "collapsing" fields
############################################################################

endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
refs_df_collapse <- create_references_df(endnote_list, collapse = TRUE)
diffs_df <- check_for_differences(refs_df, refs_df_collapse)
head(diffs_df)


## End(Not run)
## Not run: 
############################################################################
### Option 1
### Check differences between two different versions of "KWB_documents.xml"
############################################################################

old_xml <- extdata_file("2020-05-25_KWB-documents.xml")
new_xml <- extdata_file("2020-06-17_KWB-documents.xml")
old_list <- kwb.endnote::create_endnote_list(old_xml)
new_list <- kwb.endnote::create_endnote_list(new_xml)
old_df <- kwb.endnote::create_references_df(old_list)
new_df <- kwb.endnote::create_references_df(new_list)
diffs_df_oldnew <- check_for_differences(old_df, new_df)
head(diffs_df_oldnew)

############################################################################
### Option 2:
### Check differences between "as-is" import and "collapsing" fields
############################################################################

endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
refs_df_collapse <- create_references_df(endnote_list, collapse = TRUE)
diffs_df <- check_for_differences(refs_df, refs_df_collapse)
head(diffs_df)


## End(Not run)

Check Endnote for Problematic References

Description

Check Endnote for Problematic References

Usage

check_problematic_entries(endnote_list, give_hints = TRUE, dbg = TRUE)
check_problematic_entries(endnote_list, give_hints = TRUE, dbg = TRUE)

Arguments

`endnote_list`	list created with create_endnote_list()
`give_hints`	if TRUE hints will be generated, e.g. "add_public_or_confidential" for accessibility data
`dbg`	show debug messages (default: TRUE)

Value

a data frame with problematic entries

Examples

## Not run: endnote_list <- create_endnote_list()
problematic_entries <- check_problematic_entries(endnote_list)
head(problematic_entries)

## End(Not run)
## Not run: endnote_list <- create_endnote_list()
problematic_entries <- check_problematic_entries(endnote_list)
head(problematic_entries)

## End(Not run)

Helper function: clean access information

Description

Helper function: clean access information

Usage

clean_accessibility(access, give_hints = FALSE, dbg = TRUE)
clean_accessibility(access, give_hints = FALSE, dbg = TRUE)

Arguments

`access`	vector with accessibility information
`give_hints`	if TRUE hints will be generated, e.g. "add_public_or_confidential" in case of missing entries (default: FALSE)
`dbg`	show debug messages (default: TRUE)

Value

vector with cleaned accessibility information

Helper function: clean author names

Description

Helper function: clean author names

Usage

clean_author_names(author_names, give_hints = FALSE, dbg = TRUE)
clean_author_names(author_names, give_hints = FALSE, dbg = TRUE)

Arguments

`author_names`	vector with author names to clean
`give_hints`	if TRUE hints will be generated, e.g. "fix_multiple_authors_per_line" in case of missing entries (default: FALSE)
`dbg`	show debug messages (default: TRUE)

Value

vector with cleaned author names

Helper function: clean DOIs

Description

Helper function: clean DOIs

Usage

clean_dois(dois, dbg = TRUE)
clean_dois(dois, dbg = TRUE)

Arguments

`dois`	vector with DOIs to clean
`dbg`	show debug messages (default: TRUE)

Value

cleaned DOIs

Helper function: clean project names

Description

Helper function: clean project names

Usage

clean_project_names(project_names, give_hints = FALSE, dbg = TRUE)
clean_project_names(project_names, give_hints = FALSE, dbg = TRUE)

Arguments

`project_names`	vector with project names to clean
`give_hints`	if TRUE hints will be generated, e.g. "add_project_name" in case of missing entries (default: FALSE)
`dbg`	show debug messages (default: TRUE)

Value

vector with cleaned project names

Clean References Dataframe

Description

Clean References Dataframe

Usage

clean_references_df(endnote_list, give_hints = FALSE, dbg = TRUE)
clean_references_df(endnote_list, give_hints = FALSE, dbg = TRUE)

Arguments

`endnote_list`	list created with create_endnote_list()
`give_hints`	if TRUE hints will be generated, e.g. "add_public_or_confidential" for accessibility data
`dbg`	show debug messages (default: TRUE)

Value

cleaned references_df

Examples

## Not run: endnote_list <- create_endnote_list()
refs_clean_df <- clean_references_df(endnote_list)
head(refs_clean_df)

## End(Not run)
## Not run: endnote_list <- create_endnote_list()
refs_clean_df <- clean_references_df(endnote_list)
head(refs_clean_df)

## End(Not run)

Create dataframe from Endnote XML file

Description

Create dataframe from Endnote XML file

Usage

create_df_from_endnote_xml(endnote_xml = default_xml())
create_df_from_endnote_xml(endnote_xml = default_xml())

Arguments

endnote_xml

path to Endnote library exported as .xml (default: default_xml())

Value

data.frame with all information from Endnote XML joined with data from get_reference_type_names()

Examples

references_df <- create_df_from_endnote_xml()
head(references_df)

references_df <- create_df_from_endnote_xml()
head(references_df)

Create List From Endnote XML

Description

Create List From Endnote XML

Usage

create_endnote_list(endnote_xml = default_xml())
create_endnote_list(endnote_xml = default_xml())

Arguments

endnote_xml

path to Endnote library exported as .xml (default: default_xml())

Value

list of imported endnote_xml

Examples

endnote_list <- create_endnote_list()
str(endnote_list[1]$record)
attr(endnote_list, "xml_file_info")
attr(endnote_list, "xml_filename_without_extension")
endnote_list <- create_endnote_list()
str(endnote_list[1]$record)
attr(endnote_list, "xml_file_info")
attr(endnote_list, "xml_filename_without_extension")

Create Keywords Dataframe

Description

Create Keywords Dataframe

Usage

create_keywords_df(references_df)
create_keywords_df(references_df)

Arguments

references_df

references_df as retrieved from kwb.endnote::create_df_from_endnote_xml()

Value

keywords dataframe

Examples

references_df <- create_df_from_endnote_xml()
keywords_df <- create_keywords_df(references_df)
head(keywords_df)

references_df <- create_df_from_endnote_xml()
keywords_df <- create_keywords_df(references_df)
head(keywords_df)

Create List By Pub Type From Dataframe

Description

Create List By Pub Type From Dataframe

Usage

create_list_by_pubtype_from_df(refs_df)
create_list_by_pubtype_from_df(refs_df)

Arguments

refs_df

data frame as created with create_references_df()

Value

list with references with one sublist for each publication type

Examples

## Not run: 
endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
refs_list_by_pubtype <- create_list_by_pubtype_from_df(refs_df)
str(refs_list_by_pubtype, 1)

## End(Not run)
## Not run: 
endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
refs_list_by_pubtype <- create_list_by_pubtype_from_df(refs_df)
str(refs_list_by_pubtype, 1)

## End(Not run)

Create List with Unique Entries

Description

Create List with Unique Entries

Usage

create_list_with_unique_entries(refs_df)
create_list_with_unique_entries(refs_df)

Arguments

refs_df

data frame as created with create_references_df()

Value

list with unique values for selected columns

Examples

## Not run: endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
unique_entries_list <- create_list_with_unique_entries(refs_df)
str(unique_entries_list, 1)

## End(Not run)
## Not run: endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
unique_entries_list <- create_list_with_unique_entries(refs_df)
str(unique_entries_list, 1)

## End(Not run)

Create References Dataframe

Description

Create References Dataframe

Usage

create_references_df(endnote_list, collapse = FALSE)
create_references_df(endnote_list, collapse = FALSE)

Arguments

`endnote_list`	list created with create_endnote_list()
`collapse`	should separate fields in "style" be collapsed to one field? (default: FALSE)

Value

data.frame with columns record_id, rec_number, ref_type_id, ref_type_name

Examples

endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
head(refs_df)

endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
head(refs_df)

Helper function: default filename for cleaned XLSX

Description

Helper function: default filename for cleaned XLSX

Usage

default_clean_xlsx(endnote_list)
default_clean_xlsx(endnote_list)

Arguments

endnote_list

list as retrieved by create_endnote_list()

Value

default clean xlsx filename

Helper function: default filename for XLSX

Description

Helper function: default filename for XLSX

Usage

default_xlsx(endnote_list)
default_xlsx(endnote_list)

Arguments

endnote_list

list as retrieved by create_endnote_list()

Value

default xlsx filename

Path to Default XML File

Description

Path to Default XML File

Usage

default_xml()
default_xml()

Value

path to xml file stored in this package, containing references from KWB Endnote database

Get Path to File in This Package

Description

Get Path to File in This Package

Usage

extdata_file(...)
extdata_file(...)

Arguments

...

parts of path passed to system.file

Helper function: get abstract from list for a reference

Description

Helper function: get abstract from list for a reference

Usage

get_abstract(record_list, collapse = FALSE)
get_abstract(record_list, collapse = FALSE)

Arguments

`record_list`	list with one record of create_endnote_list()
`collapse`	should separate fields in "style" be collapsed to one field? (default: FALSE)

Value

one row abstract data frame

Helper function: get authors from list for a reference

Description

Helper function: get authors from list for a reference

Usage

get_authors(
  record_list,
  col_name = "author",
  extract_value = "authors",
  collapse = FALSE
)
get_authors(
  record_list,
  col_name = "author",
  extract_value = "authors",
  collapse = FALSE
)

Arguments

`record_list`	list with one record of create_endnote_list()
`col_name`	default: "author"
`extract_value`	default: "authors"
`collapse`	should separate fields in "style" be collapsed to one field? (default: FALSE)

Value

one row authors data frame

Helper Function: Get Available Multi Col Names

Description

all names which are valid inputs for tidy_selected_cols()

Usage

get_available_multi_cols(df)
get_available_multi_cols(df)

Arguments

`df`	as retrieved by create_references_df() or clean_references_df()

Value

all names which are valid inputs for tidy_selected_cols()

Helper function: get keywords from list for a reference

Description

Helper function: get keywords from list for a reference

Usage

get_keywords(
  record_list,
  col_name = "keyword",
  extract_value = "keywords",
  collapse = FALSE
)
get_keywords(
  record_list,
  col_name = "keyword",
  extract_value = "keywords",
  collapse = FALSE
)

Arguments

`record_list`	list with one record of create_endnote_list()
`col_name`	default: "keyword"
`extract_value`	default: "keywords"
`collapse`	should separate fields in "style" be collapsed to one field? (default: FALSE)

Value

one row keywords data frame

Helper function: get pdfurls from list for a reference

Description

Helper function: get pdfurls from list for a reference

Usage

get_pdfurls(record_list, col_name = "urls_pdf", collapse = FALSE)
get_pdfurls(record_list, col_name = "urls_pdf", collapse = FALSE)

Arguments

`record_list`	list with one record of create_endnote_list()
`col_name`	default: "urls_pdf"
`collapse`	should separate fields in "style" be collapsed to one field? (default: FALSE)

Value

one row pdfurls data frame

Helper function: get reference type names

Description

Helper function: get reference type names

Usage

get_reference_type_names(endnote_xml = default_xml())
get_reference_type_names(endnote_xml = default_xml())

Arguments

endnote_xml

path to Endnote library exported as .xml (default: default_xml())

Value

data.frame with columns record_id, rec_number, ref_type_id, ref_type_name

Examples

ref_type_names <- get_reference_type_names()
head(ref_type_names)
ref_type_names <- get_reference_type_names()
head(ref_type_names)

Helper function: get secondary authors from list for a reference

Description

Helper function: get secondary authors from list for a reference

Usage

get_secondary_authors(record_list, collapse = FALSE)
get_secondary_authors(record_list, collapse = FALSE)

Arguments

`record_list`	list with one record of create_endnote_list()
`collapse`	should separate fields in "style" be collapsed to one field? (default: FALSE)

Value

one row authors data frame

Helper function: get tertiary authors from list for a reference

Description

Helper function: get tertiary authors from list for a reference

Usage

get_tertiary_authors(record_list, collapse = FALSE)
get_tertiary_authors(record_list, collapse = FALSE)

Arguments

`record_list`	list with one record of create_endnote_list()
`collapse`	should separate fields in "style" be collapsed to one field? (default: FALSE)

Value

one row authors data frame

Helper function: get xml filename without extension

Description

Helper function: get xml filename without extension

Usage

get_xml_filename_without_extension(obj)
get_xml_filename_without_extension(obj)

Arguments

obj

list or data frame as retrieved by create_endnote_list() or create_df_from_endnote_xml()

Value

xml filename without file extension

Helper Function: Give Hints For Accessibility

Description

Helper Function: Give Hints For Accessibility

Usage

give_hints_accessiblity(access, dbg = TRUE)
give_hints_accessiblity(access, dbg = TRUE)

Arguments

`access`	vector with accessibility metadata to check
`dbg`	should debug messages be printed (default: TRUE)

Value

vector with access info with hints how to improve data quality (in case give_hints = TRUE)

Helper Function: Give Hints For Author Names

Description

Helper Function: Give Hints For Author Names

Usage

give_hints_author_names(author_names, dbg = TRUE)
give_hints_author_names(author_names, dbg = TRUE)

Arguments

`author_names`	vector with author names to check
`dbg`	should debug messages be printed (default: TRUE)

Value

vector with author_names with hints how to improve data quality (in case give_hints = TRUE)

Helper Function: Give Hints For Project Names

Description

Helper Function: Give Hints For Project Names

Usage

give_hints_project_names(project_names, dbg = TRUE)
give_hints_project_names(project_names, dbg = TRUE)

Arguments

`project_names`	vector with project names to check
`dbg`	should debug messages be printed (default: TRUE)

Value

vector with project_names with hints how to improve data quality (in case give_hints = TRUE)

Plot Number of Publications by Author

Description

Plot Number of Publications by Author

Usage

plot_pubs_by_author(pubs_by_author_df)
plot_pubs_by_author(pubs_by_author_df)

Arguments

pubs_by_author_df

a data frame with author names in column "value" and number of publications (in column "n")

Value

plot of number of publications per author

Examples

refs_by_author_lastfirst <- create_df_from_endnote_xml() %>%
dplyr::filter(.data$key2 == "authors") %>%
dplyr::count(.data$value)  %>%
dplyr::arrange(dplyr::desc(.data$n))
plot_pubs_by_author(refs_by_author_lastfirst[1:30, ])
refs_by_author_lastfirst <- create_df_from_endnote_xml() %>%
dplyr::filter(.data$key2 == "authors") %>%
dplyr::count(.data$value)  %>%
dplyr::arrange(dplyr::desc(.data$n))
plot_pubs_by_author(refs_by_author_lastfirst[1:30, ])

Plot Publications By Year

Description

Plot Publications By Year

Usage

plot_pubs_by_year(refs_df)
plot_pubs_by_year(refs_df)

Arguments

refs_df

reference dataframe as retrieved by kwb.endnote::create_references_df()

Value

plot with publications by year

Examples

endnote_list <- kwb.endnote::create_endnote_list()
refs_df <- kwb.endnote::create_references_df(endnote_list)
plot_pubs_by_year(refs_df)

endnote_list <- kwb.endnote::create_endnote_list()
refs_df <- kwb.endnote::create_references_df(endnote_list)
plot_pubs_by_year(refs_df)

Plot Wordcloud Keywords

Description

Plot Wordcloud Keywords

Usage

plot_wordcloud_keywords(keywords_df, ...)
plot_wordcloud_keywords(keywords_df, ...)

Arguments

`keywords_df`	keywords dataframe as retrieved by create_keywords_df()
`...`	additional arguments passed to wordcloud2::wordcloud2()

Value

wordcloud keywords plot

Reference List to Data Frame

Description

Reference List to Data Frame

Usage

record_list_to_df(record_list, collapse = FALSE)
record_list_to_df(record_list, collapse = FALSE)

Arguments

`record_list`	list with one record of create_endnote_list()
`collapse`	should separate fields in "style" be collapsed to one field? (default: FALSE)

Value

data frame for record

Helper function: tidy dataframe

Description

Helper function: tidy dataframe

Usage

tidy_df(df, exclude_cols = "rec_number")
tidy_df(df, exclude_cols = "rec_number")

Arguments

`df`	data frame as retrieved by create_references_df() or clean_references_df()
`exclude_cols`	vector of column names to exclude for gathering (default: "rec_number")

Value

a tidy dataframe with columns rec_number, key and value

Examples

## Not run: 
endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
refs_df_tidy <- tidy_df(refs_df)

## End(Not run)
## Not run: 
endnote_list <- create_endnote_list()
refs_df <- create_references_df(endnote_list)
refs_df_tidy <- tidy_df(refs_df)

## End(Not run)

Helper Function: Tidy Multi Cols Dataframe

Description

Helper Function: Tidy Multi Cols Dataframe

Usage

tidy_multi_cols_df(df)
tidy_multi_cols_df(df)

Arguments

`df`	as retrieved by create_references_df() or clean_references_df()

Value

tidy dataframe for all multi cols in df

Examples

## Not run: endnote_list <- create_endnote_list()
refs_clean_df <- clean_references_df(endnote_list)
multi_cols_df <- tidy_multi_cols_df(refs_clean_df)

## End(Not run)
## Not run: endnote_list <- create_endnote_list()
refs_clean_df <- clean_references_df(endnote_list)
multi_cols_df <- tidy_multi_cols_df(refs_clean_df)

## End(Not run)

Helper Function: Tidy Multi Cols List

Description

Helper Function: Tidy Multi Cols List

Usage

tidy_multi_cols_list(df)
tidy_multi_cols_list(df)

Arguments

`df`	as retrieved by create_references_df() or clean_references_df()

Value

tidy list with a sublist for each multi col in df (see: get_available_multi_cols())

Examples

## Not run: endnote_list <- create_endnote_list()
refs_clean_df <- clean_references_df(endnote_list)
multi_cols_list <- tidy_multi_cols_list(refs_clean_df)

## End(Not run)
## Not run: endnote_list <- create_endnote_list()
refs_clean_df <- clean_references_df(endnote_list)
multi_cols_list <- tidy_multi_cols_list(refs_clean_df)

## End(Not run)

Helper Function: Tidy Selected Cols

Description

Helper Function: Tidy Selected Cols

Usage

tidy_selected_cols(df, column = "author")
tidy_selected_cols(df, column = "author")

Arguments

`df`	as retrieved by create_references_df() or clean_references_df()
`column`	a multi col column e.g. "author"

Value

tidy dataframe for selected multi col in df

Write Clean References Dataframe to XLSX

Description

Write Clean References Dataframe to XLSX

Usage

write_clean_references_df_to_xlsx(
  endnote_list,
  file = default_clean_xlsx(endnote_list),
  export_dir = ".",
  give_hints = FALSE,
  dbg = TRUE,
  ...
)
write_clean_references_df_to_xlsx(
  endnote_list,
  file = default_clean_xlsx(endnote_list),
  export_dir = ".",
  give_hints = FALSE,
  dbg = TRUE,
  ...
)

Arguments

`endnote_list`	list created with create_endnote_list()
`file`	name of file to save (default: default_clean_xlsx(endnote_list))
`export_dir`	directory where to save 'file' (default: ".")
`give_hints`	if TRUE hints will be generated, e.g. "add_public_or_confidential" for accessibility data
`dbg`	show debug messages (default: TRUE)
`...`	additional arguments passed to openxlsx::write.xlsx()

Examples

## Not run: endnote_list <- create_endnote_list()
write_clean_references_df_to_xlsx(endnote_list)

## End(Not run)
## Not run: endnote_list <- create_endnote_list()
write_clean_references_df_to_xlsx(endnote_list)

## End(Not run)

Write References Dataframe to XLSX

Description

Write References Dataframe to XLSX

Usage

write_references_df_to_xlsx(
  endnote_list,
  file = default_xlsx(endnote_list),
  export_dir = ".",
  dbg = TRUE,
  ...
)
write_references_df_to_xlsx(
  endnote_list,
  file = default_xlsx(endnote_list),
  export_dir = ".",
  dbg = TRUE,
  ...
)

Arguments

`endnote_list`	list created with create_endnote_list()
`file`	name of file to save (default: default_xlsx(endnote_list))
`export_dir`	directory where to save 'file' (default: ".")
`dbg`	show debug messages (default: TRUE)
`...`	additional arguments passed to openxlsx::write.xlsx()

Value

write references dataframe to xlsx with one sheet for each publication type

Examples

## Not run: endnote_list <- create_endnote_list()
write_references_df_to_xlsx(endnote_list)

## End(Not run)
## Not run: endnote_list <- create_endnote_list()
write_references_df_to_xlsx(endnote_list)

## End(Not run)

Package 'kwb.endnote'

Help Index

Helper function: add fileinfo attributes

Description

Usage

Arguments

Value

Check two Dataframes for Differences

Description

Usage

Arguments

Value

Examples

Check Endnote for Problematic References

Description

Usage

Arguments

Value

Examples

Helper function: clean access information

Description

Usage

Arguments

Value

Helper function: clean author names

Description

Usage

Arguments

Value

Helper function: clean DOIs

Description

Usage

Arguments

Value

Helper function: clean project names

Description

Usage

Arguments

Value

Clean References Dataframe

Description

Usage

Arguments

Value

Examples

Create dataframe from Endnote XML file

Description

Usage

Arguments

Value

Examples

Create List From Endnote XML

Description

Usage

Arguments

Value

Examples

Create Keywords Dataframe

Description

Usage

Arguments

Value

Examples

Create List By Pub Type From Dataframe

Description

Usage

Arguments

Value

Examples

Create List with Unique Entries

Description

Usage

Arguments

Value

Examples

Create References Dataframe

Description

Usage

Arguments

Value