Title: | Helper Functions for Analysing KWB Endnote Library (Exported as .xml) |
---|---|
Description: | Helper Functions For Analysing KWB Endnote Library (Exported As .XML). |
Authors: | Michael Rustler [aut, cre] , Hauke Sonnenberg [ctb] , FAKIN [fnd], Kompetenzzentrum Wasser Berlin gGmbH (KWB) [cph] |
Maintainer: | Michael Rustler <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.2.0 |
Built: | 2024-11-14 06:17:37 UTC |
Source: | https://github.com/KWB-R/kwb.endnote |
Helper function: add fileinfo attributes
add_file_info_attributes(obj, path)
add_file_info_attributes(obj, path)
obj |
object to write attributes to |
path |
path to file |
object with file info attributes
Check two Dataframes for Differences
check_for_differences(df_x, df_y, dbg = TRUE)
check_for_differences(df_x, df_y, dbg = TRUE)
df_x |
data frame as retrieved by create_references_df() or clean_references_df() |
df_y |
data frame as retrieved by create_references_df() or clean_references_df() |
dbg |
should debug messages be printed (default: TRUE) |
a dataframe containing only the differences between df_x and df_y
## Not run: ############################################################################ ### Option 1 ### Check differences between two different versions of "KWB_documents.xml" ############################################################################ old_xml <- extdata_file("2020-05-25_KWB-documents.xml") new_xml <- extdata_file("2020-06-17_KWB-documents.xml") old_list <- kwb.endnote::create_endnote_list(old_xml) new_list <- kwb.endnote::create_endnote_list(new_xml) old_df <- kwb.endnote::create_references_df(old_list) new_df <- kwb.endnote::create_references_df(new_list) diffs_df_oldnew <- check_for_differences(old_df, new_df) head(diffs_df_oldnew) ############################################################################ ### Option 2: ### Check differences between "as-is" import and "collapsing" fields ############################################################################ endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) refs_df_collapse <- create_references_df(endnote_list, collapse = TRUE) diffs_df <- check_for_differences(refs_df, refs_df_collapse) head(diffs_df) ## End(Not run)
## Not run: ############################################################################ ### Option 1 ### Check differences between two different versions of "KWB_documents.xml" ############################################################################ old_xml <- extdata_file("2020-05-25_KWB-documents.xml") new_xml <- extdata_file("2020-06-17_KWB-documents.xml") old_list <- kwb.endnote::create_endnote_list(old_xml) new_list <- kwb.endnote::create_endnote_list(new_xml) old_df <- kwb.endnote::create_references_df(old_list) new_df <- kwb.endnote::create_references_df(new_list) diffs_df_oldnew <- check_for_differences(old_df, new_df) head(diffs_df_oldnew) ############################################################################ ### Option 2: ### Check differences between "as-is" import and "collapsing" fields ############################################################################ endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) refs_df_collapse <- create_references_df(endnote_list, collapse = TRUE) diffs_df <- check_for_differences(refs_df, refs_df_collapse) head(diffs_df) ## End(Not run)
Check Endnote for Problematic References
check_problematic_entries(endnote_list, give_hints = TRUE, dbg = TRUE)
check_problematic_entries(endnote_list, give_hints = TRUE, dbg = TRUE)
endnote_list |
list created with create_endnote_list() |
give_hints |
if TRUE hints will be generated, e.g. "add_public_or_confidential" for accessibility data |
dbg |
show debug messages (default: TRUE) |
a data frame with problematic entries
## Not run: endnote_list <- create_endnote_list() problematic_entries <- check_problematic_entries(endnote_list) head(problematic_entries) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() problematic_entries <- check_problematic_entries(endnote_list) head(problematic_entries) ## End(Not run)
Helper function: clean access information
clean_accessibility(access, give_hints = FALSE, dbg = TRUE)
clean_accessibility(access, give_hints = FALSE, dbg = TRUE)
access |
vector with accessibility information |
give_hints |
if TRUE hints will be generated, e.g. "add_public_or_confidential" in case of missing entries (default: FALSE) |
dbg |
show debug messages (default: TRUE) |
vector with cleaned accessibility information
Helper function: clean author names
clean_author_names(author_names, give_hints = FALSE, dbg = TRUE)
clean_author_names(author_names, give_hints = FALSE, dbg = TRUE)
author_names |
vector with author names to clean |
give_hints |
if TRUE hints will be generated, e.g. "fix_multiple_authors_per_line" in case of missing entries (default: FALSE) |
dbg |
show debug messages (default: TRUE) |
vector with cleaned author names
Helper function: clean DOIs
clean_dois(dois, dbg = TRUE)
clean_dois(dois, dbg = TRUE)
dois |
vector with DOIs to clean |
dbg |
show debug messages (default: TRUE) |
cleaned DOIs
Helper function: clean project names
clean_project_names(project_names, give_hints = FALSE, dbg = TRUE)
clean_project_names(project_names, give_hints = FALSE, dbg = TRUE)
project_names |
vector with project names to clean |
give_hints |
if TRUE hints will be generated, e.g. "add_project_name" in case of missing entries (default: FALSE) |
dbg |
show debug messages (default: TRUE) |
vector with cleaned project names
Clean References Dataframe
clean_references_df(endnote_list, give_hints = FALSE, dbg = TRUE)
clean_references_df(endnote_list, give_hints = FALSE, dbg = TRUE)
endnote_list |
list created with create_endnote_list() |
give_hints |
if TRUE hints will be generated, e.g. "add_public_or_confidential" for accessibility data |
dbg |
show debug messages (default: TRUE) |
cleaned references_df
## Not run: endnote_list <- create_endnote_list() refs_clean_df <- clean_references_df(endnote_list) head(refs_clean_df) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() refs_clean_df <- clean_references_df(endnote_list) head(refs_clean_df) ## End(Not run)
Create dataframe from Endnote XML file
create_df_from_endnote_xml(endnote_xml = default_xml())
create_df_from_endnote_xml(endnote_xml = default_xml())
endnote_xml |
path to Endnote library exported as .xml (default: default_xml())
|
data.frame with all information from Endnote XML joined with data from get_reference_type_names()
references_df <- create_df_from_endnote_xml() head(references_df)
references_df <- create_df_from_endnote_xml() head(references_df)
Create List From Endnote XML
create_endnote_list(endnote_xml = default_xml())
create_endnote_list(endnote_xml = default_xml())
endnote_xml |
path to Endnote library exported as .xml (default: default_xml())
|
list of imported endnote_xml
endnote_list <- create_endnote_list() str(endnote_list[1]$record) attr(endnote_list, "xml_file_info") attr(endnote_list, "xml_filename_without_extension")
endnote_list <- create_endnote_list() str(endnote_list[1]$record) attr(endnote_list, "xml_file_info") attr(endnote_list, "xml_filename_without_extension")
Create Keywords Dataframe
create_keywords_df(references_df)
create_keywords_df(references_df)
references_df |
references_df as retrieved from kwb.endnote::create_df_from_endnote_xml() |
keywords dataframe
references_df <- create_df_from_endnote_xml() keywords_df <- create_keywords_df(references_df) head(keywords_df)
references_df <- create_df_from_endnote_xml() keywords_df <- create_keywords_df(references_df) head(keywords_df)
Create List By Pub Type From Dataframe
create_list_by_pubtype_from_df(refs_df)
create_list_by_pubtype_from_df(refs_df)
refs_df |
data frame as created with create_references_df() |
list with references with one sublist for each publication type
## Not run: endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) refs_list_by_pubtype <- create_list_by_pubtype_from_df(refs_df) str(refs_list_by_pubtype, 1) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) refs_list_by_pubtype <- create_list_by_pubtype_from_df(refs_df) str(refs_list_by_pubtype, 1) ## End(Not run)
Create List with Unique Entries
create_list_with_unique_entries(refs_df)
create_list_with_unique_entries(refs_df)
refs_df |
data frame as created with create_references_df() |
list with unique values for selected columns
## Not run: endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) unique_entries_list <- create_list_with_unique_entries(refs_df) str(unique_entries_list, 1) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) unique_entries_list <- create_list_with_unique_entries(refs_df) str(unique_entries_list, 1) ## End(Not run)
Create References Dataframe
create_references_df(endnote_list, collapse = FALSE)
create_references_df(endnote_list, collapse = FALSE)
endnote_list |
list created with create_endnote_list() |
collapse |
should separate fields in "style" be collapsed to one field? (default: FALSE) |
data.frame with columns record_id, rec_number, ref_type_id, ref_type_name
endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) head(refs_df)
endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) head(refs_df)
Helper function: default filename for cleaned XLSX
default_clean_xlsx(endnote_list)
default_clean_xlsx(endnote_list)
endnote_list |
list as retrieved by create_endnote_list() |
default clean xlsx filename
Helper function: default filename for XLSX
default_xlsx(endnote_list)
default_xlsx(endnote_list)
endnote_list |
list as retrieved by create_endnote_list() |
default xlsx filename
Path to Default XML File
default_xml()
default_xml()
path to xml file stored in this package, containing references from KWB Endnote database
Get Path to File in This Package
extdata_file(...)
extdata_file(...)
... |
parts of path passed to system.file() |
Helper function: get abstract from list for a reference
get_abstract(record_list, collapse = FALSE)
get_abstract(record_list, collapse = FALSE)
record_list |
list with one record of create_endnote_list() |
collapse |
should separate fields in "style" be collapsed to one field? (default: FALSE) |
one row abstract data frame
Helper function: get authors from list for a reference
get_authors( record_list, col_name = "author", extract_value = "authors", collapse = FALSE )
get_authors( record_list, col_name = "author", extract_value = "authors", collapse = FALSE )
record_list |
list with one record of create_endnote_list() |
col_name |
default: "author" |
extract_value |
default: "authors" |
collapse |
should separate fields in "style" be collapsed to one field? (default: FALSE) |
one row authors data frame
all names which are valid inputs for tidy_selected_cols()
get_available_multi_cols(df)
get_available_multi_cols(df)
df |
as retrieved by create_references_df() or clean_references_df() |
all names which are valid inputs for tidy_selected_cols()
Helper function: get keywords from list for a reference
get_keywords( record_list, col_name = "keyword", extract_value = "keywords", collapse = FALSE )
get_keywords( record_list, col_name = "keyword", extract_value = "keywords", collapse = FALSE )
record_list |
list with one record of create_endnote_list() |
col_name |
default: "keyword" |
extract_value |
default: "keywords" |
collapse |
should separate fields in "style" be collapsed to one field? (default: FALSE) |
one row keywords data frame
Helper function: get pdfurls from list for a reference
get_pdfurls(record_list, col_name = "urls_pdf", collapse = FALSE)
get_pdfurls(record_list, col_name = "urls_pdf", collapse = FALSE)
record_list |
list with one record of create_endnote_list() |
col_name |
default: "urls_pdf" |
collapse |
should separate fields in "style" be collapsed to one field? (default: FALSE) |
one row pdfurls data frame
Helper function: get reference type names
get_reference_type_names(endnote_xml = default_xml())
get_reference_type_names(endnote_xml = default_xml())
endnote_xml |
path to Endnote library exported as .xml (default: default_xml())
|
data.frame with columns record_id, rec_number, ref_type_id, ref_type_name
ref_type_names <- get_reference_type_names() head(ref_type_names)
ref_type_names <- get_reference_type_names() head(ref_type_names)
Helper function: get secondary authors from list for a reference
get_secondary_authors(record_list, collapse = FALSE)
get_secondary_authors(record_list, collapse = FALSE)
record_list |
list with one record of create_endnote_list() |
collapse |
should separate fields in "style" be collapsed to one field? (default: FALSE) |
one row authors data frame
Helper function: get tertiary authors from list for a reference
get_tertiary_authors(record_list, collapse = FALSE)
get_tertiary_authors(record_list, collapse = FALSE)
record_list |
list with one record of create_endnote_list() |
collapse |
should separate fields in "style" be collapsed to one field? (default: FALSE) |
one row authors data frame
Helper function: get xml filename without extension
get_xml_filename_without_extension(obj)
get_xml_filename_without_extension(obj)
obj |
list or data frame as retrieved by create_endnote_list() or create_df_from_endnote_xml() |
xml filename without file extension
Helper Function: Give Hints For Accessibility
give_hints_accessiblity(access, dbg = TRUE)
give_hints_accessiblity(access, dbg = TRUE)
access |
vector with accessibility metadata to check |
dbg |
should debug messages be printed (default: TRUE) |
vector with access info with hints how to improve data quality (in case give_hints = TRUE)
Helper Function: Give Hints For Author Names
give_hints_author_names(author_names, dbg = TRUE)
give_hints_author_names(author_names, dbg = TRUE)
author_names |
vector with author names to check |
dbg |
should debug messages be printed (default: TRUE) |
vector with author_names with hints how to improve data quality (in case give_hints = TRUE)
Helper Function: Give Hints For Project Names
give_hints_project_names(project_names, dbg = TRUE)
give_hints_project_names(project_names, dbg = TRUE)
project_names |
vector with project names to check |
dbg |
should debug messages be printed (default: TRUE) |
vector with project_names with hints how to improve data quality (in case give_hints = TRUE)
Plot Number of Publications by Author
plot_pubs_by_author(pubs_by_author_df)
plot_pubs_by_author(pubs_by_author_df)
pubs_by_author_df |
a data frame with author names in column "value" and number of publications (in column "n") |
plot of number of publications per author
refs_by_author_lastfirst <- create_df_from_endnote_xml() %>% dplyr::filter(.data$key2 == "authors") %>% dplyr::count(.data$value) %>% dplyr::arrange(dplyr::desc(.data$n)) plot_pubs_by_author(refs_by_author_lastfirst[1:30, ])
refs_by_author_lastfirst <- create_df_from_endnote_xml() %>% dplyr::filter(.data$key2 == "authors") %>% dplyr::count(.data$value) %>% dplyr::arrange(dplyr::desc(.data$n)) plot_pubs_by_author(refs_by_author_lastfirst[1:30, ])
Plot Publications By Year
plot_pubs_by_year(refs_df)
plot_pubs_by_year(refs_df)
refs_df |
reference dataframe as retrieved by kwb.endnote::create_references_df() |
plot with publications by year
endnote_list <- kwb.endnote::create_endnote_list() refs_df <- kwb.endnote::create_references_df(endnote_list) plot_pubs_by_year(refs_df)
endnote_list <- kwb.endnote::create_endnote_list() refs_df <- kwb.endnote::create_references_df(endnote_list) plot_pubs_by_year(refs_df)
Plot Wordcloud Keywords
plot_wordcloud_keywords(keywords_df, ...)
plot_wordcloud_keywords(keywords_df, ...)
keywords_df |
keywords dataframe as retrieved by create_keywords_df() |
... |
additional arguments passed to wordcloud2::wordcloud2() |
wordcloud keywords plot
Reference List to Data Frame
record_list_to_df(record_list, collapse = FALSE)
record_list_to_df(record_list, collapse = FALSE)
record_list |
list with one record of create_endnote_list() |
collapse |
should separate fields in "style" be collapsed to one field? (default: FALSE) |
data frame for record
Helper function: tidy dataframe
tidy_df(df, exclude_cols = "rec_number")
tidy_df(df, exclude_cols = "rec_number")
df |
data frame as retrieved by create_references_df() or clean_references_df() |
exclude_cols |
vector of column names to exclude for gathering (default: "rec_number") |
a tidy dataframe with columns rec_number, key and value
## Not run: endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) refs_df_tidy <- tidy_df(refs_df) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() refs_df <- create_references_df(endnote_list) refs_df_tidy <- tidy_df(refs_df) ## End(Not run)
Helper Function: Tidy Multi Cols Dataframe
tidy_multi_cols_df(df)
tidy_multi_cols_df(df)
df |
as retrieved by create_references_df() or clean_references_df() |
tidy dataframe for all multi cols in df
## Not run: endnote_list <- create_endnote_list() refs_clean_df <- clean_references_df(endnote_list) multi_cols_df <- tidy_multi_cols_df(refs_clean_df) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() refs_clean_df <- clean_references_df(endnote_list) multi_cols_df <- tidy_multi_cols_df(refs_clean_df) ## End(Not run)
Helper Function: Tidy Multi Cols List
tidy_multi_cols_list(df)
tidy_multi_cols_list(df)
df |
as retrieved by create_references_df() or clean_references_df() |
tidy list with a sublist for each multi col in df (see: get_available_multi_cols())
## Not run: endnote_list <- create_endnote_list() refs_clean_df <- clean_references_df(endnote_list) multi_cols_list <- tidy_multi_cols_list(refs_clean_df) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() refs_clean_df <- clean_references_df(endnote_list) multi_cols_list <- tidy_multi_cols_list(refs_clean_df) ## End(Not run)
Helper Function: Tidy Selected Cols
tidy_selected_cols(df, column = "author")
tidy_selected_cols(df, column = "author")
df |
as retrieved by create_references_df() or clean_references_df() |
column |
a multi col column e.g. "author" |
tidy dataframe for selected multi col in df
Write Clean References Dataframe to XLSX
write_clean_references_df_to_xlsx( endnote_list, file = default_clean_xlsx(endnote_list), export_dir = ".", give_hints = FALSE, dbg = TRUE, ... )
write_clean_references_df_to_xlsx( endnote_list, file = default_clean_xlsx(endnote_list), export_dir = ".", give_hints = FALSE, dbg = TRUE, ... )
endnote_list |
list created with create_endnote_list() |
file |
name of file to save (default: default_clean_xlsx(endnote_list)) |
export_dir |
directory where to save 'file' (default: ".") |
give_hints |
if TRUE hints will be generated, e.g. "add_public_or_confidential" for accessibility data |
dbg |
show debug messages (default: TRUE) |
... |
additional arguments passed to openxlsx::write.xlsx() |
## Not run: endnote_list <- create_endnote_list() write_clean_references_df_to_xlsx(endnote_list) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() write_clean_references_df_to_xlsx(endnote_list) ## End(Not run)
Write References Dataframe to XLSX
write_references_df_to_xlsx( endnote_list, file = default_xlsx(endnote_list), export_dir = ".", dbg = TRUE, ... )
write_references_df_to_xlsx( endnote_list, file = default_xlsx(endnote_list), export_dir = ".", dbg = TRUE, ... )
endnote_list |
list created with create_endnote_list() |
file |
name of file to save (default: default_xlsx(endnote_list)) |
export_dir |
directory where to save 'file' (default: ".") |
dbg |
show debug messages (default: TRUE) |
... |
additional arguments passed to openxlsx::write.xlsx() |
write references dataframe to xlsx with one sheet for each publication type
## Not run: endnote_list <- create_endnote_list() write_references_df_to_xlsx(endnote_list) ## End(Not run)
## Not run: endnote_list <- create_endnote_list() write_references_df_to_xlsx(endnote_list) ## End(Not run)