Title: | Functions Related to File and Path Operations |
---|---|
Description: | This package provides helper functions that have been developed during different research projects at KWB. The functions deal with file operations and with the handling of file and folder paths. Let's see what we have in different scripts and other packages that better fits here... |
Authors: | Hauke Sonnenberg [aut, cre], Kompetenzzentrum Wasser Berlin gGmbH (KWB) [cph] |
Maintainer: | Hauke Sonnenberg <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.3.2 |
Built: | 2025-01-07 03:59:22 UTC |
Source: | https://github.com/KWB-R/kwb.file |
Add File Information From File Database
add_file_info(data)
add_file_info(data)
data |
data frame with column file_id |
data frame data
with additional columns folder_path
and file_name
# Define some paths paths <- c( "/very/long/path/very_long_file_name_1", "/very/long/path/very_long_file_name_2", "/very/long/path/very_long_file_name_3" ) # Create a "file database" from the paths file_db <- kwb.file::to_file_database(paths, remove_common_base = FALSE) # Create a data frame that relates some information to the files. # Use the file identifier instead of the full name to keep the data clean (df <- kwb.utils::noFactorDataFrame( file_id = file_db$files$file_id, value = seq_along(paths) )) # Store the file database in the attribute "file_db" df <- structure(df, file_db = file_db) # Restore the full file paths add_file_info(df)
# Define some paths paths <- c( "/very/long/path/very_long_file_name_1", "/very/long/path/very_long_file_name_2", "/very/long/path/very_long_file_name_3" ) # Create a "file database" from the paths file_db <- kwb.file::to_file_database(paths, remove_common_base = FALSE) # Create a data frame that relates some information to the files. # Use the file identifier instead of the full name to keep the data clean (df <- kwb.utils::noFactorDataFrame( file_id = file_db$files$file_id, value = seq_along(paths) )) # Store the file database in the attribute "file_db" df <- structure(df, file_db = file_db) # Restore the full file paths add_file_info(df)
Calls file.copy
under the hood but gives a message about the indices
and paths of the files that could not be copied.
copy_files_to_target_dir(from_paths, target_dir, target_files)
copy_files_to_target_dir(from_paths, target_dir, target_files)
from_paths |
paths to the files to be copied |
target_dir |
path to the target directory |
target_files |
relative paths to the target files, relative to
target_dir |
root <- system.file(package = "kwb.file") relative_paths <- dir(root, recursive = TRUE) # The original files are in root or in different subfolders relative_paths # Create a temporary target folder target_dir <- kwb.utils::createDirectory(file.path(tempdir(), "target")) # Copy all files into one target folder without subfolders from_paths <- file.path(root, relative_paths) to_paths <- basename(from_paths) # Make sure that the target file names contain no duplicates, otherwise # an error is raised to_paths <- kwb.utils::makeUnique(to_paths, warn = FALSE) # Copy the files copy_files_to_target_dir(from_paths, target_dir, to_paths) # Look at the result dir(target_dir, recursive = TRUE)
root <- system.file(package = "kwb.file") relative_paths <- dir(root, recursive = TRUE) # The original files are in root or in different subfolders relative_paths # Create a temporary target folder target_dir <- kwb.utils::createDirectory(file.path(tempdir(), "target")) # Copy all files into one target folder without subfolders from_paths <- file.path(root, relative_paths) to_paths <- basename(from_paths) # Make sure that the target file names contain no duplicates, otherwise # an error is raised to_paths <- kwb.utils::makeUnique(to_paths, warn = FALSE) # Copy the files copy_files_to_target_dir(from_paths, target_dir, to_paths) # Look at the result dir(target_dir, recursive = TRUE)
This function provides a shortcut to dir(..., full.names = TRUE)
dir_full(...)
dir_full(...)
... |
arguments passed to dir |
dir_full(system.file(package = "kwb.file"))
dir_full(system.file(package = "kwb.file"))
Get Full Paths to all XML files Below a Root Folder
dir_full_recursive_xml(root)
dir_full_recursive_xml(root)
root |
path to root folder |
vector of character
Get Default Download Directory
get_download_dir()
get_download_dir()
assumed default download directory on the user's computer (vector of character of length one)
dir_full(get_download_dir())
dir_full(get_download_dir())
Read File Metadata from YAML-File
read_file_metadata( yaml_file, file_encoding = "UTF-8", out_class = c("data.frame", "list")[1] )
read_file_metadata( yaml_file, file_encoding = "UTF-8", out_class = c("data.frame", "list")[1] )
yaml_file |
path to YAML-File containing file metadata (as saved with
|
file_encoding |
passed to argument |
out_class |
one of "data.frame", "list" |
depending on out_class
, either a data frame with the following
columns or a list with the following elements is returned:
clean file name given to the original file for simpler access,
original file name given by data provider,
original path to folder in which file was provided.
Remove the Common Root Parts
remove_common_root(x, n_keep = 1L, dbg = TRUE)
remove_common_root(x, n_keep = 1L, dbg = TRUE)
x |
list of vectors of character as returned by
strsplit |
n_keep |
minimum number of segments to be kept in any case in the
returned relative paths. For example, two paths "a" and "a/b" have the
common root "a". Removing this root would result in relative paths
"" and "b". As this is not useful, n_keep is 1L by default so that at least one segment is kept in each returned relative path. |
dbg |
if |
# Split paths at the slashes absparts <- strsplit(c("a/b/c", "a/b/d", "a/b/e/f/g", "a/b/hi"), "/") # Remove the common parts of the paths relparts <- remove_common_root(absparts) relparts # The extracted root is returned in attribute "root" attr(relparts, "root")
# Split paths at the slashes absparts <- strsplit(c("a/b/c", "a/b/d", "a/b/e/f/g", "a/b/hi"), "/") # Remove the common parts of the paths relparts <- remove_common_root(absparts) relparts # The extracted root is returned in attribute "root" attr(relparts, "root")
Split Full Paths into Directory Path and Filename
split_into_dir_and_file(paths)
split_into_dir_and_file(paths)
paths |
vector of character representing full file paths |
data frame with columns directory
and file
split_into_dir_and_file(c("path/to/file-1", "path/to/file-2"))
split_into_dir_and_file(c("path/to/file-1", "path/to/file-2"))
Split Full Paths into Root, Folder, File and Extension
split_into_root_folder_file_extension(paths, n_root_parts = 0)
split_into_root_folder_file_extension(paths, n_root_parts = 0)
paths |
vector of character representing full file paths |
n_root_parts |
number of first path segments considered as "root" |
data frame with columns root
, folder
, file
,
extension
, depth
paths <- c( "//always/the/same/root/project-1/intro.doc", "//always/the/same/root/project-1/logo.png", "//always/the/same/root/project-2/intro.txt", "//always/the/same/root/project-2/planning/file-1.doc", "//always/the/same/root/project-2/result/report.pdf" ) split_into_root_folder_file_extension(paths) split_into_root_folder_file_extension(paths, n_root_parts = 6) split_into_root_folder_file_extension(paths, n_root_parts = 7)
paths <- c( "//always/the/same/root/project-1/intro.doc", "//always/the/same/root/project-1/logo.png", "//always/the/same/root/project-2/intro.txt", "//always/the/same/root/project-2/planning/file-1.doc", "//always/the/same/root/project-2/result/report.pdf" ) split_into_root_folder_file_extension(paths) split_into_root_folder_file_extension(paths, n_root_parts = 6) split_into_root_folder_file_extension(paths, n_root_parts = 7)
Split Full Paths at Slashes into Parts
split_paths(paths, dbg = TRUE, use_fs = FALSE)
split_paths(paths, dbg = TRUE, use_fs = FALSE)
paths |
vector of character representing full file paths |
dbg |
if |
use_fs |
whether or not to simply use |
segments <- split_paths(c("path/to/file-1", "path/to/file-2")) segments
segments <- split_paths(c("path/to/file-1", "path/to/file-2")) segments
From a vector of given file paths, this function generates short and unique
identifiers for files and folders. The assignments between identifiers and
original paths are stored in two data frames, files
and folders
that are returned.
to_file_database(files, remove_common_base = TRUE)
to_file_database(files, remove_common_base = TRUE)
files |
vector of file paths |
remove_common_base |
if |
list of two data frames, files
and folders
paths <- c( "very_long/very_ugly_path/even with spaces.doc", "very_long/very_ugly_path/even with spaces.docx" ) to_file_database(paths) to_file_database(paths, remove_common_base = FALSE)
paths <- c( "very_long/very_ugly_path/even with spaces.doc", "very_long/very_ugly_path/even with spaces.docx" ) to_file_database(paths) to_file_database(paths, remove_common_base = FALSE)
Convert Long File Paths to Simple Paths
to_simple_names(paths, method = 1L, get_base = NULL, sha1_digits = 4)
to_simple_names(paths, method = 1L, get_base = NULL, sha1_digits = 4)
paths |
vector of character containing file paths |
method |
|
get_base |
function taking a vector of character as input and returning
a vector of character as output. If not |
sha1_digits |
number of digits used when |
vector of character as long as paths
paths <- c("v1_ugly_name_1.doc", "v1_very_ugly_name.xml", "v2_ugly_name_1.docx", "v2_very_ugly_name.xmlx") to_simple_names(paths, method = 1L) writeLines(sort(to_simple_names(paths, method = 2L))) # All sha1 are different because all base names (file name without extension # by default) are different. If you want to give the same sha1 to files that # correspond to each other but have a different extension, set the function # that extracts the "base name" of the file: get_base <- function(x) kwb.utils::removeExtension(gsub("^v\\d+_", "", x)) writeLines(sort(to_simple_names(paths, method = 2L, get_base = get_base))) # Now the file names that have the same base name (neglecting the prefix # v1_ or v2_) get the same sha1 and thus appear as groups in the sorted # file list
paths <- c("v1_ugly_name_1.doc", "v1_very_ugly_name.xml", "v2_ugly_name_1.docx", "v2_very_ugly_name.xmlx") to_simple_names(paths, method = 1L) writeLines(sort(to_simple_names(paths, method = 2L))) # All sha1 are different because all base names (file name without extension # by default) are different. If you want to give the same sha1 to files that # correspond to each other but have a different extension, set the function # that extracts the "base name" of the file: get_base <- function(x) kwb.utils::removeExtension(gsub("^v\\d+_", "", x)) writeLines(sort(to_simple_names(paths, method = 2L, get_base = get_base))) # Now the file names that have the same base name (neglecting the prefix # v1_ or v2_) get the same sha1 and thus appear as groups in the sorted # file list
Convert a Vector of Paths to a Matrix of Subfolders
to_subdir_matrix( paths, fill.value = "", result_type = "matrix", dbg = FALSE, method = NA_integer_ )
to_subdir_matrix( paths, fill.value = "", result_type = "matrix", dbg = FALSE, method = NA_integer_ )
paths |
vector of path strings |
fill.value |
value used to fill empty cells of the result matrix |
result_type |
one of |
dbg |
if |
method |
integer specifying the implementation method. Currently not used. |
matrix or data frame, depending on result_type
folder_matrix <- kwb.file::to_subdir_matrix(c("a1/b1/c1", "a1/b2", "a2")) folder_matrix dim(folder_matrix) folder_matrix[folder_matrix[, 1] == "a1", ]
folder_matrix <- kwb.file::to_subdir_matrix(c("a1/b1/c1", "a1/b2", "a2")) folder_matrix dim(folder_matrix) folder_matrix[folder_matrix[, 1] == "a1", ]