This package provides example files containing artificial path data. The files are located in the extdata folder of the package. The function extdata_file() returns the full path to a file in this folder when given the file name:
The package contains functions named read_*. These functions are described below.
This function reads a CSV file either with read.table() (version = 1) or with data.table::fread() (version = 2). It reports what it does and how long it takes.
data_1 <- kwb.fakin::read_csv(example_file, version = 1)
#> Reading '/github/workspace/pkglib/fakin.path.app/extdata/example_file_info_1.csv' with utils::read.table() ... ok. (0.00 secs)
data_2 <- kwb.fakin::read_csv(example_file, version = 2)
#> Reading '/github/workspace/pkglib/fakin.path.app/extdata/example_file_info_1.csv' with data.table::fread() ... ok. (0.00 secs)
head(data_1, 3)
#> path size last_access type
#> 1 radio 0 2016-12-31T12:46:27Z directory
#> 2 radio/effect.png 729340270 2009-08-08T05:41:27Z file
#> 3 radio/person.pdf 812027222 2011-01-17T18:35:28Z file
head(data_2, 3)
#> path size last_access type
#> 1 radio 0 2016-12-31 12:46:27 directory
#> 2 radio/effect.png 729340270 2009-08-08 05:41:27 file
#> 3 radio/person.pdf 812027222 2011-01-17 18:35:28 file
identical(data_1, data_2)
#> [1] FALSE
This function reads a text file that contains any kind of file path information. It is intended to read files that contain file paths only, one path per line, as well as files that contain additional information such as file size, creation time or last modification time. File sizes are assumed to be given in bytes and are converted to mebibytes (1 MiB = 2^20 bytes) in order to avoid very large numbers that require a non-standard data type.
file_info <- fakin.path.app::read_file_paths(example_file)
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_file_info() ... Reading '/github/workspace/pkglib/fakin.path.app/extdata/example_file_info_1.csv' with data.table::fread() ... ok. (0.00 secs)
#> Converting file size to MiB ... ok. (0.00 secs)
#> ok. (0.00 secs)
head(file_info)
#> path size last_access type
#> 1 radio 0.0000 2016-12-31 12:46:27 directory
#> 2 radio/effect.png 695.5531 2009-08-08 05:41:27 file
#> 3 radio/person.pdf 774.4095 2011-01-17 18:35:28 file
#> 4 radio/please.R 111.6977 2016-05-21 02:07:34 file
#> 5 radio/provide 0.0000 2012-10-24 05:32:42 directory
#> 6 radio/provide/double 0.0000 2018-04-05 02:02:54 directory
When reading files that only contain file paths (without any additional columns), the function adds the columns type and size. The type is guessed from the filename extension, whereas the size is set to 0 for directories and to 2^20 for files. You can specify the fileEncoding assumed for the file. When it is set to NULL, the function uses utils::localeToCharset() to guess an encoding. The encoding is passed to the function file(), which is used to open an explicit connection. This connection is then given to readLines(). The lines read are assumed to be full file paths. Backslashes are converted to slashes. By default, all paths are sorted.
# Helper function
read_example <- function(x) {
  # Resolve the name of an example file to its full path in the extdata
  # folder of kwb.fakin, then read the path information from that file.
  example_path <- kwb.fakin::extdata_file(x)
  fakin.path.app::read_file_paths(example_path)
}
file_paths <- read_example("example_file_paths.csv")
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_paths_only() ... Suggested encodings: 'UTF-8', 'ISO8859-1'
#> Selected encoding: 'UTF-8'
#> Reading paths from '/tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/extdata/example_file_paths.csv' ... ok. (0.00 secs)
#> 546 lines have been read.
#> Sorting paths ... ok. (0.00 secs)
#> Guessing file path type ... ok. (0.00 secs)
#> ok. (0.00 secs)
folder_paths <- read_example("example_folder_paths.csv")
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_paths_only() ... Suggested encodings: 'UTF-8', 'ISO8859-1'
#> Selected encoding: 'UTF-8'
#> Reading paths from '/tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/extdata/example_folder_paths.csv' ... ok. (0.00 secs)
#> 113 lines have been read.
#> Sorting paths ... ok. (0.00 secs)
#> Guessing file path type ... ok. (0.00 secs)
#> ok. (0.00 secs)
head(file_paths)
#> path type size
#> 1 radio/effect.png file 1048576
#> 2 radio/person.pdf file 1048576
#> 3 radio/please.R file 1048576
#> 4 radio/provide/double/between.xls file 1048576
#> 5 radio/provide/double/corner/describe.png file 1048576
#> 6 radio/provide/double/corner/select/final.pdf file 1048576
head(folder_paths)
#> path type size
#> 1 radio directory 0
#> 2 radio/provide directory 0
#> 3 radio/provide/double directory 0
#> 4 radio/provide/double/corner directory 0
#> 5 radio/provide/double/corner/select directory 0
#> 6 radio/provide/double/design directory 0
This function reads file information files that are selected by a name pattern. It calls read_file_info() in a loop over the matching files and returns a list whose elements are named according to the file names. The content of a file named path-info_<yyyy-mm-dd_HHMM>_<name>.csv appears in the list as element <name>.
file_infos <- kwb.fakin::read_path_information(
file_info_dir = kwb.fakin::extdata_file(""),
pattern = "^example_file_info.*\\.csv$",
sep = ";"
)
#> No files matching '^example_file_info.*\.csv$' in
#> '/tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/extdata/'
#> Available files:
#> 'example_file_paths.csv'
#> 'example_folder_paths.csv'
#> 'example_sizeable_sankey.R'
#> 'folder_rules'
#> 'main.c'
#> 'makefile'
#> 'powershell_template_search_query.txt'
#> 'property_names.txt'
#> 'testcalls_1.R'
#> 'testcalls_2.R'
#> 'testcalls_3.R'
The functions treated so far have in common that they read file information from files. The following functions can be used to create these files. They use dir() or more advanced functions from the fs package.
kwb.fakin::list_files
#> function (root, file, use_batch = TRUE)
#> {
#> kwb.utils::safePath(dirname(file))
#> cat_time("Start")
#> if (use_batch) {
#> batchfile <- write_batch_list_files(root, file)
#> system2(batchfile)
#> }
#> else {
#> locale_all <- strsplit(Sys.getlocale("LC_ALL"), ";")[[1]]
#> Sys.setlocale("LC_ALL", "C")
#> on.exit(for (locale_one in locale_all) {
#> parts <- strsplit(locale_one, "=")[[1]]
#> Sys.setlocale(parts[1], parts[2])
#> })
#> paths <- kwb.utils::catAndRun(paste("Scanning all files in",
#> root), dir(root, all.files = TRUE, full.names = TRUE,
#> recursive = TRUE, no.. = TRUE))
#> kwb.utils::writeText(paths, file, "paths to")
#> }
#> cat_time("End")
#> }
#> <bytecode: 0x55d852d3dab0>
#> <environment: namespace:kwb.fakin>
This is just a wrapper around fs::dir_info() with recurse = TRUE. The function is run inside a call to kwb.utils::catAndRun().
root <- system.file(package = "kwb.fakin")
file_info <- fakin.path.app::get_recursive_file_info(root)
#> Getting file information on files below /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin ... ok. (0.00 secs)
head(file_info[, 1:3])
#> # A tibble: 6 × 3
#> path type size
#> <fs::path> <fct> <fs::b>
#> 1 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/DESCRIPTION file 2.29K
#> 2 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/INDEX file 2.24K
#> 3 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/LICENSE file 1.08K
#> 4 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/Meta directo… 4K
#> 5 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/Meta/Rd.rds file 1.56K
#> 6 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/Meta/features.rds file 123