What is in the package?

Example files

This package provides example files containing artificial path data. The files are located in the extdata folder of the package. Given a filename, the function extdata_file() returns the full path to the corresponding file in this folder:

(example_file <- fakin.path.app:::extdata_file("example_file_info_1.csv"))
#> [1] "/github/workspace/pkglib/fakin.path.app/extdata/example_file_info_1.csv"

Read Functions

The package contains functions named read_*. These functions are described in the following.

read_csv()

This function reads a CSV file either with read.table() (version = 1) or with data.table::fread() (version = 2). It reports what it does and how long it takes.

data_1 <- kwb.fakin::read_csv(example_file, version = 1) 
#> Reading '/github/workspace/pkglib/fakin.path.app/extdata/example_file_info_1.csv' with utils::read.table() ... ok. (0.00 secs)

data_2 <- kwb.fakin::read_csv(example_file, version = 2)
#> Reading '/github/workspace/pkglib/fakin.path.app/extdata/example_file_info_1.csv' with data.table::fread() ... ok. (0.00 secs)

head(data_1, 3)
#>               path      size          last_access      type
#> 1            radio         0 2016-12-31T12:46:27Z directory
#> 2 radio/effect.png 729340270 2009-08-08T05:41:27Z      file
#> 3 radio/person.pdf 812027222 2011-01-17T18:35:28Z      file

head(data_2, 3)
#>               path      size         last_access      type
#> 1            radio         0 2016-12-31 12:46:27 directory
#> 2 radio/effect.png 729340270 2009-08-08 05:41:27      file
#> 3 radio/person.pdf 812027222 2011-01-17 18:35:28      file

identical(data_1, data_2)
#> [1] FALSE

read_file_info()

This function reads a text file that contains any kind of file path information. It is intended to read files that contain file paths only, one path per line, as well as files that contain additional information such as file size, creation time or last modification time. File sizes are assumed to be given in bytes and are converted to mebibytes (1 MiB = 2^20 bytes) in order to avoid very large numbers that would require a non-standard data type.

file_info <- fakin.path.app::read_file_paths(example_file)
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_file_info() ... Reading '/github/workspace/pkglib/fakin.path.app/extdata/example_file_info_1.csv' with data.table::fread() ... ok. (0.00 secs) 
#> Converting file size to MiB ... ok. (0.00 secs) 
#> ok. (0.00 secs)

head(file_info)
#>                   path     size         last_access      type
#> 1                radio   0.0000 2016-12-31 12:46:27 directory
#> 2     radio/effect.png 695.5531 2009-08-08 05:41:27      file
#> 3     radio/person.pdf 774.4095 2011-01-17 18:35:28      file
#> 4       radio/please.R 111.6977 2016-05-21 02:07:34      file
#> 5        radio/provide   0.0000 2012-10-24 05:32:42 directory
#> 6 radio/provide/double   0.0000 2018-04-05 02:02:54 directory

When reading files that contain only file paths (without any additional columns), the function adds the columns type and size. The type is guessed from the filename extension, whereas the size is set to 0 for directories and to 2^20 for files. You can specify the fileEncoding assumed for the file. When it is set to NULL, the function uses utils::localeToCharset() to guess an encoding. The encoding is passed to the function file(), which is used to open an explicit connection. This connection is then passed to readLines(). The lines read are assumed to be full file paths. Backslashes are converted to slashes. By default, all paths are sorted.

# Helper: look up the full path of the example file named x with
# kwb.fakin::extdata_file() and read it with fakin.path.app::read_file_paths()
read_example <- function(x) {
  example_path <- kwb.fakin::extdata_file(x)
  fakin.path.app::read_file_paths(example_path)
}

file_paths <- read_example("example_file_paths.csv")
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_paths_only() ... Suggested encodings: 'UTF-8', 'ISO8859-1'
#> Selected encoding: 'UTF-8'
#> Reading paths from '/tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/extdata/example_file_paths.csv' ... ok. (0.00 secs) 
#> 546 lines have been read.
#> Sorting paths ... ok. (0.00 secs) 
#> Guessing file path type ... ok. (0.00 secs) 
#> ok. (0.00 secs)
folder_paths <- read_example("example_folder_paths.csv")
#> Encodings guessed by readr:
#> [1] "ASCII"
#> Selected encoding: ascii
#> Reading file with read_paths_only() ... Suggested encodings: 'UTF-8', 'ISO8859-1'
#> Selected encoding: 'UTF-8'
#> Reading paths from '/tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/extdata/example_folder_paths.csv' ... ok. (0.00 secs) 
#> 113 lines have been read.
#> Sorting paths ... ok. (0.00 secs) 
#> Guessing file path type ... ok. (0.00 secs) 
#> ok. (0.00 secs)

head(file_paths)
#>                                           path type    size
#> 1                             radio/effect.png file 1048576
#> 2                             radio/person.pdf file 1048576
#> 3                               radio/please.R file 1048576
#> 4             radio/provide/double/between.xls file 1048576
#> 5     radio/provide/double/corner/describe.png file 1048576
#> 6 radio/provide/double/corner/select/final.pdf file 1048576
head(folder_paths)
#>                                 path      type size
#> 1                              radio directory    0
#> 2                      radio/provide directory    0
#> 3               radio/provide/double directory    0
#> 4        radio/provide/double/corner directory    0
#> 5 radio/provide/double/corner/select directory    0
#> 6        radio/provide/double/design directory    0

read_path_information()

This function reads file information files specified by a name pattern matching the files to be read. It calls read_file_info() in a loop over the files and returns a list with the list elements named according to the file names. The content of a file matching path-info_<yyyy-mm-dd_HHMM>_<name>.csv appears in the list as element <name>.

file_infos <- kwb.fakin::read_path_information(
  file_info_dir = kwb.fakin::extdata_file(""), 
  pattern = "^example_file_info.*\\.csv$", 
  sep = ";"
)
#> No files matching '^example_file_info.*\.csv$' in
#>   '/tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/extdata/'
#> Available files:
#>   'example_file_paths.csv'
#>   'example_folder_paths.csv'
#>   'example_sizeable_sankey.R'
#>   'folder_rules'
#>   'main.c'
#>   'makefile'
#>   'powershell_template_search_query.txt'
#>   'property_names.txt'
#>   'testcalls_1.R'
#>   'testcalls_2.R'
#>   'testcalls_3.R'

File Listing Functions

The functions described so far have in common that they read file information from files. The following functions can be used to create these files. They use dir() or more advanced functions from the fs package.

list_files

kwb.fakin::list_files
#> function (root, file, use_batch = TRUE) 
#> {
#>     kwb.utils::safePath(dirname(file))
#>     cat_time("Start")
#>     if (use_batch) {
#>         batchfile <- write_batch_list_files(root, file)
#>         system2(batchfile)
#>     }
#>     else {
#>         locale_all <- strsplit(Sys.getlocale("LC_ALL"), ";")[[1]]
#>         Sys.setlocale("LC_ALL", "C")
#>         on.exit(for (locale_one in locale_all) {
#>             parts <- strsplit(locale_one, "=")[[1]]
#>             Sys.setlocale(parts[1], parts[2])
#>         })
#>         paths <- kwb.utils::catAndRun(paste("Scanning all files in", 
#>             root), dir(root, all.files = TRUE, full.names = TRUE, 
#>             recursive = TRUE, no.. = TRUE))
#>         kwb.utils::writeText(paths, file, "paths to")
#>     }
#>     cat_time("End")
#> }
#> <bytecode: 0x55d852d3dab0>
#> <environment: namespace:kwb.fakin>

get_recursive_file_info

This is just a wrapper around fs::dir_info() with recurse = TRUE. The function is run inside a call to kwb.utils::catAndRun().

root <- system.file(package = "kwb.fakin")
file_info <- fakin.path.app::get_recursive_file_info(root)
#> Getting file information on files below /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin ... ok. (0.00 secs)
head(file_info[, 1:3])
#> # A tibble: 6 × 3
#>   path                                                          type        size
#>   <fs::path>                                                    <fct>    <fs::b>
#> 1 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/DESCRIPTION       file       2.29K
#> 2 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/INDEX             file       2.24K
#> 3 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/LICENSE           file       1.08K
#> 4 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/Meta              directo…      4K
#> 5 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/Meta/Rd.rds       file       1.56K
#> 6 /tmp/RtmpkdVejs/Rinst14b03ac9262c/kwb.fakin/Meta/features.rds file         123