Working with config files

Assume we are exploring some data set and, during our work, we’ve created a number of files arranged in a specific directory structure.

library(path.chain)
library(magrittr)

# Create an example file structure
tmp <- create_temp_dir("files")
create_sample_dir(tmp, override = TRUE)
#> [1] TRUE

# The sample structure we've just created looks as follows
fs::dir_tree(tmp)
#> /tmp/RtmpUu4eEW/files
#> ├── data
#> │   ├── example1.RData
#> │   ├── example2.RData
#> │   └── persons.csv
#> └── docs
#>     └── schema.txt

We then have two possible ways to represent this structure using the path.chain package.

1. Using the full_path_chain function, which returns a nested list of paths.

tmp <- create_temp_dir("files")
full.path.chain <- full_path_chain(tmp)

print(full.path.chain)
#> $.
#> [1] "/tmp/RtmpUu4eEW/files"
#> 
#> $data
#> $data$.
#> [1] "/tmp/RtmpUu4eEW/files/data"
#> 
#> $data$example1.RData
#> [1] "/tmp/RtmpUu4eEW/files/data/example1.RData"
#> 
#> $data$example2.RData
#> [1] "/tmp/RtmpUu4eEW/files/data/example2.RData"
#> 
#> $data$persons.csv
#> [1] "/tmp/RtmpUu4eEW/files/data/persons.csv"
#> 
#> 
#> $docs
#> $docs$.
#> [1] "/tmp/RtmpUu4eEW/files/docs"
#> 
#> $docs$schema.txt
#> [1] "/tmp/RtmpUu4eEW/files/docs/schema.txt"

full.path.chain %>% 
  yaml::write_yaml(temp_path("config.yaml"))

The configuration file config.yaml will look as follows:

'.': files
data:
  '.': files/data
  example1.RData: files/data/example1.RData
  example2.RData: files/data/example2.RData
  persons.csv: files/data/persons.csv
docs:
  '.': files/docs
  schema.txt: files/docs/schema.txt

We can customize the key naming convention using the naming argument.


# Build a config key from a path: keep only the last path component,
# title-case it, strip the file extension and prefix the result with "k".
naming_fun <- function(x) {
  titled_name <- stringi::stri_trans_totitle(basename(x))
  key_stem <- tools::file_path_sans_ext(titled_name)
  paste0("k", key_stem)
}

full.path.chain.2 <- full_path_chain(temp_path("files"), "kRoot", naming_fun)

full.path.chain.2 %>% 
  yaml::write_yaml(temp_path("config.yaml"))
kRoot: files
kData:
  kRoot: files/data
  kExample1: files/data/example1.RData
  kExample2: files/data/example2.RData
  kPersons: files/data/persons.csv
kDocs:
  kRoot: files/docs
  kSchema: files/docs/schema.txt

If the directory structure is not the only element in our config file, we can wrap it in an additional list.

list(kDirs = full.path.chain.2) %>% 
  list(default = .) %>% 
  yaml::write_yaml(temp_path("config.yaml"))
default:
  kDirs:
    kRoot: files
    kData:
      kRoot: files/data
      kExample1: files/data/example1.RData
      kExample2: files/data/example2.RData
      kPersons: files/data/persons.csv
    kDocs:
      kRoot: files/docs
      kSchema: files/docs/schema.txt

Then, we can load such a config file using the {config} package:

k.dirs <- config::get("kDirs", config = "default", file = temp_path("config.yaml"))
k.dirs$kDocs$kRoot
#> [1] "/tmp/RtmpUu4eEW/files/docs"
k.dirs$kData$kExample1
#> [1] "/tmp/RtmpUu4eEW/files/data/example1.RData"

What if we want to keep absolute paths rather than the relative ones?

full_path_chain(normalizePath(temp_path("files")), "kRoot", naming_fun) %>% 
  as_config("default", "kDirs") %>% 
  yaml::write_yaml(temp_path("config.yaml"))

Wrapping with the default key is required by the config package.

default:
  kDirs:
    kRoot: /home/username/Desktop/myproject/files
    kData:
      kRoot: /home/username/Desktop/myproject/files/data
      kExample1: /home/username/Desktop/myproject/files/data/example1.RData
      kExample2: /home/username/Desktop/myproject/files/data/example2.RData
      kPersons: /home/username/Desktop/myproject/files/data/persons.csv
    kDocs:
      kRoot: /home/username/Desktop/myproject/files/docs
      kSchema: /home/username/Desktop/myproject/files/docs/schema.txt

2. Using the path_chain function, which returns nested path_chain objects.

path.chain <- path_chain(temp_path("files"), naming = naming_fun)

class(path.chain)
#> [1] "path_chain"

print(path.chain$kData$kExample1)
#> [1] "Invalid file"
#> [1] "files/data/example1.RData"

# Most verbose version
path.chain %>% 
  as.list(root.name = "kRoot") %>%
  as_config("default", "kDirs") %>% 
  yaml::write_yaml(temp_path("config.yaml"))
#> [1] "Invalid file"
#> [1] "Invalid file"
#> [1] "Invalid file"

# More concise
path.chain %>% 
  as_config("default", "kDirs", root.name = "kRoot") %>% 
  yaml::write_yaml(temp_path("config.yaml"))
#> [1] "Invalid file"
#> [1] "Invalid file"
#> [1] "Invalid file"

You can also use the only.directories flag or specify levels to narrow down the results.

default:
  kDirs:
    kRoot: files/
    kData:
      kRoot: data/
      kExample1: kExample1
      kExample2: kExample2
      kPersons: kPersons
    kDocs:
      kRoot: docs/
      kSchema: kSchema

As you can see, this config file structure is more legible than the one produced with the full_path_chain function. The config file shown above can be easily consumed using the config package and the as_path_chain function.

k.dirs <- config::get("kDirs", "default", temp_path("config.yaml")) %>% 
  as_path_chain()

class(k.dirs)
#> [1] "path_chain"

k.dirs$kData$.
#> [1] "Invalid file"
#> [1] "files/data/"
k.dirs$kData$kExample1
#> [1] "Invalid file"
#> [1] "files/data/example1.RData"