{
  "_id": "6a290530732311cd875922a6",
  "Package": "arete",
  "Title": "Automated REtrieval from TExt",
  "Version": "0.2",
  "Date": "2026-05-11",
  "Author": "Vasco V. Branco [cre, aut] (ORCID:\n<https://orcid.org/0000-0001-7797-3183>), Vaughn Shirey [ctb]\n(ORCID: <https://orcid.org/0000-0002-3589-9699>), Thomas\nMerrien [ctb] (ORCID: <https://orcid.org/0000-0002-0339-5656>),\nPedro Cardoso [aut] (ORCID:\n<https://orcid.org/0000-0001-8119-9960>)",
  "Authors@R": "c(\nperson(\"Vasco V.\", \"Branco\", role = c(\"cre\",\"aut\"),\nemail = \"vasco.branco@helsinki.fi\",\ncomment = c(ORCID = \"0000-0001-7797-3183\")),\nperson(\"Vaughn\", \"Shirey\", role = c(\"ctb\"),\nemail = \"vms55@georgetown.edu\",\ncomment = c(ORCID = \"0000-0002-3589-9699\")),\nperson(\"Thomas\", \"Merrien\", role = c(\"ctb\"),\nemail = \"thomas.merrien@helsinki.fi\",\ncomment = c(ORCID = \"0000-0002-0339-5656\")),\nperson(\"Pedro\", \"Cardoso\", role = c(\"aut\"),\nemail = \"pedro.cardoso@helsinki.fi\",\ncomment = c(ORCID = \"0000-0001-8119-9960\"))\n)",
  "Maintainer": "Vasco V. Branco <vasco.branco@helsinki.fi>",
  "Description": "A Python based pipeline for extraction of species\noccurrence data through the usage of large language models.\nIncludes validation tools designed to handle model\nhallucinations for a scientific, rigorous use of LLM. Currently\nsupports usage of GPT with more planned, including local and\nnon-proprietary models. For more details on the methodology\nused please consult the references listed under each function,\nsuch as Kent, A. et al. (1995) <doi:10.1002/asi.5090060209>,\nvan Rijsbergen, C.J. (1979, ISBN:978-0408709293, Levenshtein,\nV.I. (1966)\n<https://nymity.ch/sybilhunting/pdf/Levenshtein1966a.pdf> and\nKlaus Krippendorff (2011)\n<https://repository.upenn.edu/handle/20.500.14332/2089>.",
  "License": "GPL-3",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.2",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-06-10 06:22:37 UTC",
    "User": "root"
  },
  "Config/Needs/website": "rmarkdown",
  "VignetteBuilder": "knitr",
  "Repository": "https://cran.r-universe.dev",
  "Date/Publication": "2026-05-11 20:09:06 UTC",
  "RemoteUrl": "https://github.com/cran/arete",
  "RemoteRef": "HEAD",
  "RemoteSha": "a98837b9c4b0ca0dd194928ff581623c013a0c14",
  "MD5sum": "b330c186d132dc42708c0f3581a62bb7",
  "_user": "cran",
  "_type": "src",
  "_file": "arete_0.2.tar.gz",
  "_fileid": "df02b4873a89372f6eef4c318e94f93dde4d8e84eb0f82fdcfa497e2e559772f",
  "_filesize": 1072693,
  "_sha256": "df02b4873a89372f6eef4c318e94f93dde4d8e84eb0f82fdcfa497e2e559772f",
  "_created": "2026-06-10T06:22:37.000Z",
  "_published": "2026-06-10T06:33:20.623Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 80495100233,
      "time": 557,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7528200803"
    },
    {
      "job": 80495100294,
      "time": 499,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7528183781"
    },
    {
      "job": 80494236510,
      "time": 388,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7528052222"
    },
    {
      "job": 80495100211,
      "time": 232,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7528111465"
    }
  ],
  "_buildurl": "https://github.com/r-universe/cran/actions/runs/27257117010",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/cran/arete",
  "_commit": {
    "id": "a98837b9c4b0ca0dd194928ff581623c013a0c14",
    "author": "Vasco V. Branco <vasco.branco@helsinki.fi>",
    "committer": "cran-robot <csardi.gabor+cran@gmail.com>",
    "message": "version 0.2\n",
    "time": 1778530146
  },
  "_maintainer": {
    "name": "Vasco V. Branco",
    "email": "vasco.branco@helsinki.fi",
    "login": "vascobranco",
    "twitter": "@VV_Branco",
    "description": "Researcher and R programmer uncovering the secrets of the flesh.",
    "uuid": 56229977,
    "orcid": "0000-0001-7797-3183"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.3.0",
      "role": "Depends"
    },
    {
      "package": "terra",
      "role": "Imports"
    },
    {
      "package": "cld2",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "role": "Imports"
    },
    {
      "package": "reticulate",
      "role": "Imports"
    },
    {
      "package": "pdftools",
      "role": "Imports"
    },
    {
      "package": "fedmatch",
      "role": "Imports"
    },
    {
      "package": "kableExtra",
      "role": "Imports"
    },
    {
      "package": "dplyr",
      "role": "Imports"
    },
    {
      "package": "gecko",
      "role": "Imports"
    },
    {
      "package": "methods",
      "role": "Imports"
    },
    {
      "package": "ggplot2",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "role": "Imports"
    },
    {
      "package": "googledrive",
      "role": "Imports"
    },
    {
      "package": "irr",
      "role": "Imports"
    },
    {
      "package": "rmarkdown",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    }
  ],
  "_owner": "cran",
  "_selfowned": false,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-43",
      "n": 1
    },
    {
      "week": "2026-20",
      "n": 1
    }
  ],
  "_tags": [
    {
      "name": "0.1",
      "date": "2025-10-20"
    },
    {
      "name": "0.2",
      "date": "2026-05-11"
    }
  ],
  "_stars": 1,
  "_contributors": [
    {
      "user": "vascobranco",
      "count": 2,
      "uuid": 56229977
    }
  ],
  "_userbio": {
    "uuid": 6899542,
    "type": "organization",
    "name": "cran",
    "description": "Unofficial read-only mirror of all CRAN R packages"
  },
  "_downloads": {
    "count": 500,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/arete"
  },
  "_searchresults": 3,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/arete.html",
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/vascobranco/arete",
  "_realowner": "vascobranco",
  "_cranurl": false,
  "_releases": [
    {
      "version": "0.1",
      "date": "2025-10-20"
    },
    {
      "version": "0.2",
      "date": "2026-05-11"
    }
  ],
  "_exports": [
    "arete_data",
    "arete_setup",
    "aux_string_to_coords",
    "check_lang",
    "compare_IUCN",
    "create_training_data",
    "file_comparison",
    "gazetteer",
    "get_geodata",
    "install_OCR_packages",
    "install_python_packages",
    "labels",
    "labels_unique",
    "performance_report",
    "process_document",
    "process_species_names",
    "string_to_coords",
    "webanno_open",
    "webanno_summary"
  ],
  "_help": [
    {
      "page": "arete_data",
      "title": "Example data packaged with arete",
      "topics": [
        "arete_data"
      ]
    },
    {
      "page": "ARETE_package",
      "title": "Summary of methods in the arete package",
      "topics": [
        "arete",
        "arete_package"
      ]
    },
    {
      "page": "arete_setup",
      "title": "Setup arete",
      "topics": [
        "arete_setup"
      ]
    },
    {
      "page": "aux_string_to_coords",
      "title": "Mechanical coordinate conversion",
      "topics": [
        "aux_string_to_coords"
      ]
    },
    {
      "page": "check_lang",
      "title": "Check if text is language-appropriate",
      "topics": [
        "check_lang"
      ]
    },
    {
      "page": "compare_IUCN",
      "title": "Check EOO differences between two sets of coordinates",
      "topics": [
        "compare_IUCN"
      ]
    },
    {
      "page": "create_training_data",
      "title": "Create training data for GPT",
      "topics": [
        "create_training_data"
      ]
    },
    {
      "page": "file_comparison",
      "title": "Compare the contents of two WebAnno tsv files.",
      "topics": [
        "file_comparison"
      ]
    },
    {
      "page": "gazetteer",
      "title": "Get geographic coordinates from localities",
      "topics": [
        "gazetteer"
      ]
    },
    {
      "page": "get_geodata",
      "title": "Call a Large Language Model (LLM) to extract species geographic data",
      "topics": [
        "get_geodata"
      ]
    },
    {
      "page": "install_OCR_packages",
      "title": "Update OCR dependencies",
      "topics": [
        "install_OCR_packages"
      ]
    },
    {
      "page": "install_python_packages",
      "title": "Update python dependencies",
      "topics": [
        "install_python_packages"
      ]
    },
    {
      "page": "labels",
      "title": "Labels for model training",
      "topics": [
        "labels"
      ]
    },
    {
      "page": "labels_unique",
      "title": "Get the unique labels of a WebAnno document",
      "topics": [
        "labels_unique"
      ]
    },
    {
      "page": "OCR_document",
      "title": "Scan PDF with optical character recognition (OCR)",
      "topics": [
        "OCR_document"
      ]
    },
    {
      "page": "performance_report",
      "title": "Evaluate the performance of a LLM",
      "topics": [
        "performance_report"
      ]
    },
    {
      "page": "process_document",
      "title": "Extract and process text from a document",
      "topics": [
        "process_document"
      ]
    },
    {
      "page": "process_species_names",
      "title": "Process and fix species names",
      "topics": [
        "process_species_names"
      ]
    },
    {
      "page": "string_to_coords",
      "title": "Convert strings to numerical coordinates",
      "topics": [
        "string_to_coords"
      ]
    },
    {
      "page": "webanno_open",
      "title": "Open a WebAnno TSV v3.3 file.",
      "topics": [
        "webanno_open"
      ]
    },
    {
      "page": "webanno_summary",
      "title": "Summarize the contents of a group of WebAnno tsv files",
      "topics": [
        "webanno_summary"
      ]
    },
    {
      "page": "WebAnnoTSV-class",
      "title": "WebAnno TSV v3.3 class creator.",
      "topics": [
        "WebAnnoTSV-class",
        "webanno_creator"
      ]
    }
  ],
  "_pkglogo": "https://github.com/cran/arete/raw/HEAD/man/figures/logo.png",
  "_rundeps": [
    "abind",
    "ape",
    "askpass",
    "base64enc",
    "BAT",
    "BH",
    "biomod2",
    "bit",
    "bit64",
    "boot",
    "bslib",
    "cachem",
    "caret",
    "class",
    "classInt",
    "cld2",
    "cli",
    "clock",
    "cluster",
    "clusterGeneration",
    "coda",
    "codetools",
    "combinat",
    "cpp11",
    "crayon",
    "curl",
    "data.table",
    "DBI",
    "DEoptim",
    "diagram",
    "digest",
    "dismo",
    "doParallel",
    "dplyr",
    "e1071",
    "evaluate",
    "expm",
    "farver",
    "fastcluster",
    "fastmap",
    "fastmatch",
    "fedmatch",
    "FNN",
    "fontawesome",
    "forcats",
    "foreach",
    "fs",
    "future",
    "future.apply",
    "gargle",
    "gbm",
    "gdistance",
    "gecko",
    "generics",
    "geometry",
    "geosphere",
    "ggplot2",
    "globals",
    "glue",
    "googledrive",
    "gower",
    "gtable",
    "hardhat",
    "here",
    "highr",
    "hitandrun",
    "hms",
    "htmltools",
    "httr",
    "hypervolume",
    "igraph",
    "ipred",
    "irr",
    "isoband",
    "iterators",
    "jquerylib",
    "jsonlite",
    "kableExtra",
    "kernlab",
    "KernSmooth",
    "knitr",
    "ks",
    "labeling",
    "lattice",
    "lava",
    "lifecycle",
    "linprog",
    "listenv",
    "lpSolve",
    "lubridate",
    "magic",
    "magrittr",
    "maps",
    "MASS",
    "Matrix",
    "mclust",
    "memoise",
    "mgcv",
    "mime",
    "mnormt",
    "ModelMetrics",
    "multicool",
    "mvtnorm",
    "nlme",
    "nls2",
    "nnet",
    "numDeriv",
    "openssl",
    "optimParallel",
    "palmerpenguins",
    "parallelly",
    "pbapply",
    "pdftools",
    "pdist",
    "permute",
    "phangorn",
    "phytools",
    "pillar",
    "pkgconfig",
    "PlotTools",
    "plyr",
    "png",
    "pracma",
    "predicts",
    "PresenceAbsence",
    "prettyunits",
    "pROC",
    "prodlim",
    "progress",
    "progressr",
    "proto",
    "proxy",
    "purrr",
    "qpdf",
    "quadprog",
    "R6",
    "rappdirs",
    "raster",
    "rbibutils",
    "rcdd",
    "RColorBrewer",
    "Rcpp",
    "RcppArmadillo",
    "RcppProgress",
    "RcppTOML",
    "Rdpack",
    "recipes",
    "red",
    "reshape",
    "reshape2",
    "reticulate",
    "rlang",
    "rmarkdown",
    "rpart",
    "rprojroot",
    "rstudioapi",
    "s2",
    "S7",
    "sass",
    "scales",
    "scatterplot3d",
    "sf",
    "shape",
    "SnowballC",
    "sp",
    "sparsevctrs",
    "SQUAREM",
    "stringdist",
    "stringi",
    "stringr",
    "survival",
    "svglite",
    "sys",
    "systemfonts",
    "terra",
    "textshaping",
    "tibble",
    "tidyr",
    "tidyselect",
    "timechange",
    "timeDate",
    "tinytex",
    "TreeTools",
    "tzdb",
    "units",
    "utf8",
    "uuid",
    "vctrs",
    "vegan",
    "viridisLite",
    "withr",
    "wk",
    "xfun",
    "xml2",
    "yaml"
  ],
  "_vignettes": [
    {
      "source": "request_example.Rmd",
      "filename": "request_example.html",
      "title": "Package workflow",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Data extraction",
        "Process coordinates",
        "Process species names",
        "Process outliers",
        "Create performance reports"
      ],
      "created": "2025-10-20 09:20:07",
      "modified": "2025-10-20 09:20:07",
      "commits": 1
    }
  ],
  "_score": 2.3010299956639813,
  "_indexed": false,
  "_nocasepkg": "arete",
  "_universes": [
    "cran"
  ],
  "_indexurl": "https://vascobranco.r-universe.dev/arete",
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.2",
      "date": "2026-06-10T06:26:35.000Z",
      "distro": "noble",
      "commit": "a98837b9c4b0ca0dd194928ff581623c013a0c14",
      "fileid": "4a48ef0d8580a4478d9e0e203ff351c2283f609c8cc8b2831ae3fbb9d903b918",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/27257117010"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.2",
      "date": "2026-06-10T06:26:29.000Z",
      "distro": "noble",
      "commit": "a98837b9c4b0ca0dd194928ff581623c013a0c14",
      "fileid": "ae0aa47c62d2f2cfd710fa003a9e8d6632f6569edd630ccc641757a1f5635990",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/27257117010"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.2",
      "date": "2026-06-10T06:27:26.000Z",
      "commit": "a98837b9c4b0ca0dd194928ff581623c013a0c14",
      "fileid": "7c6f4d8a72a82afc85df93461b82d32389828ea555a82cc9c72ce35007ed9688",
      "status": "success",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/27257117010"
    }
  ]
}