{
  "_id": "6a101d50acfb0bcc41c8a167",
  "Package": "llmclean",
  "Type": "Package",
  "Title": "'LLM'-Assisted Data Cleaning with Multi-Provider Support",
  "Version": "0.1.0",
  "Date": "2026-04-15",
  "Authors@R": "person(\ngiven   = \"Sadikul\",\nfamily  = \"Islam\",\nemail   = \"sadikul.islamiasri@gmail.com\",\nrole    = c(\"aut\", \"cre\"),\ncomment = c(ORCID = \"0000-0003-2924-7122\")\n)",
  "Author": "Sadikul Islam [aut, cre] (ORCID:\n<https://orcid.org/0000-0003-2924-7122>)",
  "Maintainer": "Sadikul Islam <sadikul.islamiasri@gmail.com>",
  "Description": "Detects and suggests fixes for semantic inconsistencies in\ndata frames by calling large language models (LLMs) through a\nunified, provider-agnostic interface. Supported providers\ninclude 'OpenAI' ('GPT-4o', 'GPT-4o-mini'), 'Anthropic'\n('Claude'), 'Google' ('Gemini'), 'Groq' (free-tier 'LLaMA' and\n'Mixtral'), and local 'Ollama' models. The package identifies\nissues that rule-based tools cannot detect: abbreviation\nvariants, typographic errors, case inconsistencies, and\nmalformed values. Results are returned as tidy data frames with\ncolumn, row index, detected value, issue type, suggested fix,\nand confidence score. An offline fallback using statistical and\nfuzzy-matching methods is provided for use without any API key.\nInteractive fix application with human review is supported via\n'apply_fixes()'. Methods follow de Jonge and van der Loo (2013)\n<https://cran.r-project.org/doc/contrib/de_Jonge+van_der_Loo-Introduction_to_data_cleaning_with_R.pdf>\nand Chaudhuri et al. (2003) <doi:10.1145/872757.872796>.",
  "License": "GPL-3",
  "VignetteBuilder": "knitr",
  "Encoding": "UTF-8",
  "LazyData": "true",
  "RoxygenNote": "7.3.3",
  "Config/testthat/edition": "3",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-22 09:06:42 UTC",
    "User": "root"
  },
  "Repository": "https://cran.r-universe.dev",
  "Date/Publication": "2026-04-22 14:14:02 UTC",
  "RemoteUrl": "https://github.com/cran/llmclean",
  "RemoteRef": "HEAD",
  "RemoteSha": "ca2c6035cff3ba095da28bf493ff53e4ee4d4c6f",
  "MD5sum": "1a69d01d49af680c054f1979486de020",
  "_user": "cran",
  "_type": "src",
  "_file": "llmclean_0.1.0.tar.gz",
  "_fileid": "3ca2b897a09ccae91dbc51918fe808161a16631bb0f4953d18915827e191e211",
  "_filesize": 202236,
  "_sha256": "3ca2b897a09ccae91dbc51918fe808161a16631bb0f4953d18915827e191e211",
  "_created": "2026-05-22T09:06:42.000Z",
  "_published": "2026-05-22T09:09:36.310Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77349821080,
      "time": 137,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7156908127"
    },
    {
      "job": 77349821010,
      "time": 128,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7156905373"
    },
    {
      "job": 77349341729,
      "time": 183,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7156861569"
    },
    {
      "job": 77349820978,
      "time": 106,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7156898594"
    }
  ],
  "_buildurl": "https://github.com/r-universe/cran/actions/runs/26278716101",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/cran/llmclean",
  "_commit": {
    "id": "ca2c6035cff3ba095da28bf493ff53e4ee4d4c6f",
    "author": "Sadikul Islam <sadikul.islamiasri@gmail.com>",
    "committer": "cran-robot <csardi.gabor+cran@gmail.com>",
    "message": "version 0.1.0\n",
    "time": 1776867242
  },
  "_maintainer": {
    "name": "Sadikul Islam",
    "email": "sadikul.islamiasri@gmail.com",
    "orcid": "0000-0003-2924-7122"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.1.0",
      "role": "Depends"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "dplyr",
      "version": ">= 1.0.0",
      "role": "Imports"
    },
    {
      "package": "rlang",
      "version": ">= 1.0.0",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "httr2",
      "version": ">= 1.0.0",
      "role": "Suggests"
    },
    {
      "package": "jsonlite",
      "version": ">= 1.8.0",
      "role": "Suggests"
    }
  ],
  "_owner": "cran",
  "_selfowned": false,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2026-17",
      "n": 1
    }
  ],
  "_tags": [
    {
      "name": "0.1.0",
      "date": "2026-04-22"
    }
  ],
  "_stars": 0,
  "_userbio": {
    "uuid": 6899542,
    "type": "organization",
    "name": "cran",
    "description": "Unofficial read-only mirror of all CRAN R packages"
  },
  "_downloads": {
    "count": 485,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/llmclean"
  },
  "_searchresults": 0,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/llmclean.html",
    "manual.pdf"
  ],
  "_realowner": "cran",
  "_cranurl": false,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2026-04-22"
    }
  ],
  "_exports": [
    "apply_fixes",
    "detect_issues",
    "get_llm_provider",
    "llmclean_report",
    "offline_detect",
    "set_llm_provider",
    "suggest_fixes"
  ],
  "_datasets": [
    {
      "name": "messy_employees",
      "title": "Hypothetical Messy Employee Records Dataset",
      "object": "messy_employees",
      "class": [
        "data.frame"
      ],
      "fields": [
        "emp_id",
        "name",
        "department",
        "email",
        "age",
        "salary",
        "status",
        "hire_date"
      ],
      "rows": 20,
      "table": true,
      "tojson": true
    },
    {
      "name": "messy_survey",
      "title": "Hypothetical Messy Survey Response Dataset",
      "object": "messy_survey",
      "class": [
        "data.frame"
      ],
      "fields": [
        "respondent_id",
        "country",
        "satisfaction",
        "age_group",
        "income_usd"
      ],
      "rows": 15,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "llmclean-package",
      "title": "llmclean: LLM-Assisted Data Cleaning with Multi-Provider Support",
      "topics": [
        "llmclean-package",
        "llmclean"
      ]
    },
    {
      "page": "apply_fixes",
      "title": "Apply Suggested Fixes to a Data Frame",
      "topics": [
        "apply_fixes"
      ]
    },
    {
      "page": "detect_issues",
      "title": "Detect Semantic Inconsistencies in a Data Frame Using an LLM",
      "topics": [
        "detect_issues"
      ]
    },
    {
      "page": "get_llm_provider",
      "title": "Get Current LLM Provider Configuration",
      "topics": [
        "get_llm_provider"
      ]
    },
    {
      "page": "llmclean_report",
      "title": "Generate a Summary Report of LLM-Assisted Data Cleaning",
      "topics": [
        "llmclean_report"
      ]
    },
    {
      "page": "messy_employees",
      "title": "Hypothetical Messy Employee Records Dataset",
      "topics": [
        "messy_employees"
      ]
    },
    {
      "page": "messy_survey",
      "title": "Hypothetical Messy Survey Response Dataset",
      "topics": [
        "messy_survey"
      ]
    },
    {
      "page": "offline_detect",
      "title": "Offline Detection of Data Inconsistencies Without an LLM",
      "topics": [
        "offline_detect"
      ]
    },
    {
      "page": "set_llm_provider",
      "title": "Configure the LLM Provider for Data Cleaning",
      "topics": [
        "set_llm_provider"
      ]
    },
    {
      "page": "suggest_fixes",
      "title": "Request Enriched Fix Suggestions for Detected Issues",
      "topics": [
        "suggest_fixes"
      ]
    }
  ],
  "_rundeps": [
    "cli",
    "dplyr",
    "generics",
    "glue",
    "lifecycle",
    "magrittr",
    "pillar",
    "pkgconfig",
    "R6",
    "rlang",
    "tibble",
    "tidyselect",
    "utf8",
    "vctrs",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "llmclean-intro.Rmd",
      "filename": "llmclean-intro.html",
      "title": "LLM-Assisted Data Cleaning with llmclean",
      "author": "Sadikul Islam",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "Supported Providers",
        "Quick Start",
        "Step 1 — Configure the provider",
        "Step 2 — Inspect the built-in messy datasets",
        "Stage 1 — Detect Issues",
        "Issue type breakdown",
        "Case inconsistencies",
        "Typos and near-duplicates (Levenshtein)",
        "Malformed email addresses",
        "Numeric outliers (Tukey outer fence)",
        "Stage 2 — Suggest Fixes",
        "Stage 3 — Apply Fixes",
        "Dry run mode",
        "Stage 4 — Offline Detection (No API Key)",
        "Stage 5 — Summary Report",
        "Complete Workflow (One Pipeline)",
        "API Key Storage Best Practice",
        "Session Information",
        "References"
      ],
      "created": "2026-04-22 14:14:02",
      "modified": "2026-04-22 14:14:02",
      "commits": 1
    }
  ],
  "_score": 2,
  "_indexed": true,
  "_nocasepkg": "llmclean",
  "_universes": [
    "cran"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-05-22T09:08:56.000Z",
      "distro": "noble",
      "commit": "ca2c6035cff3ba095da28bf493ff53e4ee4d4c6f",
      "fileid": "949471d3ee23ff5308ca294187cfa139ad7aa7fba3ca577ab78579755cddd817",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/26278716101"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-05-22T09:08:51.000Z",
      "distro": "noble",
      "commit": "ca2c6035cff3ba095da28bf493ff53e4ee4d4c6f",
      "fileid": "87a6e3122ea346ad3b993c2043a15ff886100feb6a6cdc20167d08302ae301a7",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/26278716101"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.1.0",
      "date": "2026-05-22T09:08:51.000Z",
      "commit": "ca2c6035cff3ba095da28bf493ff53e4ee4d4c6f",
      "fileid": "35c8152a1e7385fbc48aee6d02378cc39dffd662de25a1d99d6870bec7d2c57c",
      "status": "success",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/26278716101"
    }
  ]
}