{
  "_id": "6a2270c7cd65a98ecbd48a66",
  "Package": "quanteda",
  "Version": "4.4",
  "Title": "Quantitative Analysis of Textual Data",
  "Description": "A fast, flexible, and comprehensive framework for\nquantitative text analysis in R.  Provides functionality for\ncorpus management, creating and manipulating tokens and\nn-grams, exploring keywords in context, forming and\nmanipulating sparse matrices of documents by features and\nfeature co-occurrences, analyzing keywords, computing feature\nsimilarities and distances, applying content dictionaries,\napplying supervised and unsupervised machine learning, visually\nrepresenting text and text analyses, and more.",
  "Authors@R": "c( \nperson(\"Kenneth\", \"Benoit\", email = \"kbenoit@lse.ac.uk\", role = c(\"cre\", \"aut\", \"cph\"), comment = c(ORCID = \"0000-0002-0797-564X\")),\nperson(\"Kohei\", \"Watanabe\", email = \"watanabe.kohei@gmail.com\", role = \"aut\", comment = c(ORCID = \"0000-0001-6519-5265\")),\nperson(\"Haiyan\", \"Wang\", email = \"whyinsa@yahoo.com\", role = \"aut\", comment = c(ORCID = \"0000-0003-4992-4311\")),\nperson(\"Paul\", \"Nulty\", email = \"paul.nulty@gmail.com\", role = \"aut\", comment = c(ORCID = \"0000-0002-7214-4666\")),\nperson(\"Adam\", \"Obeng\", email = \"quanteda@binaryeagle.com\", role = \"aut\", comment = c(ORCID = \"0000-0002-2906-4775\")),\nperson(\"Stefan\", \"Müller\", email = \"stefan.mueller@ucd.ie\", role = \"aut\", comment = c(ORCID = \"0000-0002-6315-4125\")),\nperson(\"Akitaka\", \"Matsuo\", email = \"a.matsuo@essex.ac.uk\", role = \"aut\", comment = c(ORCID = \"0000-0002-3323-6330\")),\nperson(\"William\", \"Lowe\", email = \"lowe@hertie-school.org\", role = \"aut\", comment = c(ORCID = \"0000-0002-1549-6163\")),\nperson(\"Christian\", \"Müller\", email = \"C.Mueller@lse.ac.uk\", role = \"ctb\"),\nperson(\"Olivier\", \"Delmarcelle\", email = \"olivier.delmarcelle@ugent.be\", role = \"ctb\", comment = c(ORCID = \"0000-0003-4347-070X\")),\nperson(\"European Research Council\", role = \"fnd\", comment = \"ERC-2011-StG 283794-QUANTESS\")\n)",
  "License": "GPL-3",
  "NeedsCompilation": "yes",
  "URL": "https://quanteda.io",
  "Encoding": "UTF-8",
  "BugReports": "https://github.com/quanteda/quanteda/issues",
  "LazyData": "TRUE",
  "VignetteBuilder": "knitr",
  "Language": "en-GB",
  "RoxygenNote": "7.3.3",
  "Collate": "'RcppExports.R' 'tokenizers.R' 'meta.R'\n'quanteda-documentation.R' 'aaa.R' 'bootstrap_dfm.R'\n'casechange-functions.R' 'char_select.R' 'convert.R'\n'corpus-addsummary-metadata.R' 'corpus-methods.R' 'corpus.R'\n'corpus_chunk.R' 'corpus_group.R' 'corpus_reshape.R'\n'corpus_sample.R' 'corpus_segment.R' 'corpus_subset.R'\n'corpus_trim.R' 'data-documentation.R' 'dfm-classes.R'\n'dfm-methods.R' 'dfm-print.R' 'dfm-subsetting.R' 'dfm.R'\n'dfm_compress.R' 'dfm_group.R' 'dfm_lookup.R' 'dfm_match.R'\n'dfm_replace.R' 'dfm_sample.R' 'dfm_select.R' 'dfm_sort.R'\n'dfm_subset.R' 'dfm_trim.R' 'dfm_weight.R' 'dictionaries.R'\n'dimnames.R' 'fcm-classes.R' 'docnames.R' 'docvars.R'\n'fcm-methods.R' 'fcm-print.R' 'fcm-subsetting.R' 'fcm.R'\n'fcm_select.R' 'index.R' 'kwic.R' 'message.R' 'nfunctions.R'\n'object-builder.R' 'object2fixed.R' 'pattern2fixed.R'\n'phrases.R' 'quanteda-package.R' 'quanteda_options.R'\n'spacyr-methods.R' 'stopwords.R' 'summary.R' 'textmodel.R'\n'textplot.R' 'texts.R' 'textstat.R' 'tokens-methods.R'\n'tokens.R' 'tokens_annotate.R' 'tokens_chunk.R'\n'tokens_compound.R' 'tokens_group.R' 'tokens_lookup.R'\n'tokens_ngrams.R' 'tokens_replace.R' 'tokens_restore.R'\n'tokens_sample.R' 'tokens_segment.R' 'tokens_select.R'\n'tokens_split.R' 'tokens_subset.R' 'tokens_trim.R'\n'tokens_xptr.R' 'utils.R' 'validator.R' 'wordstem.R' 'zzz.R'",
  "Packaged": {
    "Date": "2026-06-05 06:38:54 UTC",
    "User": "root"
  },
  "Author": "Kenneth Benoit [cre, aut, cph] (ORCID:\n<https://orcid.org/0000-0002-0797-564X>), Kohei Watanabe [aut]\n(ORCID: <https://orcid.org/0000-0001-6519-5265>), Haiyan Wang\n[aut] (ORCID: <https://orcid.org/0000-0003-4992-4311>), Paul\nNulty [aut] (ORCID: <https://orcid.org/0000-0002-7214-4666>),\nAdam Obeng [aut] (ORCID:\n<https://orcid.org/0000-0002-2906-4775>), Stefan Müller [aut]\n(ORCID: <https://orcid.org/0000-0002-6315-4125>), Akitaka\nMatsuo [aut] (ORCID: <https://orcid.org/0000-0002-3323-6330>),\nWilliam Lowe [aut] (ORCID:\n<https://orcid.org/0000-0002-1549-6163>), Christian Müller\n[ctb], Olivier Delmarcelle [ctb] (ORCID:\n<https://orcid.org/0000-0003-4347-070X>), European Research\nCouncil [fnd] (ERC-2011-StG 283794-QUANTESS)",
  "Maintainer": "Kenneth Benoit <kbenoit@lse.ac.uk>",
  "Repository": "https://cran.r-universe.dev",
  "Date/Publication": "2026-04-06 19:33:23 UTC",
  "RemoteUrl": "https://github.com/cran/quanteda",
  "RemoteRef": "HEAD",
  "RemoteSha": "6ca9bee2ed73c947b5ca58b49b8887df7c0e4cae",
  "MD5sum": "fb4372be5f6b0d452d5803fe34b2a813",
  "_user": "cran",
  "_type": "src",
  "_file": "quanteda_4.4.tar.gz",
  "_fileid": "532edf2abadbf2f8223c5af38eaf676a118068db989f4dbd05bcf8e77f3dc732",
  "_filesize": 5156602,
  "_sha256": "532edf2abadbf2f8223c5af38eaf676a118068db989f4dbd05bcf8e77f3dc732",
  "_created": "2026-06-05T06:38:54.000Z",
  "_published": "2026-06-05T06:46:31.738Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79677458075,
      "time": 294,
      "config": "linux-devel-arm64",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "7430480039"
    },
    {
      "job": 79677458058,
      "time": 325,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "7430486830"
    },
    {
      "job": 79677458073,
      "time": 301,
      "config": "linux-release-arm64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "7430481599"
    },
    {
      "job": 79677458038,
      "time": 323,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "7430486946"
    },
    {
      "job": 79676706751,
      "time": 378,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7430408586"
    },
    {
      "job": 79677458049,
      "time": 214,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7430460011"
    }
  ],
  "_buildurl": "https://github.com/r-universe/cran/actions/runs/26999552692",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/cran/quanteda",
  "_commit": {
    "id": "6ca9bee2ed73c947b5ca58b49b8887df7c0e4cae",
    "author": "Kenneth Benoit <kbenoit@lse.ac.uk>",
    "committer": "cran-robot <csardi.gabor+cran@gmail.com>",
    "message": "version 4.4\n",
    "time": 1775504003
  },
  "_maintainer": {
    "name": "Kenneth Benoit",
    "email": "kbenoit@lse.ac.uk",
    "login": "kbenoit",
    "bluesky": "@kenbenoit.bsky.social",
    "orcid": "0000-0002-0797-564X",
    "twitter": "@kenbenoit",
    "description": "Professor of Computational Social Science; Dean, School of Social Science, Singapore Management University;\nDirector of non-profit @quanteda Initiative",
    "uuid": 2182246
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.1.0",
      "role": "Depends"
    },
    {
      "package": "methods",
      "role": "Depends"
    },
    {
      "package": "Rcpp",
      "role": "LinkingTo"
    },
    {
      "package": "fastmatch",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "role": "Imports"
    },
    {
      "package": "lifecycle",
      "role": "Imports"
    },
    {
      "package": "magrittr",
      "role": "Imports"
    },
    {
      "package": "Matrix",
      "version": ">= 1.5-0",
      "role": "Imports"
    },
    {
      "package": "Rcpp",
      "version": ">= 0.12.12",
      "role": "Imports"
    },
    {
      "package": "SnowballC",
      "role": "Imports"
    },
    {
      "package": "stopwords",
      "role": "Imports"
    },
    {
      "package": "stringi",
      "role": "Imports"
    },
    {
      "package": "xml2",
      "role": "Imports"
    },
    {
      "package": "yaml",
      "role": "Imports"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "spelling",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "role": "Suggests"
    },
    {
      "package": "formatR",
      "role": "Suggests"
    },
    {
      "package": "tm",
      "version": ">= 0.6",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "lsa",
      "role": "Suggests"
    },
    {
      "package": "rlang",
      "role": "Suggests"
    },
    {
      "package": "slam",
      "role": "Suggests"
    },
    {
      "package": "torch",
      "role": "Suggests"
    },
    {
      "package": "dplyr",
      "role": "Enhances"
    },
    {
      "package": "lda",
      "role": "Enhances"
    },
    {
      "package": "purrr",
      "role": "Enhances"
    },
    {
      "package": "spacyr",
      "role": "Enhances"
    },
    {
      "package": "stm",
      "role": "Enhances"
    },
    {
      "package": "text2vec",
      "role": "Enhances"
    },
    {
      "package": "tibble",
      "role": "Enhances"
    },
    {
      "package": "tidytext",
      "role": "Enhances"
    },
    {
      "package": "tokenizers",
      "role": "Enhances"
    },
    {
      "package": "topicmodels",
      "role": "Enhances"
    }
  ],
  "_owner": "cran",
  "_selfowned": false,
  "_usedby": 62,
  "_updates": [
    {
      "week": "2025-28",
      "n": 1
    },
    {
      "week": "2026-15",
      "n": 1
    }
  ],
  "_tags": [
    {
      "name": "4.3.1",
      "date": "2025-07-10"
    },
    {
      "name": "4.4",
      "date": "2026-04-06"
    }
  ],
  "_stars": 0,
  "_contributors": [
    {
      "user": "kbenoit",
      "count": 64,
      "uuid": 2182246
    }
  ],
  "_userbio": {
    "uuid": 6899542,
    "type": "organization",
    "name": "cran",
    "description": "Unofficial read-only mirror of all CRAN R packages"
  },
  "_downloads": {
    "count": 20099,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/quanteda"
  },
  "_mentions": 15,
  "_devurl": "https://github.com/quanteda/quanteda",
  "_pkgdown": "https://quanteda.io",
  "_searchresults": 6112,
  "_topics": [
    "onetbb",
    "cpp"
  ],
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/quanteda.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/quanteda/quanteda",
  "_realowner": "quanteda",
  "_cranurl": false,
  "_releases": [
    {
      "version": "0.7.2",
      "date": "2015-04-07"
    },
    {
      "version": "0.7.2-1",
      "date": "2015-06-02"
    },
    {
      "version": "0.8.0-3",
      "date": "2015-07-11"
    },
    {
      "version": "0.8.0-4",
      "date": "2015-07-13"
    },
    {
      "version": "0.8.2-0",
      "date": "2015-07-31"
    },
    {
      "version": "0.8.2-1",
      "date": "2015-08-18"
    },
    {
      "version": "0.8.4-2",
      "date": "2015-09-09"
    },
    {
      "version": "0.8.6-0",
      "date": "2015-10-15"
    },
    {
      "version": "0.9.0-1",
      "date": "2015-11-27"
    },
    {
      "version": "0.9.2-0",
      "date": "2016-01-11"
    },
    {
      "version": "0.9.4",
      "date": "2016-02-21"
    },
    {
      "version": "0.9.6-1",
      "date": "2016-05-09"
    },
    {
      "version": "0.9.6-9",
      "date": "2016-05-20"
    },
    {
      "version": "0.9.7-16",
      "date": "2016-06-20"
    },
    {
      "version": "0.9.7-17",
      "date": "2016-06-21"
    },
    {
      "version": "0.9.8",
      "date": "2016-07-28"
    },
    {
      "version": "0.9.8.3",
      "date": "2016-10-16"
    },
    {
      "version": "0.9.8.5",
      "date": "2016-10-31"
    },
    {
      "version": "0.9.9-3",
      "date": "2017-01-10"
    },
    {
      "version": "0.9.9-17",
      "date": "2017-01-27"
    },
    {
      "version": "0.9.9-22",
      "date": "2017-02-09"
    },
    {
      "version": "0.9.9-24",
      "date": "2017-02-14"
    },
    {
      "version": "0.9.9-50",
      "date": "2017-04-20"
    },
    {
      "version": "0.9.9-65",
      "date": "2017-05-26"
    },
    {
      "version": "0.99",
      "date": "2017-08-15"
    },
    {
      "version": "0.99.9",
      "date": "2017-09-22"
    },
    {
      "version": "0.99.12",
      "date": "2017-10-06"
    },
    {
      "version": "0.99.22",
      "date": "2017-11-13"
    },
    {
      "version": "1.0.0",
      "date": "2018-01-28"
    },
    {
      "version": "1.1.0",
      "date": "2018-03-05"
    },
    {
      "version": "1.1.1",
      "date": "2018-03-07"
    },
    {
      "version": "1.2.0",
      "date": "2018-04-15"
    },
    {
      "version": "1.3.0",
      "date": "2018-06-05"
    },
    {
      "version": "1.3.4",
      "date": "2018-07-15"
    },
    {
      "version": "1.3.13",
      "date": "2018-11-01"
    },
    {
      "version": "1.3.14",
      "date": "2018-11-19"
    },
    {
      "version": "1.4.0",
      "date": "2019-01-30"
    },
    {
      "version": "1.4.1",
      "date": "2019-02-26"
    },
    {
      "version": "1.4.3",
      "date": "2019-04-01"
    },
    {
      "version": "1.5.0",
      "date": "2019-07-04"
    },
    {
      "version": "1.5.1",
      "date": "2019-07-30"
    },
    {
      "version": "1.5.2",
      "date": "2019-11-26"
    },
    {
      "version": "2.0.0",
      "date": "2020-02-26"
    },
    {
      "version": "2.0.1",
      "date": "2020-03-18"
    },
    {
      "version": "2.1.0",
      "date": "2020-07-05"
    },
    {
      "version": "2.1.1",
      "date": "2020-07-27"
    },
    {
      "version": "2.1.2",
      "date": "2020-09-23"
    },
    {
      "version": "3.0.0",
      "date": "2021-04-06"
    },
    {
      "version": "3.1.0",
      "date": "2021-08-17"
    },
    {
      "version": "3.2.0",
      "date": "2021-11-30"
    },
    {
      "version": "3.2.1",
      "date": "2022-03-01"
    },
    {
      "version": "3.2.2",
      "date": "2022-08-09"
    },
    {
      "version": "3.2.3",
      "date": "2022-08-29"
    },
    {
      "version": "3.2.4",
      "date": "2022-12-08"
    },
    {
      "version": "3.3.0",
      "date": "2023-04-07"
    },
    {
      "version": "3.3.1",
      "date": "2023-05-18"
    },
    {
      "version": "4.0.0",
      "date": "2024-04-04"
    },
    {
      "version": "4.0.1",
      "date": "2024-04-08"
    },
    {
      "version": "4.0.2",
      "date": "2024-04-24"
    },
    {
      "version": "4.1.0",
      "date": "2024-09-04"
    },
    {
      "version": "4.2.0",
      "date": "2025-01-08"
    },
    {
      "version": "4.3.0",
      "date": "2025-05-20"
    },
    {
      "version": "4.3.1",
      "date": "2025-07-10"
    },
    {
      "version": "4.4",
      "date": "2026-04-06"
    }
  ],
  "_exports": [
    "%>%",
    "as.corpus",
    "as.dfm",
    "as.dictionary",
    "as.fcm",
    "as.list",
    "as.phrase",
    "as.tensor",
    "as.tokens",
    "as.tokens_xptr",
    "as.yaml",
    "bootstrap_dfm",
    "breakrules_get",
    "breakrules_reset",
    "breakrules_set",
    "char_keep",
    "char_ngrams",
    "char_remove",
    "char_segment",
    "char_select",
    "char_tolower",
    "char_toupper",
    "char_trim",
    "char_wordstem",
    "check_character",
    "check_double",
    "check_integer",
    "check_logical",
    "colMeans",
    "colSums",
    "Compare",
    "concat",
    "concatenator",
    "convert",
    "corpus",
    "corpus_chunk",
    "corpus_group",
    "corpus_reshape",
    "corpus_sample",
    "corpus_segment",
    "corpus_subset",
    "corpus_trim",
    "dfm",
    "dfm_compress",
    "dfm_group",
    "dfm_keep",
    "dfm_lookup",
    "dfm_match",
    "dfm_remove",
    "dfm_replace",
    "dfm_sample",
    "dfm_select",
    "dfm_smooth",
    "dfm_sort",
    "dfm_subset",
    "dfm_tfidf",
    "dfm_tolower",
    "dfm_toupper",
    "dfm_trim",
    "dfm_weight",
    "dfm_wordstem",
    "dictionary",
    "dictionary_tokenize",
    "docfreq",
    "docid",
    "docnames",
    "docnames<-",
    "docvars",
    "docvars<-",
    "fcm",
    "fcm_compress",
    "fcm_keep",
    "fcm_remove",
    "fcm_select",
    "fcm_sort",
    "fcm_tolower",
    "fcm_toupper",
    "featfreq",
    "featnames",
    "flatten_dictionary",
    "index",
    "info_tbb",
    "is.collocations",
    "is.corpus",
    "is.dfm",
    "is.dictionary",
    "is.fcm",
    "is.index",
    "is.kwic",
    "is.phrase",
    "is.tokens",
    "is.tokens_xptr",
    "kwic",
    "meta",
    "meta<-",
    "ndoc",
    "nfeat",
    "normalize_characters",
    "nsentence",
    "ntoken",
    "ntype",
    "object2fixed",
    "object2id",
    "pattern2fixed",
    "pattern2id",
    "phrase",
    "print",
    "quanteda_options",
    "rowMeans",
    "rownames<-",
    "rowSums",
    "segid",
    "sparsity",
    "stopwords",
    "t",
    "texts",
    "texts<-",
    "tokenize_character",
    "tokenize_custom",
    "tokenize_fasterword",
    "tokenize_fastestword",
    "tokenize_paragraph",
    "tokenize_sentence",
    "tokenize_word1",
    "tokenize_word2",
    "tokenize_word3",
    "tokenize_word4",
    "tokens",
    "tokens_annotate",
    "tokens_chunk",
    "tokens_compound",
    "tokens_group",
    "tokens_keep",
    "tokens_lookup",
    "tokens_ngrams",
    "tokens_remove",
    "tokens_replace",
    "tokens_restore",
    "tokens_sample",
    "tokens_segment",
    "tokens_select",
    "tokens_skipgrams",
    "tokens_split",
    "tokens_subset",
    "tokens_tolower",
    "tokens_toupper",
    "tokens_trim",
    "tokens_wordstem",
    "topfeatures",
    "types"
  ],
  "_datasets": [
    {
      "name": "data_char_sampletext",
      "title": "A paragraph of text for testing various text-based functions",
      "object": "data_char_sampletext",
      "class": [
        "character"
      ],
      "fields": [],
      "table": false,
      "tojson": true
    },
    {
      "name": "data_char_ukimmig2010",
      "title": "Immigration-related sections of 2010 UK party manifestos",
      "object": "data_char_ukimmig2010",
      "class": [
        "character"
      ],
      "fields": [],
      "table": false,
      "tojson": true
    },
    {
      "name": "data_corpus_inaugural",
      "title": "US presidential inaugural address texts",
      "object": "data_corpus_inaugural",
      "class": [
        "corpus",
        "character"
      ],
      "fields": [],
      "table": false,
      "tojson": true
    },
    {
      "name": "data_dfm_lbgexample",
      "title": "dfm from data in Table 1 of Laver, Benoit, and Garry (2003)",
      "object": "data_dfm_lbgexample",
      "class": [
        "dfm"
      ],
      "fields": [],
      "table": false,
      "tojson": false
    },
    {
      "name": "data_dictionary_LSD2015",
      "title": "Lexicoder Sentiment Dictionary (2015)",
      "object": "data_dictionary_LSD2015",
      "class": [
        "dictionary2"
      ],
      "fields": [],
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "as.character.corpus",
      "title": "Coercion and checking methods for corpus objects",
      "topics": [
        "as.character.corpus",
        "as.corpus",
        "is.corpus"
      ]
    },
    {
      "page": "as.dfm",
      "title": "Coercion and checking functions for dfm objects",
      "topics": [
        "as.dfm",
        "is.dfm"
      ]
    },
    {
      "page": "as.dictionary",
      "title": "Coercion and checking functions for dictionary objects",
      "topics": [
        "as.dictionary",
        "as.dictionary.data.frame",
        "is.dictionary"
      ]
    },
    {
      "page": "as.fcm",
      "title": "Coercion and checking functions for fcm objects",
      "topics": [
        "as.fcm"
      ]
    },
    {
      "page": "as.tokens",
      "title": "Coercion, checking, and combining functions for tokens objects",
      "topics": [
        "as.character.tokens",
        "as.list.tokens",
        "as.tensor",
        "as.tensor.tokens",
        "as.tokens",
        "as.tokens.spacyr_parsed",
        "is.tokens"
      ]
    },
    {
      "page": "as.matrix.dfm",
      "title": "Coerce a dfm to a matrix or data.frame",
      "topics": [
        "as.matrix.dfm"
      ]
    },
    {
      "page": "as.yaml",
      "title": "Convert quanteda dictionary objects to the YAML format",
      "topics": [
        "as.yaml"
      ]
    },
    {
      "page": "bootstrap_dfm",
      "title": "Bootstrap a dfm",
      "topics": [
        "bootstrap_dfm"
      ]
    },
    {
      "page": "char_select",
      "title": "Select or remove elements from a character vector",
      "topics": [
        "char_keep",
        "char_remove",
        "char_select"
      ]
    },
    {
      "page": "char_tolower",
      "title": "Convert the case of character objects",
      "topics": [
        "char_tolower",
        "char_toupper"
      ]
    },
    {
      "page": "concat",
      "title": "Return the concatenator character from an object",
      "topics": [
        "concat",
        "concatenator"
      ]
    },
    {
      "page": "convert",
      "title": "Convert quanteda objects to non-quanteda formats",
      "topics": [
        "convert",
        "convert.corpus",
        "convert.dfm"
      ]
    },
    {
      "page": "corpus",
      "title": "Construct a corpus object",
      "topics": [
        "corpus",
        "corpus.character",
        "corpus.Corpus",
        "corpus.corpus",
        "corpus.data.frame",
        "corpus.kwic"
      ]
    },
    {
      "page": "corpus_chunk",
      "title": "Segment a corpus into chunks of a given size",
      "topics": [
        "corpus_chunk"
      ]
    },
    {
      "page": "corpus_group",
      "title": "Combine documents in corpus by a grouping variable",
      "topics": [
        "corpus_group"
      ]
    },
    {
      "page": "corpus_reshape",
      "title": "Recast the document units of a corpus",
      "topics": [
        "corpus_reshape"
      ]
    },
    {
      "page": "corpus_sample",
      "title": "Randomly sample documents from a corpus",
      "topics": [
        "corpus_sample"
      ]
    },
    {
      "page": "corpus_segment",
      "title": "Segment texts on a pattern match",
      "topics": [
        "char_segment",
        "corpus_segment"
      ]
    },
    {
      "page": "corpus_subset",
      "title": "Extract a subset of a corpus",
      "topics": [
        "corpus_subset"
      ]
    },
    {
      "page": "corpus_trim",
      "title": "Remove sentences based on their token lengths or a pattern match",
      "topics": [
        "char_trim",
        "corpus_trim"
      ]
    },
    {
      "page": "data_char_sampletext",
      "title": "A paragraph of text for testing various text-based functions",
      "topics": [
        "data_char_sampletext"
      ]
    },
    {
      "page": "data_char_ukimmig2010",
      "title": "Immigration-related sections of 2010 UK party manifestos",
      "topics": [
        "data_char_ukimmig2010"
      ]
    },
    {
      "page": "data_corpus_inaugural",
      "title": "US presidential inaugural address texts",
      "topics": [
        "data_corpus_inaugural"
      ]
    },
    {
      "page": "data_dfm_lbgexample",
      "title": "dfm from data in Table 1 of Laver, Benoit, and Garry (2003)",
      "topics": [
        "data_dfm_lbgexample"
      ]
    },
    {
      "page": "data_dictionary_LSD2015",
      "title": "Lexicoder Sentiment Dictionary (2015)",
      "topics": [
        "data_dictionary_LSD2015"
      ]
    },
    {
      "page": "dfm",
      "title": "Create a document-feature matrix",
      "topics": [
        "dfm"
      ]
    },
    {
      "page": "dfm_compress",
      "title": "Recombine a dfm or fcm by combining identical dimension elements",
      "topics": [
        "dfm_compress",
        "fcm_compress"
      ]
    },
    {
      "page": "dfm_group",
      "title": "Combine documents in a dfm by a grouping variable",
      "topics": [
        "dfm_group"
      ]
    },
    {
      "page": "dfm_lookup",
      "title": "Apply a dictionary to a dfm",
      "topics": [
        "dfm_lookup"
      ]
    },
    {
      "page": "dfm_match",
      "title": "Match the feature set of a dfm to given feature names",
      "topics": [
        "dfm_match"
      ]
    },
    {
      "page": "dfm_replace",
      "title": "Replace features in dfm",
      "topics": [
        "dfm_replace"
      ]
    },
    {
      "page": "dfm_sample",
      "title": "Randomly sample documents from a dfm",
      "topics": [
        "dfm_sample"
      ]
    },
    {
      "page": "dfm_select",
      "title": "Select features from a dfm or fcm",
      "topics": [
        "dfm_keep",
        "dfm_remove",
        "dfm_select",
        "fcm_keep",
        "fcm_remove",
        "fcm_select"
      ]
    },
    {
      "page": "dfm_sort",
      "title": "Sort a dfm by frequency of one or more margins",
      "topics": [
        "dfm_sort"
      ]
    },
    {
      "page": "dfm_subset",
      "title": "Extract a subset of a dfm",
      "topics": [
        "dfm_subset"
      ]
    },
    {
      "page": "dfm_tfidf",
      "title": "Weight a dfm by _tf-idf_",
      "topics": [
        "dfm_tfidf"
      ]
    },
    {
      "page": "dfm_tolower",
      "title": "Convert the case of the features of a dfm and combine",
      "topics": [
        "dfm_tolower",
        "dfm_toupper",
        "fcm_tolower",
        "fcm_toupper"
      ]
    },
    {
      "page": "dfm_trim",
      "title": "Trim a dfm using frequency threshold-based feature selection",
      "topics": [
        "dfm_trim"
      ]
    },
    {
      "page": "dfm_weight",
      "title": "Weight the feature frequencies in a dfm",
      "topics": [
        "dfm_smooth",
        "dfm_weight"
      ]
    },
    {
      "page": "dictionary",
      "title": "Create a dictionary object",
      "topics": [
        "dictionary"
      ]
    },
    {
      "page": "docfreq",
      "title": "Compute the (weighted) document frequency of a feature",
      "topics": [
        "docfreq"
      ]
    },
    {
      "page": "docnames",
      "title": "Get or set document names",
      "topics": [
        "docid",
        "docnames",
        "docnames<-",
        "segid"
      ]
    },
    {
      "page": "docvars",
      "title": "Get or set document-level variables",
      "topics": [
        "$.corpus",
        "$.dfm",
        "$.tokens",
        "$<-.corpus",
        "$<-.dfm",
        "$<-.tokens",
        "docvars",
        "docvars<-"
      ]
    },
    {
      "page": "fcm",
      "title": "Create a feature co-occurrence matrix",
      "topics": [
        "fcm",
        "is.fcm"
      ]
    },
    {
      "page": "fcm_sort",
      "title": "Sort an fcm in alphabetical order of the features",
      "topics": [
        "fcm_sort"
      ]
    },
    {
      "page": "featfreq",
      "title": "Compute the frequencies of features",
      "topics": [
        "featfreq"
      ]
    },
    {
      "page": "featnames",
      "title": "Get the feature labels from a dfm",
      "topics": [
        "featnames"
      ]
    },
    {
      "page": "index",
      "title": "Locate a pattern in a tokens object",
      "topics": [
        "index",
        "is.index"
      ]
    },
    {
      "page": "is.collocations",
      "title": "Check if an object is collocations",
      "topics": [
        "is.collocations"
      ]
    },
    {
      "page": "kwic",
      "title": "Locate keywords-in-context",
      "topics": [
        "as.data.frame.kwic",
        "is.kwic",
        "kwic"
      ]
    },
    {
      "page": "meta",
      "title": "Get or set object metadata",
      "topics": [
        "meta",
        "meta<-"
      ]
    },
    {
      "page": "ndoc",
      "title": "Count the number of documents or features",
      "topics": [
        "ndoc",
        "nfeat"
      ]
    },
    {
      "page": "nsentence",
      "title": "Count the number of sentences",
      "topics": [
        "nsentence"
      ]
    },
    {
      "page": "ntoken",
      "title": "Count the number of tokens or types",
      "topics": [
        "ntoken",
        "ntype"
      ]
    },
    {
      "page": "phrase",
      "title": "Declare a pattern to be a sequence of separate patterns",
      "topics": [
        "as.phrase",
        "is.phrase",
        "phrase"
      ]
    },
    {
      "page": "print-methods",
      "title": "Print methods for quanteda core objects",
      "topics": [
        "print,dfm-method",
        "print,dictionary2-method",
        "print,fcm-method",
        "print-methods",
        "print.corpus",
        "print.dfm",
        "print.dictionary",
        "print.kwic",
        "print.tokens"
      ]
    },
    {
      "page": "quanteda_options",
      "title": "Get or set package options for quanteda",
      "topics": [
        "quanteda_options"
      ]
    },
    {
      "page": "spacyr-methods",
      "title": "Extensions for and from spacy_parse objects",
      "topics": [
        "spacyr-methods"
      ]
    },
    {
      "page": "sparsity",
      "title": "Compute the sparsity of a document-feature matrix",
      "topics": [
        "sparsity"
      ]
    },
    {
      "page": "textmodels",
      "title": "Models for scaling and classification of textual data",
      "topics": [
        "textmodels"
      ]
    },
    {
      "page": "textplots",
      "title": "Plots for textual data",
      "topics": [
        "textplots"
      ]
    },
    {
      "page": "textstats",
      "title": "Statistics for textual data",
      "topics": [
        "textstats"
      ]
    },
    {
      "page": "tokens",
      "title": "Construct a tokens object",
      "topics": [
        "tokens"
      ]
    },
    {
      "page": "tokens_annotate",
      "title": "Annotate a tokens object using a dictionary",
      "topics": [
        "tokens_annotate"
      ]
    },
    {
      "page": "tokens_chunk",
      "title": "Segment tokens object by chunks of a given size",
      "topics": [
        "tokens_chunk"
      ]
    },
    {
      "page": "tokens_compound",
      "title": "Convert token sequences into compound tokens",
      "topics": [
        "tokens_compound"
      ]
    },
    {
      "page": "tokens_group",
      "title": "Combine documents in a tokens object by a grouping variable",
      "topics": [
        "tokens_group"
      ]
    },
    {
      "page": "tokens_lookup",
      "title": "Apply a dictionary to a tokens object",
      "topics": [
        "tokens_lookup"
      ]
    },
    {
      "page": "tokens_ngrams",
      "title": "Create n-grams and skip-grams from tokens",
      "topics": [
        "char_ngrams",
        "tokens_ngrams",
        "tokens_skipgrams"
      ]
    },
    {
      "page": "tokens_replace",
      "title": "Replace tokens in a tokens object",
      "topics": [
        "tokens_replace"
      ]
    },
    {
      "page": "tokens_sample",
      "title": "Randomly sample documents from a tokens object",
      "topics": [
        "tokens_sample"
      ]
    },
    {
      "page": "tokens_segment",
      "title": "Segment tokens object by patterns",
      "topics": [
        "tokens_segment"
      ]
    },
    {
      "page": "tokens_select",
      "title": "Select or remove tokens from a tokens object",
      "topics": [
        "tokens_keep",
        "tokens_remove",
        "tokens_select"
      ]
    },
    {
      "page": "tokens_split",
      "title": "Split tokens by a separator pattern",
      "topics": [
        "tokens_split"
      ]
    },
    {
      "page": "tokens_subset",
      "title": "Extract a subset of a tokens",
      "topics": [
        "tokens_subset"
      ]
    },
    {
      "page": "tokens_tolower",
      "title": "Convert the case of tokens",
      "topics": [
        "tokens_tolower",
        "tokens_toupper"
      ]
    },
    {
      "page": "tokens_trim",
      "title": "Trim tokens using frequency threshold-based feature selection",
      "topics": [
        "tokens_trim"
      ]
    },
    {
      "page": "tokens_wordstem",
      "title": "Stem the terms in an object",
      "topics": [
        "char_wordstem",
        "dfm_wordstem",
        "tokens_wordstem"
      ]
    },
    {
      "page": "tokens_xptr",
      "title": "Methods for tokens_xptr objects",
      "topics": [
        "as.tokens_xptr",
        "as.tokens_xptr.tokens",
        "as.tokens_xptr.tokens_xptr",
        "is.tokens_xptr",
        "tokens_xptr"
      ]
    },
    {
      "page": "topfeatures",
      "title": "Identify the most frequent features in a dfm",
      "topics": [
        "topfeatures"
      ]
    },
    {
      "page": "types",
      "title": "Get word types from a tokens object",
      "topics": [
        "types"
      ]
    }
  ],
  "_readme": "https://github.com/cran/quanteda/raw/HEAD/README.md",
  "_rundeps": [
    "cli",
    "fastmatch",
    "ISOcodes",
    "jsonlite",
    "lattice",
    "lifecycle",
    "magrittr",
    "Matrix",
    "Rcpp",
    "rlang",
    "SnowballC",
    "stopwords",
    "stringi",
    "xml2",
    "yaml"
  ],
  "_sysdeps": [
    {
      "shlib": "libtbb",
      "package": "libtbb12",
      "headers": "libtbb-dev",
      "source": "onetbb",
      "version": "2021.11.0-2ubuntu2",
      "name": "onetbb",
      "homepage": "https://www.threadingbuildingblocks.org/",
      "description": "parallelism library for C++ - runtime files"
    },
    {
      "shlib": "libstdc++",
      "package": "libstdc++6",
      "source": "gcc",
      "version": "14.2.0-4ubuntu2~24.04.1",
      "name": "c++",
      "homepage": "http://gcc.gnu.org/",
      "description": "GNU Standard C++ Library v3"
    }
  ],
  "_vignettes": [
    {
      "source": "quickstart.Rmd",
      "filename": "quickstart.html",
      "title": "Quick Start Guide",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Installing the package",
        "Additional recommended packages:",
        "Creating a Corpus",
        "Currently available corpus sources",
        "Building a corpus from a character vector",
        "Loading in files using the readtext package",
        "Working with a quanteda corpus",
        "Corpus principles",
        "Tools for handling corpus objects",
        "Adding two corpus objects together",
        "Subsetting corpus objects",
        "Exploring corpus texts",
        "Tokenizing texts",
        "\"Pre-processing\" tokens",
        "Splitting and compounding tokens",
        "Constructing a document-feature matrix",
        "Analysing the document-feature matrix",
        "Grouping documents by document variable",
        "Dictionary functions",
        "Further examples"
      ],
      "created": "2015-04-07 16:00:23",
      "modified": "2024-04-05 02:34:01",
      "commits": 32
    }
  ],
  "_score": 10.358870739977899,
  "_indexed": false,
  "_nocasepkg": "quanteda",
  "_universes": [
    "cran"
  ],
  "_indexurl": "https://quanteda.r-universe.dev/quanteda",
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "4.4",
      "date": "2026-06-05T06:43:06.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "6ca9bee2ed73c947b5ca58b49b8887df7c0e4cae",
      "fileid": "6b482b2ba6a3407d0e54531a0aefb9d21ecc8dc8e991456e0cef79770afec7a4",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/26999552692"
    },
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "4.4",
      "date": "2026-06-05T06:43:14.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "6ca9bee2ed73c947b5ca58b49b8887df7c0e4cae",
      "fileid": "ba373e1a6a0ffa33ea1455479594f05606ff4c78ccfae9747a00de56e3fbec74",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/26999552692"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "4.4",
      "date": "2026-06-05T06:43:12.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "6ca9bee2ed73c947b5ca58b49b8887df7c0e4cae",
      "fileid": "9877d11e37c994590b72c90416ab232e91ccf997c00df36d01a5d013df30f355",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/26999552692"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "4.4",
      "date": "2026-06-05T06:43:12.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "6ca9bee2ed73c947b5ca58b49b8887df7c0e4cae",
      "fileid": "fe3af35f69f8a90c6ec0eb09c7a48086a3fecfa971ec731cf20a6f875b74b886",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/26999552692"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "4.4",
      "date": "2026-06-05T06:43:38.000Z",
      "arch": "emscripten",
      "commit": "6ca9bee2ed73c947b5ca58b49b8887df7c0e4cae",
      "fileid": "32c2ac28c120afedbe7109a44eb0d83619b9ab45d6f7042ae0b783619f908ad6",
      "status": "success",
      "buildurl": "https://github.com/r-universe/cran/actions/runs/26999552692"
    }
  ]
}