| Title: | Counting Codes in a Text and Preparing Data for Analysis |
|---|---|
| Description: | Data analysis often requires coding, especially when data are collected through interviews, observations, or questionnaires. As a result, code counting and data preparation are essential steps in the analysis process. Analysts may need to count the codes in a text (tokenization, counting of pre-established codes, and computation of the co-occurrence matrix by line) and prepare the data (e.g., min-max normalization, Z-score, robust scaling, Box-Cox transformation, and non-parametric bootstrap). For the Box-Cox transformation (Box & Cox, 1964, <https://www.jstor.org/stable/2984418>), the optimal lambda is determined using the log-likelihood method. The non-parametric bootstrap involves randomly sampling data with replacement. Two random number generators are also included: a Lehmer congruential generator for the uniform distribution and a Box-Muller generator for the normal distribution. This package is intended for educational purposes. |
| Authors: | Philippe Cohard [aut, cre] |
| Maintainer: | Philippe Cohard <[email protected]> |
| License: | GPL-3 |
| Version: | 0.0.4.8 |
| Built: | 2026-06-05 07:38:35 UTC |
| Source: | https://github.com/cran/codecountR |
analysCodesList
analysCodesList(dataS, codesLis)
dataS |
a character |
codesLis |
a character |
a list
codes=list("@essai@","@test@")
data = "this is an example @essai@, a bit long @essai@ text"
Result=analysCodesList(data,codes)
Result
bootStrap
bootStrap(nameDframe, grpSize)
nameDframe |
a data.frame |
grpSize |
a number |
a matrix
j=c(10,14,56,30,58,78,99,1)
k=c(10,12,14,16,18,20,22,24)
x=data.frame(j,k)
res=bootStrap(x,5)
res
BoxAndCox
BoxAndCox(rawVect, minLambda)
rawVect |
a vector |
minLambda |
a number |
a list
vec=rlnorm(100, log(3), log(3))
BandC=BoxAndCox(vec, -3)
BandC
BAC=unlist(BandC$par)
BAC
rawVectBCFinal=unlist(subCalcBoxAndCox(vec, BandC$par))
BoxMullerGen
BoxMullerGen(r, s)
r |
a number |
s |
a number |
a vector
#with runif
v=BoxMullerGen(runif(1), runif(1))
print(v)
#with congruGen
seed = 123456789
X=c()
for(i in 1: 2) {
  Z=congruGen(seed)
  seed=Z$seedUpdate
  X=append(X, Z$aleaNum)
}
#print(X)
N=BoxMullerGen(X[1], X[2])
print(N[1])
print(N[2])
codeCount
codeCount(dataSet, code)
dataSet |
a character |
code |
a character |
a number
data = "this is an example @essai@"
codeCount(data, "@essai@")
#number of lines containing the chain
congruGen
congruGen(seed, a)
seed |
a number |
a |
a number |
a list
seed = 123456789
for(i in 1: 10) {
  Z=congruGen(seed)
  seed=Z$seedUpdate
  num=Z$aleaNum
  print(num)
}
cooc
cooc(lines, code1, code2)
lines |
character |
code1 |
character |
code2 |
character |
an integer
lines ="Companies can boost responsiveness @performance@ by digital @digital@."
code1 = "@performance@"
code2 = "@digital@"
res=cooc(lines, code1, code2)
print(res)
loadCodes
loadCodes(txtFile)
txtFile |
a character |
a list
theFile =system.file("codesList.txt", package = "codecountR")
data=loadCodes(theFile)
normMinMax
normMinMax(nameDframe)
nameDframe |
a data.frame |
a data.frame
j=c(10,14,56,30,58,78,99,1)
k=c(10,12,14,16,18,20,22,24)
x=data.frame(j,k)
xMinMax=normMinMax(x)
xMinMax
robustScal
robustScal(nameDframe)
nameDframe |
a data.frame |
a data.frame
j=c(10,14,56,30,58,78,99,1)
k=c(10,12,14,16,18,20,22,24)
x=data.frame(j,k)
xRsc=robustScal(x)
xRsc
subCalcBoxAndCox
subCalcBoxAndCox(sortedVect, actualLambda)
sortedVect |
a vector |
actualLambda |
a number |
a vector
vec=rlnorm(100, log(3), log(3))
BandC=subCalcBoxAndCox(vec, -3)
testPairs
testPairs(listCodes, lines)
listCodes |
character |
lines |
character |
a list
#Co-occurrences computed line by line in the file. Structure the file accordingly.
#Multiple identical pairs on one line count as one unit.
lines =c("Companies can boost responsiveness @performance@ by digital @digital@.", "softwares @digital@ may reduce response time @performance@ improving @satisfaction@.")
listCodes=c("@satisfaction@", "@digital@", "@performance@")
coocurences = testPairs(listCodes, lines)
print(coocurences$matrix)
#save to file
#nameFile = paste("CooccurrenceMatrix_",format(Sys.time(),"%d_%m_%Y-%Hh%Mm%Ss"),".csv",sep = "")
#write.csv(coocurences$matrix, nameFile, row.names = TRUE)
tokenization
tokenization(txtFile)
txtFile |
a character |
a list
theFile =system.file("ExText.txt", package = "codecountR")
data=tokenization(theFile)
verify
verify(lines, code1, code2)
lines |
character |
code1 |
character |
code2 |
character |
lines ="Companies can boost responsiveness @performance@ by digital @digital@."
code1 = "@performance@"
code2 = "@digital@"
verify(lines,code1,code2)
zScore
zScore(nameDframe)
nameDframe |
a data.frame |
a data.frame
j=c(10,14,56,30,58,78,99,1)
k=c(10,12,14,16,18,20,22,24)
x=data.frame(j,k)
xZsc=zScore(x)
xZsc