How to use RSDA 3.2.1

RSDA Package version 3.3

Oldemar Rodríguez R.

Installing the package

CRAN

install.packages("RSDA", dependencies=TRUE)

Github

devtools::install_github("PROMiDAT/RSDA")

How to read a Symbolic Table from a CSV file with RSDA?

ex3 <- read.sym.table(file = 'tsym1.csv', header=TRUE, sep=';',dec='.', row.names=1)
ex3
#> # A tibble: 7 × 7
#>      F1              F2      F3    F4        F5               F6              F7
#>   <dbl>      <symblc_n> <symbl> <dbl> <symblc_>       <symblc_n>      <symblc_n>
#> 1   2.8   [1.00 : 2.00]  <hist>   6       {a,d}   [0.00 : 90.00]  [9.00 : 24.00]
#> 2   1.4   [3.00 : 9.00]  <hist>   8     {b,c,d} [-90.00 : 98.00]  [-9.00 : 9.00]
#> 3   3.2  [-1.00 : 4.00]  <hist>  -7       {a,b}  [65.00 : 90.00] [65.00 : 70.00]
#> 4  -2.1   [0.00 : 2.00]  <hist>   0   {a,b,c,d}  [45.00 : 89.00] [25.00 : 67.00]
#> 5  -3   [-4.00 : -2.00]  <hist>  -9.5       {b}  [20.00 : 40.00]  [9.00 : 40.00]
#> 6   0.1 [10.00 : 21.00]  <hist>  -1       {a,d}    [5.00 : 8.00]   [5.00 : 8.00]
#> 7   9    [4.00 : 21.00]  <hist>   0.5       {a}    [3.14 : 6.76]   [4.00 : 6.00]

How to save a Symbolic Table in a CSV file with RSDA?

write.sym.table(ex3, file = 'tsymtemp.csv', sep = ';',dec = '.',
                row.names = TRUE, col.names = TRUE)

Symbolic Data Frame Example in RSDA

data(example3)
example3
#> # A tibble: 7 × 7
#>      F1              F2                      F3    F4        F5               F6
#>   <dbl>      <symblc_n>              <symblc_m> <dbl> <symblc_>       <symblc_n>
#> 1   2.8   [1.00 : 2.00] M1:0.10 M2:0.70 M3:0.20   6   {e,g,i,k}   [0.00 : 90.00]
#> 2   1.4   [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10   8   {a,b,c,d} [-90.00 : 98.00]
#> 3   3.2  [-1.00 : 4.00] M1:0.20 M2:0.20 M3:0.60  -7   {2,b,1,c}  [65.00 : 90.00]
#> 4  -2.1   [0.00 : 2.00] M1:0.90 M2:0.00 M3:0.10   0   {a,3,4,c}  [45.00 : 89.00]
#> 5  -3   [-4.00 : -2.00] M1:0.60 M2:0.00 M3:0.40  -9.5 {e,g,i,k}  [20.00 : 40.00]
#> 6   0.1 [10.00 : 21.00] M1:0.00 M2:0.70 M3:0.30  -1     {e,1,i}    [5.00 : 8.00]
#> 7   9    [4.00 : 21.00] M1:0.20 M2:0.20 M3:0.60   0.5   {e,a,2}    [3.14 : 6.76]
#> # ℹ 1 more variable: F7 <symblc_n>
example3[2,]
#> # A tibble: 1 × 7
#>      F1            F2                      F3    F4         F5               F6
#>   <dbl>    <symblc_n>              <symblc_m> <dbl> <symblc_s>       <symblc_n>
#> 1   1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10     8  {a,b,c,d} [-90.00 : 98.00]
#> # ℹ 1 more variable: F7 <symblc_n>
example3[,3]
#> # A tibble: 7 × 1
#>                        F3
#>                <symblc_m>
#> 1 M1:0.10 M2:0.70 M3:0.20
#> 2 M1:0.60 M2:0.30 M3:0.10
#> 3 M1:0.20 M2:0.20 M3:0.60
#> 4 M1:0.90 M2:0.00 M3:0.10
#> 5 M1:0.60 M2:0.00 M3:0.40
#> 6 M1:0.00 M2:0.70 M3:0.30
#> 7 M1:0.20 M2:0.20 M3:0.60
example3[2:3,5]
#> # A tibble: 2 × 1
#>           F5
#>   <symblc_s>
#> 1  {a,b,c,d}
#> 2  {2,b,1,c}
example3$F1
#> [1]  2.8  1.4  3.2 -2.1 -3.0  0.1  9.0

How to generate a symbolic data table from a classic data table in RSDA?

data(ex1_db2so)
ex1_db2so
#>         state sex county group age
#> 1     Florida   M      2     6   3
#> 2  California   F      4     3   4
#> 3       Texas   M     12     3   4
#> 4     Florida   F      2     3   4
#> 5       Texas   M      4     6   4
#> 6       Texas   F      2     3   3
#> 7     Florida   M      6     3   4
#> 8     Florida   F      2     6   4
#> 9  California   M      2     3   6
#> 10 California   F     21     3   4
#> 11 California   M      2     3   4
#> 12 California   M      2     6   7
#> 13      Texas   F     23     3   4
#> 14    Florida   M      2     3   4
#> 15    Florida   F     12     7   4
#> 16      Texas   M      2     3   8
#> 17 California   F      3     7   9
#> 18 California   M      2     3  11
#> 19 California   M      1     3  11

The classic.to.sym function converts a traditional table into a symbolic one. To do this, we must specify the following parameters.

  • x = a data.frame
  • concept = variables to be used as a concept
  • variables = variables to be used, compatible with tidyselect options
  • default.numeric = function that will be used by default for numerical values (sym.interval)
  • default.categorical = function that will be used by default for categorical values (sym.modal)

Example 1

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c(state, sex),
                         variables = c(county, group, age))
result
#> # A tibble: 6 × 3
#>           county         group            age
#>       <symblc_n>    <symblc_n>     <symblc_n>
#> 1 [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5 [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6 [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

We can add new variables indicating the type we want them to be.

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c("state", "sex"),
                         variables = c(county, group, age),
                         age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5)))
result
#> # A tibble: 6 × 4
#>     age_hist         county         group            age
#>   <symblc_h>     <symblc_n>    <symblc_n>     <symblc_n>
#> 1     <hist> [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2     <hist>  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3     <hist> [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4     <hist>  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5     <hist> [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6     <hist> [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

Example 2

data(USCrime)
head(USCrime)
#>   state fold population householdsize racepctblack racePctWhite racePctAsian
#> 1     8    1       0.19          0.33         0.02         0.90         0.12
#> 2    53    1       0.00          0.16         0.12         0.74         0.45
#> 3    24    1       0.00          0.42         0.49         0.56         0.17
#> 4    34    1       0.04          0.77         1.00         0.08         0.12
#> 5    42    1       0.01          0.55         0.02         0.95         0.09
#> 6     6    1       0.02          0.28         0.06         0.54         1.00
#>   racePctHisp agePct12t21 agePct12t29 agePct16t24 agePct65up numbUrban pctUrban
#> 1        0.17        0.34        0.47        0.29       0.32      0.20      1.0
#> 2        0.07        0.26        0.59        0.35       0.27      0.02      1.0
#> 3        0.04        0.39        0.47        0.28       0.32      0.00      0.0
#> 4        0.10        0.51        0.50        0.34       0.21      0.06      1.0
#> 5        0.05        0.38        0.38        0.23       0.36      0.02      0.9
#> 6        0.25        0.31        0.48        0.27       0.37      0.04      1.0
#>   medIncome pctWWage pctWFarmSelf pctWInvInc pctWSocSec pctWPubAsst pctWRetire
#> 1      0.37     0.72         0.34       0.60       0.29        0.15       0.43
#> 2      0.31     0.72         0.11       0.45       0.25        0.29       0.39
#> 3      0.30     0.58         0.19       0.39       0.38        0.40       0.84
#> 4      0.58     0.89         0.21       0.43       0.36        0.20       0.82
#> 5      0.50     0.72         0.16       0.68       0.44        0.11       0.71
#> 6      0.52     0.68         0.20       0.61       0.28        0.15       0.25
#>   medFamInc perCapInc whitePerCap blackPerCap indianPerCap AsianPerCap
#> 1      0.39      0.40        0.39        0.32         0.27        0.27
#> 2      0.29      0.37        0.38        0.33         0.16        0.30
#> 3      0.28      0.27        0.29        0.27         0.07        0.29
#> 4      0.51      0.36        0.40        0.39         0.16        0.25
#> 5      0.46      0.43        0.41        0.28         0.00        0.74
#> 6      0.62      0.72        0.76        0.77         0.28        0.52
#>   OtherPerCap HispPerCap NumUnderPov PctPopUnderPov PctLess9thGrade
#> 1        0.36       0.41        0.08           0.19            0.10
#> 2        0.22       0.35        0.01           0.24            0.14
#> 3        0.28       0.39        0.01           0.27            0.27
#> 4        0.36       0.44        0.01           0.10            0.09
#> 5        0.51       0.48        0.00           0.06            0.25
#> 6        0.48       0.60        0.01           0.12            0.13
#>   PctNotHSGrad PctBSorMore PctUnemployed PctEmploy PctEmplManu PctEmplProfServ
#> 1         0.18        0.48          0.27      0.68        0.23            0.41
#> 2         0.24        0.30          0.27      0.73        0.57            0.15
#> 3         0.43        0.19          0.36      0.58        0.32            0.29
#> 4         0.25        0.31          0.33      0.71        0.36            0.45
#> 5         0.30        0.33          0.12      0.65        0.67            0.38
#> 6         0.12        0.80          0.10      0.65        0.19            0.77
#>   PctOccupManu PctOccupMgmtProf MalePctDivorce MalePctNevMarr FemalePctDiv
#> 1         0.25             0.52           0.68           0.40         0.75
#> 2         0.42             0.36           1.00           0.63         0.91
#> 3         0.49             0.32           0.63           0.41         0.71
#> 4         0.37             0.39           0.34           0.45         0.49
#> 5         0.42             0.46           0.22           0.27         0.20
#> 6         0.06             0.91           0.49           0.57         0.61
#>   TotalPctDiv PersPerFam PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par
#> 1        0.75       0.35       0.55        0.59             0.61        0.56
#> 2        1.00       0.29       0.43        0.47             0.60        0.39
#> 3        0.70       0.45       0.42        0.44             0.43        0.43
#> 4        0.44       0.75       0.65        0.54             0.83        0.65
#> 5        0.21       0.51       0.91        0.91             0.89        0.85
#> 6        0.58       0.44       0.62        0.69             0.87        0.53
#>   PctWorkMomYoungKids PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent
#> 1                0.74       0.76     0.04     0.14     0.03           0.24
#> 2                0.46       0.53     0.00     0.24     0.01           0.52
#> 3                0.71       0.67     0.01     0.46     0.00           0.07
#> 4                0.85       0.86     0.03     0.33     0.02           0.11
#> 5                0.40       0.60     0.00     0.06     0.00           0.03
#> 6                0.30       0.43     0.00     0.11     0.04           0.30
#>   PctImmigRec5 PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5
#> 1         0.27         0.37          0.39           0.07         0.07
#> 2         0.62         0.64          0.63           0.25         0.27
#> 3         0.06         0.15          0.19           0.02         0.02
#> 4         0.20         0.30          0.31           0.05         0.08
#> 5         0.07         0.20          0.27           0.01         0.02
#> 6         0.35         0.43          0.47           0.50         0.50
#>   PctRecImmig8 PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell
#> 1         0.08          0.08             0.89                0.06
#> 2         0.25          0.23             0.84                0.10
#> 3         0.04          0.05             0.88                0.04
#> 4         0.11          0.11             0.81                0.08
#> 5         0.04          0.05             0.88                0.05
#> 6         0.56          0.57             0.45                0.28
#>   PctLargHouseFam PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous
#> 1            0.14              0.13             0.33              0.39
#> 2            0.16              0.10             0.17              0.29
#> 3            0.20              0.20             0.46              0.52
#> 4            0.56              0.62             0.85              0.77
#> 5            0.16              0.19             0.59              0.60
#> 6            0.25              0.19             0.29              0.53
#>   PersPerRentOccHous PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR
#> 1               0.28            0.55             0.09           0.51      0.5
#> 2               0.17            0.26             0.20           0.82      0.0
#> 3               0.43            0.42             0.15           0.51      0.5
#> 4               1.00            0.94             0.12           0.01      0.5
#> 5               0.37            0.89             0.02           0.19      0.5
#> 6               0.18            0.39             0.26           0.73      0.0
#>   HousVacant PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
#> 1       0.21         0.71          0.52             0.05           0.26
#> 2       0.02         0.79          0.24             0.02           0.25
#> 3       0.01         0.86          0.41             0.29           0.30
#> 4       0.01         0.97          0.96             0.60           0.47
#> 5       0.01         0.89          0.87             0.04           0.55
#> 6       0.02         0.84          0.30             0.16           0.28
#>   MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
#> 1           0.65           0.14           0.06           0.22         0.19
#> 2           0.65           0.16           0.00           0.21         0.20
#> 3           0.52           0.47           0.45           0.18         0.17
#> 4           0.52           0.11           0.11           0.24         0.21
#> 5           0.73           0.05           0.14           0.31         0.31
#> 6           0.25           0.02           0.05           0.94         1.00
#>   OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
#> 1          0.18     0.36       0.35      0.38    0.34              0.38
#> 2          0.21     0.42       0.38      0.40    0.37              0.29
#> 3          0.16     0.27       0.29      0.27    0.31              0.48
#> 4          0.19     0.75       0.70      0.77    0.89              0.63
#> 5          0.30     0.40       0.36      0.38    0.38              0.22
#> 6          1.00     0.67       0.63      0.68    0.62              0.47
#>   MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet PctForeignBorn
#> 1             0.46                  0.25          0.04         0           0.12
#> 2             0.32                  0.18          0.00         0           0.21
#> 3             0.39                  0.28          0.00         0           0.14
#> 4             0.51                  0.47          0.00         0           0.19
#> 5             0.51                  0.21          0.00         0           0.11
#> 6             0.59                  0.11          0.00         0           0.70
#>   PctBornSameState PctSameHouse85 PctSameCity85 PctSameState85 LandArea PopDens
#> 1             0.42           0.50          0.51           0.64     0.12    0.26
#> 2             0.50           0.34          0.60           0.52     0.02    0.12
#> 3             0.49           0.54          0.67           0.56     0.01    0.21
#> 4             0.30           0.73          0.64           0.65     0.02    0.39
#> 5             0.72           0.64          0.61           0.53     0.04    0.09
#> 6             0.42           0.49          0.73           0.64     0.01    0.58
#>   PctUsePubTrans LemasPctOfficDrugUn ViolentCrimesPerPop
#> 1           0.20                0.32                0.20
#> 2           0.45                0.00                0.67
#> 3           0.02                0.00                0.43
#> 4           0.28                0.00                0.12
#> 5           0.02                0.00                0.03
#> 6           0.10                0.00                0.14
result  <- classic.to.sym(x = USCrime,
                          concept = state, 
                          variables= c(NumInShelters,
                                       NumImmig,
                                       ViolentCrimesPerPop),
                          ViolentCrimesPerPop_hist = sym.histogram(ViolentCrimesPerPop,
                                                                   breaks = pretty(USCrime$ViolentCrimesPerPop,5)))
result
#> # A tibble: 46 × 4
#>    ViolentCrimesPerPop_hist NumInShelters      NumImmig ViolentCrimesPerPop
#>                  <symblc_h>    <symblc_n>    <symblc_n>          <symblc_n>
#>  1                   <hist> [0.00 : 0.32] [0.00 : 0.04]       [0.01 : 1.00]
#>  2                   <hist> [0.01 : 0.18] [0.01 : 0.09]       [0.05 : 0.36]
#>  3                   <hist> [0.00 : 1.00] [0.00 : 0.57]       [0.05 : 0.57]
#>  4                   <hist> [0.00 : 0.08] [0.00 : 0.02]       [0.02 : 1.00]
#>  5                   <hist> [0.00 : 1.00] [0.00 : 1.00]       [0.01 : 1.00]
#>  6                   <hist> [0.00 : 0.68] [0.00 : 0.23]       [0.07 : 0.75]
#>  7                   <hist> [0.00 : 0.79] [0.00 : 0.14]       [0.00 : 0.94]
#>  8                   <hist> [0.01 : 0.01] [0.01 : 0.01]       [0.37 : 0.37]
#>  9                   <hist> [1.00 : 1.00] [0.39 : 0.39]       [1.00 : 1.00]
#> 10                   <hist> [0.00 : 0.52] [0.00 : 1.00]       [0.06 : 1.00]
#> # ℹ 36 more rows

Example 3

data("ex_mcfa1") 
head(ex_mcfa1)
#>   suspect age     hair    eyes    region
#> 1       1  42    h_red e_brown     Bronx
#> 2       2  20  h_black e_green     Bronx
#> 3       3  64  h_brown e_brown  Brooklyn
#> 4       4  55 h_blonde e_brown     Bronx
#> 5       5   4  h_brown e_green Manhattan
#> 6       6  61 h_blonde e_green     Bronx
sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect, 
                            variables=c(hair,
                                        eyes,
                                        region),
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 3
#>                  hair              eyes               region
#>            <symblc_s>        <symblc_s>           <symblc_s>
#>  1            {h_red} {e_brown,e_black}              {Bronx}
#>  2 {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8 {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows

Example 4

We can modify the function that will be applied by default to the categorical variables

sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect,
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows

Converting SODAS 1.0 *.SDS files to RSDA files

hani3101 <- SDS.to.RSDA(file.path = "hani3101.sds")
#> Preprocessing file
#> Converting data to JSON format
#> Processing variable 1: R3101
#> Processing variable 2: RNINO12
#> Processing variable 3: RNINO3
#> Processing variable 4: RNINO4
#> Processing variable 5: RNINO34
#> Processing variable 6: RSOI
hani3101
#> # A tibble: 32 × 6
#>                             R3101                 RNINO12
#>                        <symblc_m>              <symblc_m>
#>  1 X2:0.21 X4:0.18 X3:0.15 X5:... X1:0.17 X2:0.83 X3:0.00
#>  2 X2:0.30 X4:0.14 X3:0.19 X5:... X1:0.00 X2:0.25 X3:0.75
#>  3 X2:0.16 X4:0.12 X3:0.20 X5:... X1:0.67 X2:0.33 X3:0.00
#>  4 X2:0.13 X4:0.15 X3:0.22 X5:... X1:0.17 X2:0.83 X3:0.00
#>  5 X2:0.14 X4:0.14 X3:0.18 X5:... X1:0.42 X2:0.58 X3:0.00
#>  6 X2:0.26 X4:0.06 X3:0.23 X5:... X1:0.00 X2:0.67 X3:0.33
#>  7 X2:0.28 X4:0.14 X3:0.10 X5:... X1:0.00 X2:1.00 X3:0.00
#>  8 X2:0.25 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#>  9 X2:0.20 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 10 X2:0.21 X4:0.16 X3:0.31 X5:... X1:0.08 X2:0.92 X3:0.00
#> # ℹ 22 more rows
#> # ℹ 4 more variables: RNINO3 <symblc_m>, RNINO4 <symblc_m>, RNINO34 <symblc_m>,
#> #   RSOI <symblc_m>
# We can save the result as a CSV file in RSDA format as follows:
write.sym.table(hani3101,
                file='hani3101.csv',
                sep=';',
                dec='.',
                row.names=TRUE,
                col.names=TRUE)

Converting SODAS 2.0 *.XML files to RSDA files

abalone <- SODAS.to.RSDA("abalone.xml")
#> Processing variable 1: LENGTH
#> Processing variable 2: DIAMETER
#> Processing variable 3: HEIGHT
#> Processing variable 4: WHOLE_WEIGHT
#> Processing variable 5: SHUCKED_WEIGHT
#> Processing variable 6: VISCERA_WEIGHT
#> Processing variable 7: SHELL_WEIGHT
abalone
#> # A tibble: 24 × 7
#>           LENGTH      DIAMETER        HEIGHT  WHOLE_WEIGHT SHUCKED_WEIGHT
#>       <symblc_n>    <symblc_n>    <symblc_n>    <symblc_n>     <symblc_n>
#>  1 [0.28 : 0.66] [0.20 : 0.48] [0.07 : 0.18] [0.08 : 1.37]  [0.03 : 0.64]
#>  2 [0.30 : 0.74] [0.22 : 0.58] [0.02 : 1.13] [0.15 : 2.25]  [0.06 : 1.16]
#>  3 [0.34 : 0.78] [0.26 : 0.63] [0.06 : 0.23] [0.20 : 2.66]  [0.07 : 1.49]
#>  4 [0.39 : 0.82] [0.30 : 0.65] [0.10 : 0.25] [0.26 : 2.51]  [0.11 : 1.23]
#>  5 [0.40 : 0.74] [0.32 : 0.60] [0.10 : 0.24] [0.35 : 2.20]  [0.12 : 0.84]
#>  6 [0.45 : 0.80] [0.38 : 0.63] [0.14 : 0.22] [0.64 : 2.53]  [0.16 : 0.93]
#>  7 [0.49 : 0.72] [0.36 : 0.58] [0.12 : 0.21] [0.68 : 2.12]  [0.16 : 0.82]
#>  8 [0.55 : 0.70] [0.46 : 0.58] [0.18 : 0.22] [1.21 : 1.81]  [0.32 : 0.71]
#>  9 [0.08 : 0.24] [0.06 : 0.18] [0.01 : 0.06] [0.00 : 0.07]  [0.00 : 0.03]
#> 10 [0.13 : 0.58] [0.10 : 0.45] [0.00 : 0.15] [0.01 : 0.89]  [0.00 : 0.50]
#> # ℹ 14 more rows
#> # ℹ 2 more variables: VISCERA_WEIGHT <symblc_n>, SHELL_WEIGHT <symblc_n>
write.sym.table(abalone,
                file='abalone.csv',
                sep=';',
                dec='.',
                row.names = TRUE,
                col.names = TRUE)

Basic statistics

Symbolic Mean

data(example3)
mean(example3$F1)
#> [1] 1.628571
mean(example3[,1])
#> [1] 1.628571
mean(example3$F2)
#> [1] 5
mean(example3[,2])
#> [1] 5
mean(example3$F2,method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]
mean(example3[,2],method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]

Symbolic median

median(example3$F1)
#> [1] 1.4
median(example3[,1])
#> [1] 1.4
median(example3$F2)
#> [1] 1.5
median(example3[,2])
#> [1] 1.5
median(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]
median(example3[,6], method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]

Variance and standard deviation

var(example3[,1])
#> [1] 15.98238
var(example3[,2])
#> [1] 90.66667
var(example3$F6)
#> [1] 1872.358
var(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [2,408.97 : 1,670.51]
var(example3$F6, method = 'billard')
#> [1] 1355.143
sd(example3$F1)
#> [1] 3.997797
sd(example3$F2)
#> [1] 6.733003
sd(example3$F6)
#> [1] 30.59704
sd(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [49.08 : 40.87]
sd(example3$F6, method = 'billard')
#> [1] 36.81226

Symbolic correlation

cor(example3$F1, example3$F4)
#> [1] 0.2864553
cor(example3[,1], example3[,4])
#>           [,1]
#> [1,] 0.2864553
cor(example3$F2, example3$F6, method = 'centers')
#> [1] -0.6693648
cor(example3$F2, example3$F6, method = 'billard')
#> [1] -0.6020041

Radar plot for intervals

library(ggpolypath)
#> Loading required package: ggplot2

data(oils)
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))
sym.radar.plot(oils[2:3,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.

sym.radar.plot(oils[2:5,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.


res <- interval.histogram.plot(oils[,2],
                               n.bins = 4,
                               col = c(2,3,4,5))

res
#> $frequency
#> [1] 25 49  1 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1
#> [4,]  4.3

res <- interval.histogram.plot(oils[,3],
                               n.bins = 3,
                               main = "Histogram",
                               col = c(2, 3, 4))

res
#> $frequency
#> [1] 50 25 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1

Distances for intervals

Gowda-Diday

data("oils")
DM <- sym.dist.interval(sym.data = oils[,1:4],
                        method = "Gowda.Diday")
model <- hclust(DM)
plot(model, hang = -1)

Ichino

DM <- sym.dist.interval(sym.data= oils[,1:4],
                        method = "Ichino")
model <- hclust(DM)
plot(model, hang = -1)

Hausdorff

DM <- sym.dist.interval(sym.data = oils[,c(1,2,4)],
                        gamma = 0.5,
                        method = "Hausdorff",
                        normalize = FALSE,
                        SpanNormalize = TRUE,
                        euclidea = TRUE,
                        q = 2)
model <- hclust(DM)
plot(model, hang = -1)

Linear regression for intervals

Training

data(int_prost_train)
data(int_prost_test)
res.cm <- sym.lm(formula = lpsa~., sym.data = int_prost_train, method = 'cm')
res.cm
#> 
#> Call:
#> stats::lm(formula = formula, data = centers)
#> 
#> Coefficients:
#> (Intercept)       lcavol      lweight          age         lbph          svi  
#>    0.411537     0.579327     0.614128    -0.018659     0.143918     0.730937  
#>         lcp      gleason        pgg45  
#>   -0.205536    -0.030924     0.009507

Prediction

pred.cm <- sym.predict(model = res.cm, new.sym.data = int_prost_test)

Testing

RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7229999
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7192467
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.501419
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.5058389
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.4962964

LASSO regression for intervals

data(int_prost_train)
data(int_prost_test)

Training

res.cm.lasso <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 1,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.lasso <- sym.predict(res.cm.lasso,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.lasso)

plot(res.cm.lasso$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.7172516
RMSE.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.7136356
R2.L(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.5090679
R2.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.5133541
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.4968871

RIDGE regression for intervals

Training

data(int_prost_train)
data(int_prost_test)

res.cm.ridge <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 0,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.ridge <- sym.predict(res.cm.ridge,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.ridge)

plot(res.cm.ridge$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.703543
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.7004145
R2.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5286114
R2.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5322683
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.4808652

PCA for intervals

Example 1

data("oils")
res <- sym.pca(oils,'centers')
plot(res, choix = "ind")

plot(res, choix = "var")

Example 2

res <- sym.pca(oils,'tops')
plot(res, choix = "ind")

Example 3

res <- sym.pca(oils, 'principal.curves')
plot(res, choix = "ind")

Example 4

res <- sym.pca(oils,'optimized.distance')
plot(res, choix = "ind")

plot(res, choix = "var")

Example 5

res <- sym.pca(oils,'optimized.variance')
plot(res, choix = "ind")

plot(res, choix = "var")

Symbolic Multiple Correspondence Analysis

Example 1

data("ex_mcfa1") 
ex_mcfa1
#>     suspect age     hair    eyes    region
#> 1         1  42    h_red e_brown     Bronx
#> 2         2  20  h_black e_green     Bronx
#> 3         3  64  h_brown e_brown  Brooklyn
#> 4         4  55 h_blonde e_brown     Bronx
#> 5         5   4  h_brown e_green Manhattan
#> 6         6  61 h_blonde e_green     Bronx
#> 7         7  61  h_white e_black    Queens
#> 8         8  32 h_blonde e_brown Manhattan
#> 9         9  39 h_blonde e_black  Brooklyn
#> 10       10  50  h_brown e_brown Manhattan
#> 11       11  41    h_red  e_blue Manhattan
#> 12       12  35 h_blonde e_green  Brooklyn
#> 13       13  56 h_blonde e_brown     Bronx
#> 14       14  52    h_red e_brown    Queens
#> 15       15  55    h_red e_green  Brooklyn
#> 16       16  25  h_brown e_brown    Queens
#> 17       17  52 h_blonde e_brown  Brooklyn
#> 18       18  28    h_red e_brown Manhattan
#> 19       19  21  h_white  e_blue Manhattan
#> 20       20  66  h_black e_black  Brooklyn
#> 21       21  67 h_blonde e_brown    Queens
#> 22       22  13  h_white  e_blue  Brooklyn
#> 23       23  39  h_brown e_green Manhattan
#> 24       24  47  h_black e_green  Brooklyn
#> 25       25  54 h_blonde e_brown     Bronx
#> 26       26  75  h_brown  e_blue  Brooklyn
#> 27       27   3  h_white e_green Manhattan
#> 28       28  40  h_white e_green Manhattan
#> 29       29  58    h_red  e_blue    Queens
#> 30       30  41  h_brown e_green     Bronx
#> 31       31  25  h_white e_black  Brooklyn
#> 32       32  75 h_blonde  e_blue Manhattan
#> 33       33  58  h_white e_brown     Bronx
#> 34       34  61  h_white e_brown Manhattan
#> 35       35  52  h_white  e_blue     Bronx
#> 36       36  19    h_red e_black    Queens
#> 37       37  58    h_red e_black     Bronx
#> 38       38  46  h_black e_green Manhattan
#> 39       39  74  h_brown e_black Manhattan
#> 40       40  26 h_blonde e_brown  Brooklyn
#> 41       41  63 h_blonde  e_blue    Queens
#> 42       42  40  h_brown e_black    Queens
#> 43       43  65  h_black e_brown  Brooklyn
#> 44       44  51 h_blonde e_brown  Brooklyn
#> 45       45  15  h_white e_black  Brooklyn
#> 46       46  32 h_blonde e_brown     Bronx
#> 47       47  68  h_white e_black Manhattan
#> 48       48  51  h_white e_black    Queens
#> 49       49  14    h_red e_green    Queens
#> 50       50  72  h_white e_brown  Brooklyn
#> 51       51   7    h_red  e_blue  Brooklyn
#> 52       52  22    h_red e_brown     Bronx
#> 53       53  52    h_red e_brown  Brooklyn
#> 54       54  62  h_brown e_green     Bronx
#> 55       55  41  h_black e_brown    Queens
#> 56       56  32  h_black e_black Manhattan
#> 57       57  58  h_brown e_brown    Queens
#> 58       58  25  h_black e_brown    Queens
#> 59       59  70 h_blonde e_green  Brooklyn
#> 60       60  64  h_brown  e_blue    Queens
#> 61       61  25  h_white  e_blue     Bronx
#> 62       62  42  h_black e_black  Brooklyn
#> 63       63  56    h_red e_black  Brooklyn
#> 64       64  41 h_blonde e_black  Brooklyn
#> 65       65   8  h_white e_black Manhattan
#> 66       66   7  h_black e_green  Brooklyn
#> 67       67  42  h_white e_brown    Queens
#> 68       68  10  h_white  e_blue Manhattan
#> 69       69  60  h_brown e_black     Bronx
#> 70       70  52 h_blonde e_brown  Brooklyn
#> 71       71  39  h_brown  e_blue Manhattan
#> 72       72  69  h_brown e_green    Queens
#> 73       73  67 h_blonde e_green Manhattan
#> 74       74  46    h_red e_black  Brooklyn
#> 75       75  72  h_black e_black    Queens
#> 76       76  66    h_red  e_blue    Queens
#> 77       77   4  h_black  e_blue Manhattan
#> 78       78  62  h_black e_green  Brooklyn
#> 79       79  10 h_blonde  e_blue     Bronx
#> 80       80  16 h_blonde e_black Manhattan
#> 81       81  59 h_blonde e_brown     Bronx
#> 82       82  63 h_blonde  e_blue Manhattan
#> 83       83  54    h_red  e_blue    Queens
#> 84       84  14  h_brown  e_blue  Brooklyn
#> 85       85  48  h_black e_green Manhattan
#> 86       86  59 h_blonde e_black     Bronx
#> 87       87  73 h_blonde e_black     Bronx
#> 88       88  51  h_brown e_brown     Bronx
#> 89       89  14  h_white e_black     Bronx
#> 90       90  58 h_blonde e_black    Queens
#> 91       91  56    h_red e_green Manhattan
#> 92       92  26    h_red  e_blue  Brooklyn
#> 93       93  59  h_brown e_black Manhattan
#> 94       94  27  h_white e_green Manhattan
#> 95       95  38  h_black e_green Manhattan
#> 96       96   5 h_blonde e_green     Bronx
#> 97       97  14  h_black  e_blue    Queens
#> 98       98  13  h_black e_brown Manhattan
#> 99       99  54  h_white  e_blue  Brooklyn
#> 100     100  66  h_white e_green Manhattan
#> 101       1  22    h_red e_black     Bronx
#> 102       2  57 h_blonde e_black Manhattan
#> 103       3  29  h_white e_green    Queens
#> 104       4  14 h_blonde e_black Manhattan
#> 105       5  47    h_red e_green     Bronx
#> 106       6  32  h_white  e_blue    Queens
#> 107       7  49    h_red  e_blue     Bronx
#> 108       8   8  h_white e_black  Brooklyn
#> 109       9  67  h_white e_brown     Bronx
#> 110      10  68  h_black e_green     Bronx
#> 111      11  15  h_black e_brown Manhattan
#> 112      12  46  h_white e_brown     Bronx
#> 113      13  68  h_white e_black Manhattan
#> 114      14  55 h_blonde  e_blue Manhattan
#> 115      15   7  h_white e_green     Bronx
#> 116      16  10  h_black e_brown  Brooklyn
#> 117      17  49    h_red  e_blue Manhattan
#> 118      18  12  h_brown  e_blue  Brooklyn
#> 119      19  41  h_white  e_blue     Bronx
#> 120      20  10  h_brown  e_blue     Bronx
#> 121      21  12  h_white e_green Manhattan
#> 122      22  53  h_white  e_blue Manhattan
#> 123      23   5  h_black e_black Manhattan
#> 124      24  46  h_brown e_black    Queens
#> 125      25  14  h_brown e_black    Queens
#> 126      26  55  h_white e_green  Brooklyn
#> 127      27  53    h_red e_brown Manhattan
#> 128      28  31  h_black e_brown Manhattan
#> 129      29  31 h_blonde e_brown    Queens
#> 130      30  55  h_brown e_black  Brooklyn
# Aggregate the classic table by `suspect` into a symbolic table.
# In the result printed below, `age` becomes an interval and the categorical
# columns become sets (default.categorical = sym.set).
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows
# Analysis on the variables in positions 2 and 3 of sym.table (hair, eyes),
# followed by a scatterplot of columns 2 and 3 of the result.
res <- sym.mcfa(sym.table, c(2, 3))
mcfa.scatterplot(
  res[, 2], res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3)
)

# Same analysis, now also including position 4 (region).
res <- sym.mcfa(sym.table, c(2, 3, 4))
mcfa.scatterplot(
  res[, 2], res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3, 4)
)

Symbolic UMAP

Oils Example

datos <- oils
datos
#> # A tibble: 8 × 4
#>             GRA               FRE               IOD               SAP
#> *    <symblc_n>        <symblc_n>        <symblc_n>        <symblc_n>
#> 1 [0.93 : 0.94] [-27.00 : -18.00] [170.00 : 204.00] [118.00 : 196.00]
#> 2 [0.93 : 0.94]   [-5.00 : -4.00] [192.00 : 208.00] [188.00 : 197.00]
#> 3 [0.92 : 0.92]   [-6.00 : -1.00]  [99.00 : 113.00] [189.00 : 198.00]
#> 4 [0.92 : 0.93]   [-6.00 : -4.00] [104.00 : 116.00] [187.00 : 193.00]
#> 5 [0.92 : 0.92] [-25.00 : -15.00]   [80.00 : 82.00] [189.00 : 193.00]
#> 6 [0.91 : 0.92]     [0.00 : 6.00]   [79.00 : 90.00] [187.00 : 196.00]
#> 7 [0.86 : 0.87]   [30.00 : 38.00]   [40.00 : 48.00] [190.00 : 199.00]
#> 8 [0.86 : 0.86]   [22.00 : 32.00]   [53.00 : 77.00] [190.00 : 202.00]
x <- sym.umap(datos)
x
#>              V1         V2         V3         V4
#> 1   -8.37902826  1.7589304 14.9405244 -5.3931236
#> 2   -8.42078608  1.8006874 14.8987716 -5.4348743
#> 3   -8.24521299  1.6251160 15.0741625 -5.2593171
#> 4   -8.39463437  1.7745337 14.9249101 -5.4087272
#> 5   -8.24754809  1.6274486 15.0723260 -5.2616579
#> 6   -8.32004462  1.6999278 15.0033025 -5.3342120
#> 7   -8.13752358  1.5174309 15.1816270 -5.1516228
#> 8   -8.31374014  1.6936346 15.0062465 -5.3278528
#> 9   -1.88458393 -0.7932010 -5.7247579 -7.1248175
#> 10  -1.82727443 -0.7882245 -5.7469910 -7.1667824
#> 11  -1.98163613 -0.5059227 -5.8332501 -7.0857915
#> 12  -1.88228007 -0.6002083 -5.8888435 -7.1519361
#> 13  -1.80557507 -0.8065018 -5.8697935 -7.2918087
#> 14  -1.86030648 -0.7882319 -5.7630142 -7.1517635
#> 15  -1.70992047 -0.8503514 -5.9097267 -7.3531853
#> 16  -1.74782446 -0.8139683 -5.9604684 -7.4020037
#> 17  -0.79949258 -0.3003863 -5.8860600 -7.5577562
#> 18  -0.85666113 -0.2451562 -5.7998941 -7.4310198
#> 19  -0.90051533 -0.1497627 -5.8841912 -7.3891967
#> 20  -0.87936558 -0.2082160 -5.8995514 -7.4524492
#> 21  -0.58783318 -0.4006869 -6.1708411 -7.6144673
#> 22  -0.48330336 -0.1399684 -6.0118497 -7.6388784
#> 23  -0.61859816 -0.3345049 -6.0772751 -7.5306385
#> 24  -0.59201024 -0.2655131 -6.1181991 -7.5287715
#> 25  -1.06069200 -0.6156222 -6.0766914 -7.4613034
#> 26  -1.01423888 -0.5391786 -5.7884047 -7.3326552
#> 27  -1.04485287 -0.5238565 -5.9902753 -7.3080227
#> 28  -1.08403481 -0.5691747 -5.9324121 -7.3715823
#> 29  -0.82589956 -0.6850691 -6.1119184 -7.4920332
#> 30  -0.79939366 -0.7188496 -5.9357465 -7.3394631
#> 31  -0.79385867 -0.6797366 -6.1399281 -7.5252298
#> 32  -0.73418509 -0.6589565 -5.7738585 -7.2934103
#> 33  -6.34142441 -4.0246786  1.5534918  4.5887979
#> 34  -6.23858760 -3.9467859  1.5084358  4.4438543
#> 35  -6.21661549 -3.9269199  1.2435873  4.2591355
#> 36  -6.09696278 -3.9369396  1.3346522  4.1762886
#> 37  -6.78923444 -3.9603964  1.5616384  4.6728085
#> 38  -6.85567801 -4.2145385  1.5215488  4.5850426
#> 39  -6.60267216 -4.2737525  1.3794869  4.3558086
#> 40  -6.57783757 -4.0768643  1.6413853  4.4247339
#> 41  -6.93001670 -4.0461118  2.0690480  3.0074027
#> 42  -6.81159076 -3.9847527  1.9695287  3.1111623
#> 43  -6.73598493 -3.9068822  1.8178970  2.8468451
#> 44  -6.61941224 -3.8175576  1.7413446  2.7154604
#> 45  -7.15794965 -4.0323846  1.9815896  3.2755902
#> 46  -7.32351507 -3.9752273  1.9117856  3.3985240
#> 47  -7.05982030 -3.9303025  2.0119821  3.0151542
#> 48  -7.03076054 -4.0110518  1.9332707  3.1193380
#> 49  -6.53421616 -4.0870018  1.5294818  4.6560563
#> 50  -6.72013958 -3.7703166  1.8716196  4.8074187
#> 51  -6.39957250 -4.0053397  1.5631214  4.7602667
#> 52  -6.57338121 -3.8265797  1.8583499  4.9493444
#> 53  -6.66594679 -4.2509265  1.5484313  4.8905350
#> 54  -6.75963648 -3.8439155  1.8243622  4.8970106
#> 55  -6.75567307 -4.1917154  1.6859532  4.8210608
#> 56  -6.84739293 -3.9771194  1.8676644  4.9838389
#> 57  -7.09201632 -3.9078463  1.8967090  3.9891895
#> 58  -7.33428204 -3.8203390  1.9752774  4.2508901
#> 59  -6.93826242 -3.8201413  1.9273197  3.9573216
#> 60  -7.24779839 -3.8729659  1.9891211  4.3522677
#> 61  -7.36246178 -4.1060589  1.8246539  3.9703460
#> 62  -7.62196396 -3.9501856  1.6552640  4.1867359
#> 63  -7.47866957 -4.0315795  1.6807634  4.0392194
#> 64  -7.46552869 -4.0353127  1.8378600  4.3315125
#> 65   0.15213120 14.5060570 -1.1262315  1.9539076
#> 66   0.06951483 14.5832555 -0.9322825  1.7561237
#> 67  -1.11775123 16.2068686 -2.1817558 -0.3552349
#> 68  -1.14605349 16.1518202 -2.1157635 -0.3035530
#> 69   0.02799480 14.6277158 -0.9598975  1.7880064
#> 70  -0.07724807 14.7329394 -1.1086721  1.9251347
#> 71  -0.97624131 16.1864205 -2.0906700 -0.4894007
#> 72  -1.12977254 16.0940241 -2.0207088 -0.3393824
#> 73  -0.07864990 14.7453886 -1.1932205  2.0284603
#> 74   0.03839232 14.6246889 -1.1585491  2.0268446
#> 75  -0.90919816 15.9752032 -2.2297603 -0.5706925
#> 76  -1.03955060 15.8214117 -2.0537953 -0.4379304
#> 77  -0.17954218 14.8490562 -1.0701874  1.9480176
#> 78  -0.13052193 14.7919830 -1.0523515  1.9212957
#> 79  -1.09661289 15.9565655 -2.1360587 -0.3753457
#> 80  -1.01791411 15.8455995 -2.1968057 -0.4751090
#> 81  -5.56860097 -3.7623990  1.1196856  3.6888463
#> 82  -5.52167935 -3.7779527  1.1392184  3.6761474
#> 83  -5.39658766 -3.6892797  1.0345319  3.3917634
#> 84  -5.37800132 -3.6701903  1.0158302  3.3364452
#> 85  -5.77065852 -3.8694708  1.1976241  3.9318872
#> 86  -5.66334143 -3.7923664  1.1194914  3.8453479
#> 87  -5.52362566 -3.4885593  0.8150734  3.4509153
#> 88  -5.51437291 -3.5330939  0.8567840  3.4314830
#> 89  -6.12858144 -3.8344922  1.3649028  2.5776446
#> 90  -6.21461974 -3.6765204  1.5125233  2.6312362
#> 91  -5.90507857 -3.7776897  1.2542520  2.5017414
#> 92  -5.82565324 -3.7520575  1.3901140  2.6269039
#> 93  -6.35534864 -3.7926113  1.4224025  2.7243468
#> 94  -6.43236379 -3.7937862  1.5499531  2.5757414
#> 95  -5.81581375 -3.6998732  1.3412901  2.5907435
#> 96  -5.82270089 -3.6376222  1.2368374  2.4694627
#> 97  12.35231727 -1.7009201 -1.1066603  0.9926477
#> 98  12.71802285 -1.5621899 -1.1824788  0.7976689
#> 99  12.56330284 -1.8179228 -1.1404710  0.5871285
#> 100 12.82223297 -1.7502257 -1.2375324  0.5088911
#> 101 12.30819685 -1.6603294 -1.0024885  1.0518916
#> 102 12.65796321 -1.4795467 -1.2304065  1.0100476
#> 103 12.36231732 -1.6833964 -1.3524693  0.6220538
#> 104 12.66225894 -1.7650290 -1.3384657  0.4244600
#> 105 13.47654049 -1.9484168 -0.7288533  0.7228498
#> 106 13.40558754 -1.7009576 -1.1534771  0.6291025
#> 107 13.45507944 -1.8786449 -0.9419220  0.4340517
#> 108 13.52338999 -1.7915025 -1.1011170  0.3845739
#> 109 13.39901623 -1.8753632 -0.6543078  0.6409783
#> 110 13.44367032 -1.7277000 -0.9769634  0.8419683
#> 111 13.66465517 -1.7336099 -1.0049378  0.2673945
#> 112 13.46118925 -1.9315899 -1.0509934  0.3291736
#> 113 12.57150301 -1.7594346 -0.7361842  1.1665825
#> 114 12.48765021 -1.6790168 -0.7876959  1.1784846
#> 115 12.32755787 -1.5973719 -0.9601972  0.7998678
#> 116 12.29330675 -1.6108861 -1.1911450  1.0500525
#> 117 12.50710550 -1.8662834 -0.5635176  1.1990157
#> 118 12.45722951 -1.8252408 -0.4354456  1.2332384
#> 119 12.25368885 -1.9747000 -0.6787458  1.0667481
#> 120 12.09475443 -1.8866024 -0.8063566  1.0682922
#> 121 13.31742263 -2.1099873 -0.4317733  0.9740529
#> 122 13.47475543 -1.8444703 -0.5623985  1.1524140
#> 123 13.67877460 -2.1173543 -0.6127262  0.5998969
#> 124 13.65460762 -2.0434422 -0.7361023  0.5592199
#> 125 13.20132157 -2.1661539 -0.3025189  1.1085611
#> 126 13.23739944 -2.1383536 -0.3443533  1.0876800
#> 127 13.50047823 -2.2399728 -0.4250420  0.8992960
#> 128 13.53261360 -2.2383882 -0.4906107  0.8873105
plot(x)

Cardiological Example

datos <- Cardiological
datos
#> # A tibble: 11 × 3
#>               Pulse              Syst             Diast
#>          <symblc_n>        <symblc_n>        <symblc_n>
#>  1  [44.00 : 68.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  2  [60.00 : 72.00]  [90.00 : 130.00]   [70.00 : 90.00]
#>  3  [56.00 : 90.00] [140.00 : 180.00]  [90.00 : 100.00]
#>  4 [70.00 : 112.00] [110.00 : 142.00]  [80.00 : 108.00]
#>  5  [54.00 : 72.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  6 [70.00 : 100.00] [130.00 : 160.00]  [80.00 : 110.00]
#>  7  [63.00 : 75.00]  [60.00 : 100.00] [140.00 : 150.00]
#>  8 [72.00 : 100.00] [130.00 : 160.00]   [76.00 : 90.00]
#>  9  [76.00 : 98.00] [110.00 : 190.00]  [70.00 : 110.00]
#> 10  [86.00 : 96.00] [138.00 : 180.00]  [90.00 : 110.00]
#> 11 [86.00 : 100.00] [110.00 : 150.00]  [78.00 : 100.00]
x <- sym.umap(datos)
x
#>              V1          V2          V3
#> 1  -1.613637622 -0.39340496  4.18622768
#> 2  -1.435191077 -0.03079160  3.11280792
#> 3  -1.457252690 -0.28772840  4.02548875
#> 4  -1.317467026  0.10952404  3.03513731
#> 5  -1.188346137 -0.37362626  4.04383631
#> 6  -0.757023164  0.41898741  3.07507230
#> 7  -1.192526155 -0.26415871  3.94929081
#> 8  -0.618837158  0.28999423  3.02617051
#> 9  -0.952248407  0.16281928  3.71172426
#> 10 -0.439731539  0.41645329  2.68828749
#> 11  0.141952691  0.83046969  1.52043728
#> 12  0.133905286  0.80164528  1.60730633
#> 13 -0.484830652  0.32403187  3.54912104
#> 14 -0.468590138  0.65524383  2.87733514
#> 15  0.209108969  0.78010796  0.69618357
#> 16 -0.238303537  0.81772155  0.88854134
#> 17  0.284938981  0.80361237  0.59400743
#> 18  1.006778481 -0.89923505 -1.99825491
#> 19  0.830853362  1.03483282  0.18445761
#> 20  1.607092519  0.62385043 -2.30889396
#> 21  0.067164314  0.88086189  0.40279501
#> 22  0.405000775 -0.75116132 -1.87482050
#> 23  0.705546457  1.14420592  0.11845692
#> 24  1.458634534  0.95190008 -2.42125353
#> 25 -0.305964004  0.63568628  2.45853919
#> 26  1.228258381 -1.55778395 -2.53176940
#> 27  0.332029070  1.00308899  0.97637494
#> 28  1.604477485 -1.08869872 -2.59214951
#> 29 -0.955885658  0.44432489  0.30448008
#> 30  0.240703245 -1.36371090 -2.57864313
#> 31 -0.238814408  0.65470201  0.17794313
#> 32  0.362326057 -0.75424978 -2.82886826
#> 33 -1.619019624 -0.15928583  3.93008616
#> 34 -1.338143062  0.11344111  3.12909924
#> 35 -1.659977455 -0.29964370  3.67319006
#> 36 -1.228729814  0.03592121  2.89792866
#> 37 -1.139590192 -0.08584472  4.11377921
#> 38 -0.807186937  0.53851317  2.87877612
#> 39 -1.061695423  0.03608948  3.77880402
#> 40 -0.505883421  0.52902781  2.74875936
#> 41  0.063024531  0.77327005  1.33626699
#> 42  1.580266933 -1.38410882 -2.29724268
#> 43  0.757704106  1.14891569  0.57966229
#> 44  1.738035965 -0.36748172 -2.54534438
#> 45 -0.548356563  0.56679909  0.11348875
#> 46  0.211675479 -0.92217556 -2.62864427
#> 47  0.229698813  0.83390193 -0.10707023
#> 48  0.630407004 -0.07912680 -2.57289957
#> 49 -2.620492232  0.06314178 -0.43230703
#> 50 -2.438576293  0.13490501 -0.31208159
#> 51 -2.546188550  0.03537667 -0.09562374
#> 52 -2.278071242 -0.06340544 -0.13433978
#> 53 -2.457989256 -0.08436404 -0.32843674
#> 54 -2.527466032  0.13261288 -0.53183475
#> 55 -2.497472221 -0.02080222 -0.08701939
#> 56 -2.446040540 -0.10816567 -0.13410079
#> 57 -0.095606688  0.90151701  1.31422731
#> 58  1.679108608 -1.43115167 -2.56799996
#> 59  0.970720081  1.12445326  0.66939895
#> 60  1.825569784 -0.44647585 -2.30297588
#> 61 -0.225588113  0.69462383  0.73593256
#> 62  0.981916597 -1.28853377 -2.48312162
#> 63  0.636647470  0.88772157  0.62464234
#> 64  1.273063953 -0.26013664 -2.56901087
#> 65 -0.484666768  0.45902160  2.25580743
#> 66  1.363450603 -1.81387429 -2.27619850
#> 67  1.253961003  0.98617844  0.23529639
#> 68  1.582879420  0.45530900 -2.27150504
#> 69 -0.848334734  0.32258091  0.03992778
#> 70  0.002866626 -1.21031415 -2.38467505
#> 71  1.260737929  1.04753257 -1.88606461
#> 72  1.245060529  0.69280224 -2.46381392
#> 73  0.919973941 -0.88456691 -1.71264812
#> 74  1.131070616 -1.02523570 -2.26077820
#> 75  1.535412539  0.90274756 -2.14319576
#> 76  1.367519844  0.56038488 -2.14350147
#> 77  0.360041079 -0.56478731 -1.76647741
#> 78  0.352943341 -0.67675585 -2.42214030
#> 79  1.293303666  1.07988590 -2.20881833
#> 80  1.177271834  0.86989184 -2.32082660
#> 81  1.165759563 -1.89900954 -1.83506146
#> 82  1.281228638 -1.89750240 -2.21095323
#> 83  1.278008242 -0.66600063 -1.66779527
#> 84  1.713461998 -0.79662554 -2.50995518
#> 85 -0.039056146 -1.25174719 -1.76434486
#> 86  0.194893901 -1.46234850 -2.36925498
#> 87  0.681395481 -0.43030504 -1.70700793
#> 88  0.720929958 -0.36630542 -2.67537330
plot(x)

Length of intervals

data(oils)
datos <- oils
interval.length(datos)
#>      GRA FRE IOD SAP
#> L  0.005   9  34  78
#> P  0.007   1  16   9
#> Co 0.002   5  14   9
#> S  0.006   2  12   6
#> Ca 0.001  10   2   4
#> O  0.005   6  11   9
#> B  0.010   8   8   9
#> H  0.006  10  24  12

PCA Histogram

Hardwood Data

# Load the hardwood histogram data and keep its row and column names; they
# are used later to size the bin matrix.
data("hardwoodBrito")
Hardwood.histogram <- hardwoodBrito
Hardwood.names <- row.names(Hardwood.histogram)
Hardwood.cols <- colnames(Hardwood.histogram)
Hardwood.histogram
#> # A tibble: 5 × 4
#>         ANNT       JULT       ANNP       MITM
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1     <hist>     <hist>     <hist>     <hist>
#> 2     <hist>     <hist>     <hist>     <hist>
#> 3     <hist>     <hist>     <hist>     <hist>
#> 4     <hist>     <hist>     <hist>     <hist>
#> 5     <hist>     <hist>     <hist>     <hist>

Hardwood.histogram[[1]][[1]]
#> $breaks
#> [1] -3.9  4.2 10.3 20.6
#> 
#> $props
#> [1] 0.5 0.4 0.1

Weighted Center Matrix

weighted.center<-weighted.center.Hist.RSDA(Hardwood.histogram)

Bin Matrix

# Constant matrix with one row per concept and one column per variable,
# every entry equal to 3. matrix() recycles the scalar, so no rep() is
# needed, and both dimensions are stated explicitly.
# NOTE(review): presumably each entry is the number of bins requested for
# that histogram -- confirm against sym.histogram.pca().
BIN.Matrix <- matrix(
  3,
  nrow = length(Hardwood.names),
  ncol = length(Hardwood.cols)
)

PCA

pca.hist<-sym.histogram.pca(Hardwood.histogram,BIN.Matrix)
#> Warning: Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
pca.hist$classic.PCA
#> **Results for the Principal Component Analysis (PCA)**
#> The analysis was performed on 85 individuals, described by 4 variables
#> *The results are available in the following objects:
#> 
#>    name               description                                
#> 1  "$eig"             "eigenvalues"                              
#> 2  "$var"             "results for the variables"                
#> 3  "$var$coord"       "coord. for the variables"                 
#> 4  "$var$cor"         "correlations variables - dimensions"      
#> 5  "$var$cos2"        "cos2 for the variables"                   
#> 6  "$var$contrib"     "contributions of the variables"           
#> 7  "$ind"             "results for the individuals"              
#> 8  "$ind$coord"       "coord. for the individuals"               
#> 9  "$ind$cos2"        "cos2 for the individuals"                 
#> 10 "$ind$contrib"     "contributions of the individuals"         
#> 11 "$ind.sup"         "results for the supplementary individuals"
#> 12 "$ind.sup$coord"   "coord. for the supplementary individuals" 
#> 13 "$ind.sup$cos2"    "cos2 for the supplementary individuals"   
#> 14 "$call"            "summary statistics"                       
#> 15 "$call$centre"     "mean of the variables"                    
#> 16 "$call$ecart.type" "standard error of the variables"          
#> 17 "$call$row.w"      "weights for the individuals"              
#> 18 "$call$col.w"      "weights for the variables"
pca.hist$sym.hist.matrix.PCA
#> # A tibble: 5 × 4
#>         PC.1       PC.2       PC.3       PC.4
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1     <hist>     <hist>     <hist>     <hist>
#> 2     <hist>     <hist>     <hist>     <hist>
#> 3     <hist>     <hist>     <hist>     <hist>
#> 4     <hist>     <hist>     <hist>     <hist>
#> 5     <hist>     <hist>     <hist>     <hist>

Plots

# Plot for the single concept "ACER" on PCA axis 1, built from the bins
# data frame returned by the histogram PCA.
ACER.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = "ACER",
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)

ACER.p1

# Same plot on PCA axis 1, this time for every distinct Object.Name found in
# the bins data frame.
ALL.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = unique(pca.hist$Bins.df$Object.Name),
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)

ALL.p1
#> Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps

Hardwood.quantiles.PCA<-quantiles.RSDA(pca.hist$sym.hist.matrix.PCA,3)
#> Warning in min(which(props.cum >= percentils.RSDA[i])): no non-missing
#> arguments to min; returning Inf
#> Warning: Setting row names on a tibble is deprecated.

# Titles and labels for the plot.
Title <- "First Principal Plane"
label.name <- "Hard Wood"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"

# Restrict the plot to the concept "ACER" and the first two components.
concept.names <- "ACER"
var.names <- c("PC.1", "PC.2")

quantile.ACER.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

quantile.ACER.plot

# Titles and labels for the plot.
Title <- "First Principal Plane"
label.name <- "Hard Wood"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"

# Use every concept (all row names of the quantile table) and the first two
# components.
concept.names <- row.names(Hardwood.quantiles.PCA)
var.names <- c("PC.1", "PC.2")

quantile.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

quantile.plot
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_point()`).
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_segment()`).

# Titles and (shortened) axis labels for the plot.
Title <- "First Principal Plane"
label.name <- "Hard Wood"
axes.x.label <- "PC 1 (84.83%)"
axes.y.label <- "PC 2 (9.70%)"

# Restrict the plot to the concept "ACER" and the first two components.
concept.names <- "ACER"
var.names <- c("PC.1", "PC.2")

plot.3D.HW <- sym.quantiles.PCA.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

plot.3D.HW
# Switch to every concept (all row names of the quantile table) and draw the
# plot with the labels currently in scope.
concept.names <- row.names(Hardwood.quantiles.PCA)
sym.all.quantiles.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
#> Warning: Ignoring 4 observations
# Same inputs as the previous call, passed to the mesh3D plotting function.
sym.all.quantiles.mesh3D.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

Kolmogorov–Smirnov (KS) distance

Hardwood.quantiles.PCA.2<-quantiles.RSDA.KS(pca.hist$sym.hist.matrix.PCA,100)
#> Warning: Setting row names on a tibble is deprecated.
# Histograms in the first column of the quantile table, one per row.
# NOTE(review): presumably the first column is PC.1 and the rows are the five
# hardwood concepts -- confirm.
h <- Hardwood.quantiles.PCA.2[[1]][[1]]
h2 <- Hardwood.quantiles.PCA.2[[1]][[2]]
h3 <- Hardwood.quantiles.PCA.2[[1]][[3]]
h4 <- Hardwood.quantiles.PCA.2[[1]][[4]]
h5 <- Hardwood.quantiles.PCA.2[[1]][[5]]

# Convert each histogram with HistRSDAToEcdf(); the results are functions
# that are evaluated further down through their `v` argument.
tmp <- HistRSDAToEcdf(h)
tmp2 <- HistRSDAToEcdf(h2)
tmp3 <- HistRSDAToEcdf(h3)
tmp4 <- HistRSDAToEcdf(h4)
tmp5 <- HistRSDAToEcdf(h5)

# Common evaluation grid: the distinct break points of all five histograms,
# in ascending order.
breaks.unique <- unique(
  c(h$breaks, h2$breaks, h3$breaks, h4$breaks, h5$breaks)
)
tmp.unique <- sort(breaks.unique, na.last = TRUE)

# Evaluate every function on the grid. From here on each name holds the
# evaluated values instead of the function.
tmp <- tmp(v = tmp.unique)
tmp2 <- tmp2(v = tmp.unique)
tmp3 <- tmp3(v = tmp.unique)
tmp4 <- tmp4(v = tmp.unique)
tmp5 <- tmp5(v = tmp.unique)

# The Kolmogorov-Smirnov distance is the maximum of the absolute differences,
# computed here between the first two curves only.
abs_dif <- abs(tmp2 - tmp)
distancia_ks <- max(abs_dif)
distancia_ks
#> [1] 0.05857869
library(tidyr)
# Gather the evaluated curves in a wide data frame: the grid in PC.1 and one
# column per hardwood.
df.HW <- data.frame(
  PC.1 = tmp.unique,
  ACER = tmp,
  ALNUS = tmp2,
  FRAXINUS = tmp3,
  JUGLANS = tmp4,
  QUERCUS = tmp5
)
# Reshape to long format: the hardwood name goes to `HardWood` and the curve
# value to `ecdf`.
df.HW <- pivot_longer(
  df.HW,
  cols = c(ACER, ALNUS, FRAXINUS, JUGLANS, QUERCUS),
  names_to = "HardWood",
  values_to = "ecdf"
)

# Line plot of the `ecdf` column against PC.1, one coloured line per value of
# `HardWood`, with the legend placed at the bottom.
grafico_ecdf <- ggplot(
  data = df.HW,
  aes(x = PC.1, y = ecdf, color = HardWood)
) +
  # `linewidth` replaces the `size` aesthetic, which is deprecated for line
  # geoms since ggplot2 3.4.0. A single layer is enough: the former second
  # geom_line() only redrew the same lines on top.
  geom_line(linewidth = 1) +
  labs(
    color = "Hardwood",
    y = "Empirical Cumulative Distribution "
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 12)
  )

grafico_ecdf