How to use RSDA 3.2.1

RSDA Package version 3.3

Oldemar Rodríguez R.

Installing the package

CRAN

install.packages("RSDA", dependencies=TRUE)

Github

devtools::install_github("PROMiDAT/RSDA")

How to read a Symbolic Table from a CSV file with RSDA?

ex3 <- read.sym.table(file = 'tsym1.csv', header=TRUE, sep=';',dec='.', row.names=1)
ex3
#> # A tibble: 7 × 7
#>      F1              F2      F3    F4        F5               F6              F7
#>   <dbl>      <symblc_n> <symbl> <dbl> <symblc_>       <symblc_n>      <symblc_n>
#> 1   2.8   [1.00 : 2.00]  <hist>   6       {a,d}   [0.00 : 90.00]  [9.00 : 24.00]
#> 2   1.4   [3.00 : 9.00]  <hist>   8     {b,c,d} [-90.00 : 98.00]  [-9.00 : 9.00]
#> 3   3.2  [-1.00 : 4.00]  <hist>  -7       {a,b}  [65.00 : 90.00] [65.00 : 70.00]
#> 4  -2.1   [0.00 : 2.00]  <hist>   0   {a,b,c,d}  [45.00 : 89.00] [25.00 : 67.00]
#> 5  -3   [-4.00 : -2.00]  <hist>  -9.5       {b}  [20.00 : 40.00]  [9.00 : 40.00]
#> 6   0.1 [10.00 : 21.00]  <hist>  -1       {a,d}    [5.00 : 8.00]   [5.00 : 8.00]
#> 7   9    [4.00 : 21.00]  <hist>   0.5       {a}    [3.14 : 6.76]   [4.00 : 6.00]

##How to save a Symbolic Table in a CSV file with RSDA?

write.sym.table(ex3, file = 'tsymtemp.csv', sep = ';',dec = '.',
                row.names = TRUE, col.names = TRUE)

Symbolic Data Frame Example in RSDA

data(example3)
example3
#> # A tibble: 7 × 7
#>      F1              F2                      F3    F4        F5               F6
#>   <dbl>      <symblc_n>              <symblc_m> <dbl> <symblc_>       <symblc_n>
#> 1   2.8   [1.00 : 2.00] M1:0.10 M2:0.70 M3:0.20   6   {e,g,i,k}   [0.00 : 90.00]
#> 2   1.4   [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10   8   {a,b,c,d} [-90.00 : 98.00]
#> 3   3.2  [-1.00 : 4.00] M1:0.20 M2:0.20 M3:0.60  -7   {2,b,1,c}  [65.00 : 90.00]
#> 4  -2.1   [0.00 : 2.00] M1:0.90 M2:0.00 M3:0.10   0   {a,3,4,c}  [45.00 : 89.00]
#> 5  -3   [-4.00 : -2.00] M1:0.60 M2:0.00 M3:0.40  -9.5 {e,g,i,k}  [20.00 : 40.00]
#> 6   0.1 [10.00 : 21.00] M1:0.00 M2:0.70 M3:0.30  -1     {e,1,i}    [5.00 : 8.00]
#> 7   9    [4.00 : 21.00] M1:0.20 M2:0.20 M3:0.60   0.5   {e,a,2}    [3.14 : 6.76]
#> # ℹ 1 more variable: F7 <symblc_n>
example3[2,]
#> # A tibble: 1 × 7
#>      F1            F2                      F3    F4         F5               F6
#>   <dbl>    <symblc_n>              <symblc_m> <dbl> <symblc_s>       <symblc_n>
#> 1   1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10     8  {a,b,c,d} [-90.00 : 98.00]
#> # ℹ 1 more variable: F7 <symblc_n>
example3[,3]
#> # A tibble: 7 × 1
#>                        F3
#>                <symblc_m>
#> 1 M1:0.10 M2:0.70 M3:0.20
#> 2 M1:0.60 M2:0.30 M3:0.10
#> 3 M1:0.20 M2:0.20 M3:0.60
#> 4 M1:0.90 M2:0.00 M3:0.10
#> 5 M1:0.60 M2:0.00 M3:0.40
#> 6 M1:0.00 M2:0.70 M3:0.30
#> 7 M1:0.20 M2:0.20 M3:0.60
example3[2:3,5]
#> # A tibble: 2 × 1
#>           F5
#>   <symblc_s>
#> 1  {a,b,c,d}
#> 2  {2,b,1,c}
example3$F1
#> [1]  2.8  1.4  3.2 -2.1 -3.0  0.1  9.0

How to generated a symbolic data table from a classic data table in RSDA?

data(ex1_db2so)
ex1_db2so
#>         state sex county group age
#> 1     Florida   M      2     6   3
#> 2  California   F      4     3   4
#> 3       Texas   M     12     3   4
#> 4     Florida   F      2     3   4
#> 5       Texas   M      4     6   4
#> 6       Texas   F      2     3   3
#> 7     Florida   M      6     3   4
#> 8     Florida   F      2     6   4
#> 9  California   M      2     3   6
#> 10 California   F     21     3   4
#> 11 California   M      2     3   4
#> 12 California   M      2     6   7
#> 13      Texas   F     23     3   4
#> 14    Florida   M      2     3   4
#> 15    Florida   F     12     7   4
#> 16      Texas   M      2     3   8
#> 17 California   F      3     7   9
#> 18 California   M      2     3  11
#> 19 California   M      1     3  11

The classic.to.sym function allows to convert a traditional table into a symbolic one, to this we must indicate the following parameters.

  • x = a data.frame
  • concept = variables to be used as a concept
  • variables = variables to be used, conceptible with tidyselect options
  • default.numeric = function that will be used by default for numerical values (sym.interval)
  • default.categorical = functions to be used by default for categorical values (sym.model)

Example 1

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c(state, sex),
                         variables = c(county, group, age))
result
#> # A tibble: 6 × 3
#>           county         group            age
#>       <symblc_n>    <symblc_n>     <symblc_n>
#> 1 [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5 [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6 [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

We can add new variables indicating the type we want them to be.

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c("state", "sex"),
                         variables = c(county, group, age),
                         age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5)))
result
#> # A tibble: 6 × 4
#>     age_hist         county         group            age
#>   <symblc_h>     <symblc_n>    <symblc_n>     <symblc_n>
#> 1     <hist> [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2     <hist>  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3     <hist> [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4     <hist>  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5     <hist> [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6     <hist> [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

Example 2

data(USCrime)
head(USCrime)
#>   state fold population householdsize racepctblack racePctWhite racePctAsian
#> 1     8    1       0.19          0.33         0.02         0.90         0.12
#> 2    53    1       0.00          0.16         0.12         0.74         0.45
#> 3    24    1       0.00          0.42         0.49         0.56         0.17
#> 4    34    1       0.04          0.77         1.00         0.08         0.12
#> 5    42    1       0.01          0.55         0.02         0.95         0.09
#> 6     6    1       0.02          0.28         0.06         0.54         1.00
#>   racePctHisp agePct12t21 agePct12t29 agePct16t24 agePct65up numbUrban pctUrban
#> 1        0.17        0.34        0.47        0.29       0.32      0.20      1.0
#> 2        0.07        0.26        0.59        0.35       0.27      0.02      1.0
#> 3        0.04        0.39        0.47        0.28       0.32      0.00      0.0
#> 4        0.10        0.51        0.50        0.34       0.21      0.06      1.0
#> 5        0.05        0.38        0.38        0.23       0.36      0.02      0.9
#> 6        0.25        0.31        0.48        0.27       0.37      0.04      1.0
#>   medIncome pctWWage pctWFarmSelf pctWInvInc pctWSocSec pctWPubAsst pctWRetire
#> 1      0.37     0.72         0.34       0.60       0.29        0.15       0.43
#> 2      0.31     0.72         0.11       0.45       0.25        0.29       0.39
#> 3      0.30     0.58         0.19       0.39       0.38        0.40       0.84
#> 4      0.58     0.89         0.21       0.43       0.36        0.20       0.82
#> 5      0.50     0.72         0.16       0.68       0.44        0.11       0.71
#> 6      0.52     0.68         0.20       0.61       0.28        0.15       0.25
#>   medFamInc perCapInc whitePerCap blackPerCap indianPerCap AsianPerCap
#> 1      0.39      0.40        0.39        0.32         0.27        0.27
#> 2      0.29      0.37        0.38        0.33         0.16        0.30
#> 3      0.28      0.27        0.29        0.27         0.07        0.29
#> 4      0.51      0.36        0.40        0.39         0.16        0.25
#> 5      0.46      0.43        0.41        0.28         0.00        0.74
#> 6      0.62      0.72        0.76        0.77         0.28        0.52
#>   OtherPerCap HispPerCap NumUnderPov PctPopUnderPov PctLess9thGrade
#> 1        0.36       0.41        0.08           0.19            0.10
#> 2        0.22       0.35        0.01           0.24            0.14
#> 3        0.28       0.39        0.01           0.27            0.27
#> 4        0.36       0.44        0.01           0.10            0.09
#> 5        0.51       0.48        0.00           0.06            0.25
#> 6        0.48       0.60        0.01           0.12            0.13
#>   PctNotHSGrad PctBSorMore PctUnemployed PctEmploy PctEmplManu PctEmplProfServ
#> 1         0.18        0.48          0.27      0.68        0.23            0.41
#> 2         0.24        0.30          0.27      0.73        0.57            0.15
#> 3         0.43        0.19          0.36      0.58        0.32            0.29
#> 4         0.25        0.31          0.33      0.71        0.36            0.45
#> 5         0.30        0.33          0.12      0.65        0.67            0.38
#> 6         0.12        0.80          0.10      0.65        0.19            0.77
#>   PctOccupManu PctOccupMgmtProf MalePctDivorce MalePctNevMarr FemalePctDiv
#> 1         0.25             0.52           0.68           0.40         0.75
#> 2         0.42             0.36           1.00           0.63         0.91
#> 3         0.49             0.32           0.63           0.41         0.71
#> 4         0.37             0.39           0.34           0.45         0.49
#> 5         0.42             0.46           0.22           0.27         0.20
#> 6         0.06             0.91           0.49           0.57         0.61
#>   TotalPctDiv PersPerFam PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par
#> 1        0.75       0.35       0.55        0.59             0.61        0.56
#> 2        1.00       0.29       0.43        0.47             0.60        0.39
#> 3        0.70       0.45       0.42        0.44             0.43        0.43
#> 4        0.44       0.75       0.65        0.54             0.83        0.65
#> 5        0.21       0.51       0.91        0.91             0.89        0.85
#> 6        0.58       0.44       0.62        0.69             0.87        0.53
#>   PctWorkMomYoungKids PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent
#> 1                0.74       0.76     0.04     0.14     0.03           0.24
#> 2                0.46       0.53     0.00     0.24     0.01           0.52
#> 3                0.71       0.67     0.01     0.46     0.00           0.07
#> 4                0.85       0.86     0.03     0.33     0.02           0.11
#> 5                0.40       0.60     0.00     0.06     0.00           0.03
#> 6                0.30       0.43     0.00     0.11     0.04           0.30
#>   PctImmigRec5 PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5
#> 1         0.27         0.37          0.39           0.07         0.07
#> 2         0.62         0.64          0.63           0.25         0.27
#> 3         0.06         0.15          0.19           0.02         0.02
#> 4         0.20         0.30          0.31           0.05         0.08
#> 5         0.07         0.20          0.27           0.01         0.02
#> 6         0.35         0.43          0.47           0.50         0.50
#>   PctRecImmig8 PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell
#> 1         0.08          0.08             0.89                0.06
#> 2         0.25          0.23             0.84                0.10
#> 3         0.04          0.05             0.88                0.04
#> 4         0.11          0.11             0.81                0.08
#> 5         0.04          0.05             0.88                0.05
#> 6         0.56          0.57             0.45                0.28
#>   PctLargHouseFam PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous
#> 1            0.14              0.13             0.33              0.39
#> 2            0.16              0.10             0.17              0.29
#> 3            0.20              0.20             0.46              0.52
#> 4            0.56              0.62             0.85              0.77
#> 5            0.16              0.19             0.59              0.60
#> 6            0.25              0.19             0.29              0.53
#>   PersPerRentOccHous PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR
#> 1               0.28            0.55             0.09           0.51      0.5
#> 2               0.17            0.26             0.20           0.82      0.0
#> 3               0.43            0.42             0.15           0.51      0.5
#> 4               1.00            0.94             0.12           0.01      0.5
#> 5               0.37            0.89             0.02           0.19      0.5
#> 6               0.18            0.39             0.26           0.73      0.0
#>   HousVacant PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
#> 1       0.21         0.71          0.52             0.05           0.26
#> 2       0.02         0.79          0.24             0.02           0.25
#> 3       0.01         0.86          0.41             0.29           0.30
#> 4       0.01         0.97          0.96             0.60           0.47
#> 5       0.01         0.89          0.87             0.04           0.55
#> 6       0.02         0.84          0.30             0.16           0.28
#>   MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
#> 1           0.65           0.14           0.06           0.22         0.19
#> 2           0.65           0.16           0.00           0.21         0.20
#> 3           0.52           0.47           0.45           0.18         0.17
#> 4           0.52           0.11           0.11           0.24         0.21
#> 5           0.73           0.05           0.14           0.31         0.31
#> 6           0.25           0.02           0.05           0.94         1.00
#>   OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
#> 1          0.18     0.36       0.35      0.38    0.34              0.38
#> 2          0.21     0.42       0.38      0.40    0.37              0.29
#> 3          0.16     0.27       0.29      0.27    0.31              0.48
#> 4          0.19     0.75       0.70      0.77    0.89              0.63
#> 5          0.30     0.40       0.36      0.38    0.38              0.22
#> 6          1.00     0.67       0.63      0.68    0.62              0.47
#>   MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet PctForeignBorn
#> 1             0.46                  0.25          0.04         0           0.12
#> 2             0.32                  0.18          0.00         0           0.21
#> 3             0.39                  0.28          0.00         0           0.14
#> 4             0.51                  0.47          0.00         0           0.19
#> 5             0.51                  0.21          0.00         0           0.11
#> 6             0.59                  0.11          0.00         0           0.70
#>   PctBornSameState PctSameHouse85 PctSameCity85 PctSameState85 LandArea PopDens
#> 1             0.42           0.50          0.51           0.64     0.12    0.26
#> 2             0.50           0.34          0.60           0.52     0.02    0.12
#> 3             0.49           0.54          0.67           0.56     0.01    0.21
#> 4             0.30           0.73          0.64           0.65     0.02    0.39
#> 5             0.72           0.64          0.61           0.53     0.04    0.09
#> 6             0.42           0.49          0.73           0.64     0.01    0.58
#>   PctUsePubTrans LemasPctOfficDrugUn ViolentCrimesPerPop
#> 1           0.20                0.32                0.20
#> 2           0.45                0.00                0.67
#> 3           0.02                0.00                0.43
#> 4           0.28                0.00                0.12
#> 5           0.02                0.00                0.03
#> 6           0.10                0.00                0.14
result  <- classic.to.sym(x = USCrime,
                          concept = state, 
                          variables= c(NumInShelters,
                                       NumImmig,
                                       ViolentCrimesPerPop),
                          ViolentCrimesPerPop_hist = sym.histogram(ViolentCrimesPerPop,
                                                                   breaks = pretty(USCrime$ViolentCrimesPerPop,5)))
result
#> # A tibble: 46 × 4
#>    ViolentCrimesPerPop_hist NumInShelters      NumImmig ViolentCrimesPerPop
#>                  <symblc_h>    <symblc_n>    <symblc_n>          <symblc_n>
#>  1                   <hist> [0.00 : 0.32] [0.00 : 0.04]       [0.01 : 1.00]
#>  2                   <hist> [0.01 : 0.18] [0.01 : 0.09]       [0.05 : 0.36]
#>  3                   <hist> [0.00 : 1.00] [0.00 : 0.57]       [0.05 : 0.57]
#>  4                   <hist> [0.00 : 0.08] [0.00 : 0.02]       [0.02 : 1.00]
#>  5                   <hist> [0.00 : 1.00] [0.00 : 1.00]       [0.01 : 1.00]
#>  6                   <hist> [0.00 : 0.68] [0.00 : 0.23]       [0.07 : 0.75]
#>  7                   <hist> [0.00 : 0.79] [0.00 : 0.14]       [0.00 : 0.94]
#>  8                   <hist> [0.01 : 0.01] [0.01 : 0.01]       [0.37 : 0.37]
#>  9                   <hist> [1.00 : 1.00] [0.39 : 0.39]       [1.00 : 1.00]
#> 10                   <hist> [0.00 : 0.52] [0.00 : 1.00]       [0.06 : 1.00]
#> # ℹ 36 more rows

Example 3

data("ex_mcfa1") 
head(ex_mcfa1)
#>   suspect age     hair    eyes    region
#> 1       1  42    h_red e_brown     Bronx
#> 2       2  20  h_black e_green     Bronx
#> 3       3  64  h_brown e_brown  Brooklyn
#> 4       4  55 h_blonde e_brown     Bronx
#> 5       5   4  h_brown e_green Manhattan
#> 6       6  61 h_blonde e_green     Bronx
sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect, 
                            variables=c(hair,
                                        eyes,
                                        region),
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 3
#>                  hair              eyes               region
#>            <symblc_s>        <symblc_s>           <symblc_s>
#>  1            {h_red} {e_brown,e_black}              {Bronx}
#>  2 {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8 {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows

Example 4

We can modify the function that will be applied by default to the categorical variables

sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect,
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows

Converting a SODAS 1.0 *.SDS files to RSDA files

hani3101 <- SDS.to.RSDA(file.path = "hani3101.sds")
#> Preprocessing file
#> Converting data to JSON format
#> Processing variable 1: R3101
#> Processing variable 2: RNINO12
#> Processing variable 3: RNINO3
#> Processing variable 4: RNINO4
#> Processing variable 5: RNINO34
#> Processing variable 6: RSOI
hani3101
#> # A tibble: 32 × 6
#>                             R3101                 RNINO12
#>                        <symblc_m>              <symblc_m>
#>  1 X2:0.21 X4:0.18 X3:0.15 X5:... X1:0.17 X2:0.83 X3:0.00
#>  2 X2:0.30 X4:0.14 X3:0.19 X5:... X1:0.00 X2:0.25 X3:0.75
#>  3 X2:0.16 X4:0.12 X3:0.20 X5:... X1:0.67 X2:0.33 X3:0.00
#>  4 X2:0.13 X4:0.15 X3:0.22 X5:... X1:0.17 X2:0.83 X3:0.00
#>  5 X2:0.14 X4:0.14 X3:0.18 X5:... X1:0.42 X2:0.58 X3:0.00
#>  6 X2:0.26 X4:0.06 X3:0.23 X5:... X1:0.00 X2:0.67 X3:0.33
#>  7 X2:0.28 X4:0.14 X3:0.10 X5:... X1:0.00 X2:1.00 X3:0.00
#>  8 X2:0.25 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#>  9 X2:0.20 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 10 X2:0.21 X4:0.16 X3:0.31 X5:... X1:0.08 X2:0.92 X3:0.00
#> # ℹ 22 more rows
#> # ℹ 4 more variables: RNINO3 <symblc_m>, RNINO4 <symblc_m>, RNINO34 <symblc_m>,
#> #   RSOI <symblc_m>
# We can save the file in CSV to RSDA format as follows:
write.sym.table(hani3101,
                file='hani3101.csv',
                sep=';',
                dec='.',
                row.names=TRUE,
                col.names=TRUE)

Converting a SODAS 2.0 *.XML files to RSDA files

abalone <- SODAS.to.RSDA("abalone.xml")
#> Processing variable 1: LENGTH
#> Processing variable 2: DIAMETER
#> Processing variable 3: HEIGHT
#> Processing variable 4: WHOLE_WEIGHT
#> Processing variable 5: SHUCKED_WEIGHT
#> Processing variable 6: VISCERA_WEIGHT
#> Processing variable 7: SHELL_WEIGHT
abalone
#> # A tibble: 24 × 7
#>           LENGTH      DIAMETER        HEIGHT  WHOLE_WEIGHT SHUCKED_WEIGHT
#>       <symblc_n>    <symblc_n>    <symblc_n>    <symblc_n>     <symblc_n>
#>  1 [0.28 : 0.66] [0.20 : 0.48] [0.07 : 0.18] [0.08 : 1.37]  [0.03 : 0.64]
#>  2 [0.30 : 0.74] [0.22 : 0.58] [0.02 : 1.13] [0.15 : 2.25]  [0.06 : 1.16]
#>  3 [0.34 : 0.78] [0.26 : 0.63] [0.06 : 0.23] [0.20 : 2.66]  [0.07 : 1.49]
#>  4 [0.39 : 0.82] [0.30 : 0.65] [0.10 : 0.25] [0.26 : 2.51]  [0.11 : 1.23]
#>  5 [0.40 : 0.74] [0.32 : 0.60] [0.10 : 0.24] [0.35 : 2.20]  [0.12 : 0.84]
#>  6 [0.45 : 0.80] [0.38 : 0.63] [0.14 : 0.22] [0.64 : 2.53]  [0.16 : 0.93]
#>  7 [0.49 : 0.72] [0.36 : 0.58] [0.12 : 0.21] [0.68 : 2.12]  [0.16 : 0.82]
#>  8 [0.55 : 0.70] [0.46 : 0.58] [0.18 : 0.22] [1.21 : 1.81]  [0.32 : 0.71]
#>  9 [0.08 : 0.24] [0.06 : 0.18] [0.01 : 0.06] [0.00 : 0.07]  [0.00 : 0.03]
#> 10 [0.13 : 0.58] [0.10 : 0.45] [0.00 : 0.15] [0.01 : 0.89]  [0.00 : 0.50]
#> # ℹ 14 more rows
#> # ℹ 2 more variables: VISCERA_WEIGHT <symblc_n>, SHELL_WEIGHT <symblc_n>
write.sym.table(abalone,
                file='abalone.csv',
                sep=';',
                dec='.',
                row.names = TRUE,
                col.names = TRUE)

Basic statistics

Symbolic Mean

data(example3)
mean(example3$F1)
#> [1] 1.628571
mean(example3[,1])
#> [1] 1.628571
mean(example3$F2)
#> [1] 5
mean(example3[,2])
#> [1] 5
mean(example3$F2,method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]
mean(example3[,2],method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]

Symbolic median

median(example3$F1)
#> [1] 1.4
median(example3[,1])
#> [1] 1.4
median(example3$F2)
#> [1] 1.5
median(example3[,2])
#> [1] 1.5
median(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]
median(example3[,6], method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]

Variance and standard deviation

var(example3[,1])
#> [1] 15.98238
var(example3[,2])
#> [1] 90.66667
var(example3$F6)
#> [1] 1872.358
var(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [2,408.97 : 1,670.51]
var(example3$F6, method = 'billard')
#> [1] 1355.143
sd(example3$F1)
#> [1] 3.997797
sd(example3$F2)
#> [1] 6.733003
sd(example3$F6)
#> [1] 30.59704
sd(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [49.08 : 40.87]
sd(example3$F6, method = 'billard')
#> [1] 36.81226

Symbolic correlation

cor(example3$F1, example3$F4)
#> [1] 0.2864553
cor(example3[,1], example3[,4])
#>           [,1]
#> [1,] 0.2864553
cor(example3$F2, example3$F6, method = 'centers')
#> [1] -0.6693648
cor(example3$F2, example3$F6, method = 'billard')
#> [1] -0.6020041

Radar plot for intervals

library(ggpolypath)
#> Loading required package: ggplot2

data(oils)
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))
sym.radar.plot(oils[2:3,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.

sym.radar.plot(oils[2:5,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.


res <- interval.histogram.plot(oils[,2],
                               n.bins = 4,
                               col = c(2,3,4,5))

res
#> $frequency
#> [1] 25 49  1 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1
#> [4,]  4.3

res <- interval.histogram.plot(oils[,3],
                               n.bins = 3,
                               main = "Histogram",
                               col = c(2, 3, 4))

res
#> $frequency
#> [1] 50 25 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1

Distances for intervals

Gowda-Diday

data("oils")
DM <- sym.dist.interval(sym.data = oils[,1:4],
                        method = "Gowda.Diday")
model <- hclust(DM)
plot(model, hang = -1)

Ichino

DM <- sym.dist.interval(sym.data= oils[,1:4],
                        method = "Ichino")
model <- hclust(DM)
plot(model, hang = -1)

Hausdorff

DM <- sym.dist.interval(sym.data = oils[,c(1,2,4)],
                        gamma = 0.5,
                        method = "Hausdorff",
                        normalize = FALSE,
                        SpanNormalize = TRUE,
                        euclidea = TRUE,
                        q = 2)
model <- hclust(DM)
plot(model, hang = -1)

Linear regression for intervals

Training

data(int_prost_train)
data(int_prost_test)
res.cm <- sym.lm(formula = lpsa~., sym.data = int_prost_train, method = 'cm')
res.cm
#> 
#> Call:
#> stats::lm(formula = formula, data = centers)
#> 
#> Coefficients:
#> (Intercept)       lcavol      lweight          age         lbph          svi  
#>    0.411537     0.579327     0.614128    -0.018659     0.143918     0.730937  
#>         lcp      gleason        pgg45  
#>   -0.205536    -0.030924     0.009507

Prediction

pred.cm <- sym.predict(model = res.cm, new.sym.data = int_prost_test)

Testing

RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7229999
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7192467
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.501419
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.5058389
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.4962964

LASSO regression for intervals

data(int_prost_train)
data(int_prost_test)

Training

res.cm.lasso <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 1,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.lasso <- sym.predict(res.cm.lasso,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.lasso)

plot(res.cm.lasso$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.706699
RMSE.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.7033206
R2.L(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.5236074
R2.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.5276086
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.4929714

RIDGE regression for intervals

Training

data(int_prost_train)
data(int_prost_test)

res.cm.ridge <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 0,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.ridge <- sym.predict(res.cm.ridge,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.ridge)

plot(res.cm.ridge$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.703543
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.7004145
R2.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5286114
R2.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5322683
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.4808652

PCA for intervals

Example 1

data("oils")
res <- sym.pca(oils,'centers')
plot(res, choix = "ind")

plot(res, choix = "var")

Example 2

res <- sym.pca(oils,'tops')
plot(res, choix = "ind")

Example 3

res <- sym.pca(oils, 'principal.curves')
plot(res, choix = "ind")

Example 4

res <- sym.pca(oils,'optimized.distance')
plot(res, choix = "ind")

plot(res, choix = "var")

Example 5

res <- sym.pca(oils,'optimized.variance')
plot(res, choix = "ind")

plot(res, choix = "var")

Symbolic Multiple Correspondence Analysis

Example 1

data("ex_mcfa1") 
ex_mcfa1
#>     suspect age     hair    eyes    region
#> 1         1  42    h_red e_brown     Bronx
#> 2         2  20  h_black e_green     Bronx
#> 3         3  64  h_brown e_brown  Brooklyn
#> 4         4  55 h_blonde e_brown     Bronx
#> 5         5   4  h_brown e_green Manhattan
#> 6         6  61 h_blonde e_green     Bronx
#> 7         7  61  h_white e_black    Queens
#> 8         8  32 h_blonde e_brown Manhattan
#> 9         9  39 h_blonde e_black  Brooklyn
#> 10       10  50  h_brown e_brown Manhattan
#> 11       11  41    h_red  e_blue Manhattan
#> 12       12  35 h_blonde e_green  Brooklyn
#> 13       13  56 h_blonde e_brown     Bronx
#> 14       14  52    h_red e_brown    Queens
#> 15       15  55    h_red e_green  Brooklyn
#> 16       16  25  h_brown e_brown    Queens
#> 17       17  52 h_blonde e_brown  Brooklyn
#> 18       18  28    h_red e_brown Manhattan
#> 19       19  21  h_white  e_blue Manhattan
#> 20       20  66  h_black e_black  Brooklyn
#> 21       21  67 h_blonde e_brown    Queens
#> 22       22  13  h_white  e_blue  Brooklyn
#> 23       23  39  h_brown e_green Manhattan
#> 24       24  47  h_black e_green  Brooklyn
#> 25       25  54 h_blonde e_brown     Bronx
#> 26       26  75  h_brown  e_blue  Brooklyn
#> 27       27   3  h_white e_green Manhattan
#> 28       28  40  h_white e_green Manhattan
#> 29       29  58    h_red  e_blue    Queens
#> 30       30  41  h_brown e_green     Bronx
#> 31       31  25  h_white e_black  Brooklyn
#> 32       32  75 h_blonde  e_blue Manhattan
#> 33       33  58  h_white e_brown     Bronx
#> 34       34  61  h_white e_brown Manhattan
#> 35       35  52  h_white  e_blue     Bronx
#> 36       36  19    h_red e_black    Queens
#> 37       37  58    h_red e_black     Bronx
#> 38       38  46  h_black e_green Manhattan
#> 39       39  74  h_brown e_black Manhattan
#> 40       40  26 h_blonde e_brown  Brooklyn
#> 41       41  63 h_blonde  e_blue    Queens
#> 42       42  40  h_brown e_black    Queens
#> 43       43  65  h_black e_brown  Brooklyn
#> 44       44  51 h_blonde e_brown  Brooklyn
#> 45       45  15  h_white e_black  Brooklyn
#> 46       46  32 h_blonde e_brown     Bronx
#> 47       47  68  h_white e_black Manhattan
#> 48       48  51  h_white e_black    Queens
#> 49       49  14    h_red e_green    Queens
#> 50       50  72  h_white e_brown  Brooklyn
#> 51       51   7    h_red  e_blue  Brooklyn
#> 52       52  22    h_red e_brown     Bronx
#> 53       53  52    h_red e_brown  Brooklyn
#> 54       54  62  h_brown e_green     Bronx
#> 55       55  41  h_black e_brown    Queens
#> 56       56  32  h_black e_black Manhattan
#> 57       57  58  h_brown e_brown    Queens
#> 58       58  25  h_black e_brown    Queens
#> 59       59  70 h_blonde e_green  Brooklyn
#> 60       60  64  h_brown  e_blue    Queens
#> 61       61  25  h_white  e_blue     Bronx
#> 62       62  42  h_black e_black  Brooklyn
#> 63       63  56    h_red e_black  Brooklyn
#> 64       64  41 h_blonde e_black  Brooklyn
#> 65       65   8  h_white e_black Manhattan
#> 66       66   7  h_black e_green  Brooklyn
#> 67       67  42  h_white e_brown    Queens
#> 68       68  10  h_white  e_blue Manhattan
#> 69       69  60  h_brown e_black     Bronx
#> 70       70  52 h_blonde e_brown  Brooklyn
#> 71       71  39  h_brown  e_blue Manhattan
#> 72       72  69  h_brown e_green    Queens
#> 73       73  67 h_blonde e_green Manhattan
#> 74       74  46    h_red e_black  Brooklyn
#> 75       75  72  h_black e_black    Queens
#> 76       76  66    h_red  e_blue    Queens
#> 77       77   4  h_black  e_blue Manhattan
#> 78       78  62  h_black e_green  Brooklyn
#> 79       79  10 h_blonde  e_blue     Bronx
#> 80       80  16 h_blonde e_black Manhattan
#> 81       81  59 h_blonde e_brown     Bronx
#> 82       82  63 h_blonde  e_blue Manhattan
#> 83       83  54    h_red  e_blue    Queens
#> 84       84  14  h_brown  e_blue  Brooklyn
#> 85       85  48  h_black e_green Manhattan
#> 86       86  59 h_blonde e_black     Bronx
#> 87       87  73 h_blonde e_black     Bronx
#> 88       88  51  h_brown e_brown     Bronx
#> 89       89  14  h_white e_black     Bronx
#> 90       90  58 h_blonde e_black    Queens
#> 91       91  56    h_red e_green Manhattan
#> 92       92  26    h_red  e_blue  Brooklyn
#> 93       93  59  h_brown e_black Manhattan
#> 94       94  27  h_white e_green Manhattan
#> 95       95  38  h_black e_green Manhattan
#> 96       96   5 h_blonde e_green     Bronx
#> 97       97  14  h_black  e_blue    Queens
#> 98       98  13  h_black e_brown Manhattan
#> 99       99  54  h_white  e_blue  Brooklyn
#> 100     100  66  h_white e_green Manhattan
#> 101       1  22    h_red e_black     Bronx
#> 102       2  57 h_blonde e_black Manhattan
#> 103       3  29  h_white e_green    Queens
#> 104       4  14 h_blonde e_black Manhattan
#> 105       5  47    h_red e_green     Bronx
#> 106       6  32  h_white  e_blue    Queens
#> 107       7  49    h_red  e_blue     Bronx
#> 108       8   8  h_white e_black  Brooklyn
#> 109       9  67  h_white e_brown     Bronx
#> 110      10  68  h_black e_green     Bronx
#> 111      11  15  h_black e_brown Manhattan
#> 112      12  46  h_white e_brown     Bronx
#> 113      13  68  h_white e_black Manhattan
#> 114      14  55 h_blonde  e_blue Manhattan
#> 115      15   7  h_white e_green     Bronx
#> 116      16  10  h_black e_brown  Brooklyn
#> 117      17  49    h_red  e_blue Manhattan
#> 118      18  12  h_brown  e_blue  Brooklyn
#> 119      19  41  h_white  e_blue     Bronx
#> 120      20  10  h_brown  e_blue     Bronx
#> 121      21  12  h_white e_green Manhattan
#> 122      22  53  h_white  e_blue Manhattan
#> 123      23   5  h_black e_black Manhattan
#> 124      24  46  h_brown e_black    Queens
#> 125      25  14  h_brown e_black    Queens
#> 126      26  55  h_white e_green  Brooklyn
#> 127      27  53    h_red e_brown Manhattan
#> 128      28  31  h_black e_brown Manhattan
#> 129      29  31 h_blonde e_brown    Queens
#> 130      30  55  h_brown e_black  Brooklyn
sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect, 
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows
res <- sym.mcfa(sym.table, c(2,3))
mcfa.scatterplot(res[,2], res[,3], sym.data = sym.table, pos.var = c(2,3))

res <- sym.mcfa(sym.table, c(2,3,4))
mcfa.scatterplot(res[,2], res[,3], sym.data = sym.table, pos.var = c(2,3,4))

Symbolic UMAP

Ejemplo Oils

datos <- oils
datos
#> # A tibble: 8 × 4
#>             GRA               FRE               IOD               SAP
#> *    <symblc_n>        <symblc_n>        <symblc_n>        <symblc_n>
#> 1 [0.93 : 0.94] [-27.00 : -18.00] [170.00 : 204.00] [118.00 : 196.00]
#> 2 [0.93 : 0.94]   [-5.00 : -4.00] [192.00 : 208.00] [188.00 : 197.00]
#> 3 [0.92 : 0.92]   [-6.00 : -1.00]  [99.00 : 113.00] [189.00 : 198.00]
#> 4 [0.92 : 0.93]   [-6.00 : -4.00] [104.00 : 116.00] [187.00 : 193.00]
#> 5 [0.92 : 0.92] [-25.00 : -15.00]   [80.00 : 82.00] [189.00 : 193.00]
#> 6 [0.91 : 0.92]     [0.00 : 6.00]   [79.00 : 90.00] [187.00 : 196.00]
#> 7 [0.86 : 0.87]   [30.00 : 38.00]   [40.00 : 48.00] [190.00 : 199.00]
#> 8 [0.86 : 0.86]   [22.00 : 32.00]   [53.00 : 77.00] [190.00 : 202.00]
x <- sym.umap(datos)
x
#>              V1        V2            V3         V4
#> 1    0.06298583 -4.154103  8.0557543431  1.5740743
#> 2    0.07660521 -4.134546  8.0688884696  1.6105316
#> 3    0.05159054 -3.984176  7.9660391243  1.7719193
#> 4    0.11596645 -4.106754  8.1014511349  1.6510167
#> 5   -0.06599916 -4.002456  8.1242622202  1.5045454
#> 6   -0.09833629 -4.296103  7.8793898422  1.4857565
#> 7    0.04339482 -4.074001  8.0065585692  1.5418152
#> 8    0.17496703 -4.241778  8.2000378646  1.6497577
#> 9   -6.55924613  1.795043 -5.7520093195 -7.5559904
#> 10  -6.46467755  1.822166 -5.7608037695 -7.6976811
#> 11  -6.44345520  2.018156 -6.0367001883 -7.6141802
#> 12  -6.36363297  1.710211 -6.0628518842 -7.5750071
#> 13  -6.43211244  1.859649 -5.8395874847 -7.6822856
#> 14  -6.23001342  1.763283 -5.7722800245 -7.8368917
#> 15  -6.23473493  2.029182 -5.9862334486 -7.8040984
#> 16  -6.26170051  2.090420 -6.1082802312 -7.8103635
#> 17  -7.26161204  2.228080 -5.5678681933 -8.3557877
#> 18  -7.10202492  2.379032 -5.4329888575 -8.3366009
#> 19  -7.22945366  2.454450 -5.5810455638 -8.4628358
#> 20  -7.19924597  2.338427 -5.5305278174 -8.4307327
#> 21  -7.39867913  2.101645 -5.4684820276 -8.3216633
#> 22  -7.33937098  2.316247 -5.5279071873 -8.5724329
#> 23  -7.39662992  2.373748 -5.5370252602 -8.5227154
#> 24  -7.23437146  2.395597 -5.3207253589 -8.7875243
#> 25  -6.79933102  2.130032 -5.9079589214 -8.3513142
#> 26  -6.73275011  2.277659 -5.6465133555 -8.5181071
#> 27  -6.76813613  2.178996 -5.9087274330 -8.3251935
#> 28  -6.59681443  2.358376 -5.6877698177 -8.7126882
#> 29  -6.51678780  2.426967 -5.8139745301 -8.3805298
#> 30  -6.64541350  2.223320 -5.5368634597 -8.7006223
#> 31  -6.75960871  2.379619 -5.6317620012 -8.6279609
#> 32  -6.84531880  2.300504 -5.7370907978 -8.6893739
#> 33  -8.57173394 -4.937808 -0.8306563364  0.8108793
#> 34  -8.50603689 -5.125464 -0.7784381104  0.8385486
#> 35  -8.63855490 -4.745512 -0.8553663563  0.7252428
#> 36  -8.60452839 -4.662883 -0.9843933075  0.7964835
#> 37  -8.51134289 -5.231030 -0.4175492210  0.6842862
#> 38  -8.62017485 -5.282043 -0.4130553115  0.6787055
#> 39  -8.70502601 -4.916923 -0.5960510284  0.5862479
#> 40  -8.63435544 -5.172644 -0.3716434189  0.6365347
#> 41  -8.09891590 -4.277490  0.2672857799  2.0740662
#> 42  -7.96298829 -4.243166  0.1153011594  2.1143230
#> 43  -7.89654374 -3.807377 -0.0999574840  2.3105322
#> 44  -7.92390315 -3.941861  0.0004534344  2.2074372
#> 45  -8.07315704 -4.450112  0.3428101684  2.0095650
#> 46  -8.10968616 -4.518831  0.3332712726  1.9530979
#> 47  -7.98411039 -4.525632  0.5286083438  2.0717307
#> 48  -8.08633155 -4.434373  0.4925466312  2.0915602
#> 49  -8.60064735 -5.467908 -0.7363402138  0.7115201
#> 50  -8.64993806 -5.748007 -0.6284711546  0.8759076
#> 51  -8.69760287 -5.296623 -0.6807999572  0.4505177
#> 52  -8.75596095 -5.752574 -0.6034292195  0.8725175
#> 53  -8.68746024 -5.613203 -0.5296531513  0.5018999
#> 54  -8.82407231 -5.793151 -0.3341243477  0.6609881
#> 55  -8.76639182 -5.621038 -0.2828540587  0.6716967
#> 56  -8.81255800 -5.934720 -0.4577079407  0.8010377
#> 57  -8.31302172 -5.040685  0.2327732625  1.3740199
#> 58  -8.59641152 -5.398462  0.1028293904  1.3466303
#> 59  -8.46807744 -4.952885  0.0567259014  1.4864640
#> 60  -8.64179159 -5.387533  0.1035897977  1.3916701
#> 61  -8.39925158 -5.225000  0.3604533453  1.3706117
#> 62  -8.60993896 -5.548081  0.2223344005  1.2486476
#> 63  -8.61716987 -5.084897  0.2509822743  1.1856173
#> 64  -8.79080458 -5.385910  0.1184338250  1.3510267
#> 65  -4.39327136 20.465114 -0.1349367493  0.9480576
#> 66  -4.23982257 20.616551 -0.2911583757  0.8003023
#> 67  -4.79761578 20.405484  1.8490264685  0.7775725
#> 68  -5.02296904 20.399455  2.0662335747  0.7815717
#> 69  -4.29403054 20.563076 -0.2377556941  0.8496552
#> 70  -4.23286390 20.623292 -0.2392218225  0.8518867
#> 71  -4.87798770 20.389505  2.0907163564  0.7723261
#> 72  -4.82016924 20.471588  1.9054901902  0.8460325
#> 73  -4.15536491 20.696662 -0.0925309073  0.9922161
#> 74  -4.16678850 20.683364 -0.1030731003  0.9880158
#> 75  -4.88999847 20.421840  1.9162876520  0.7954164
#> 76  -4.82528510 20.500419  1.8855715831  0.8736612
#> 77  -4.32963296 20.525906 -0.1541549547  0.9044769
#> 78  -4.19902494 20.655399 -0.2914526040  0.7989400
#> 79  -4.83456709 20.506259  1.9472282536  0.8775791
#> 80  -4.89124090 20.546073  1.9912738304  0.9186387
#> 81  -8.55638601 -4.164497 -1.2730518379  0.8654836
#> 82  -8.49732431 -4.051445 -1.4885492070  0.9728759
#> 83  -8.49532072 -3.931493 -1.1802088724  1.2429819
#> 84  -8.61436426 -3.911728 -1.3750470747  1.2076863
#> 85  -8.63526781 -4.163800 -1.2151493627  0.8325391
#> 86  -8.63586669 -4.178644 -1.2405573483  0.8155418
#> 87  -8.49644258 -3.771561 -1.1563174807  1.0977429
#> 88  -8.61683500 -3.882544 -1.3659681743  1.0896848
#> 89  -8.11745509 -3.566375 -0.2161146707  2.1787802
#> 90  -8.03980321 -3.578137 -0.2851585176  2.1865557
#> 91  -7.93456058 -3.408617 -0.6080068504  1.9171698
#> 92  -8.05128495 -3.504365 -0.4821925861  2.0825010
#> 93  -7.89320030 -3.753135 -0.1190386639  2.2460261
#> 94  -7.94084099 -3.775282 -0.1254113871  2.2436416
#> 95  -7.87165256 -3.455392 -0.5645221134  2.0350574
#> 96  -7.94153524 -3.568163 -0.5233026293  2.0945930
#> 97  19.49038388 -3.989319  2.2897120542  3.5729731
#> 98  19.58546390 -3.913441  2.2805735866  3.6932010
#> 99  19.89461944 -3.674899  2.2727117876  3.8922148
#> 100 19.77836921 -3.565683  2.3835427472  3.7535187
#> 101 19.39203491 -3.791648  2.0747397143  3.4585008
#> 102 19.38547188 -3.812624  2.1326339661  3.5879255
#> 103 19.70735002 -3.697429  2.0543845223  3.9502464
#> 104 19.81111130 -3.671962  2.2460705024  3.9000023
#> 105 20.30748258 -3.719063  2.6555675654  3.1340152
#> 106 20.25306128 -3.873193  2.8017927339  3.3000066
#> 107 20.42527908 -3.622107  2.6059292302  3.5894652
#> 108 20.32929773 -3.555610  2.7311199970  3.7135210
#> 109 20.52097108 -4.043652  2.5904839426  3.2140582
#> 110 20.34419215 -3.674986  2.9661152985  3.3582683
#> 111 20.39117001 -3.562352  2.6440662621  3.4864189
#> 112 20.30182790 -3.489812  2.8362026522  3.7158291
#> 113 19.53215018 -4.145707  2.2387094074  3.2154306
#> 114 19.47316989 -4.185862  2.2788877503  3.1389566
#> 115 19.59428137 -3.795859  1.9565698424  3.7242292
#> 116 19.26773992 -3.644937  1.9803268362  3.6621185
#> 117 19.56063186 -4.248789  2.0156342420  3.0555211
#> 118 19.61434040 -4.298773  2.0018636275  3.0053891
#> 119 19.51900326 -4.099944  1.8062213622  3.2753472
#> 120 19.42461656 -4.196714  1.8004489762  3.1249029
#> 121 20.27165614 -4.054251  2.7328269362  2.9422881
#> 122 20.32869138 -4.129848  2.7084756100  2.8484471
#> 123 20.57561258 -3.800579  2.9183040171  3.1298784
#> 124 20.49240183 -3.719557  2.8465251565  3.2930035
#> 125 20.35594318 -4.434716  2.6746518867  2.7678841
#> 126 20.13263758 -4.379824  2.5956297736  2.8611568
#> 127 20.39986539 -4.193192  2.8794080688  2.8556249
#> 128 20.46038110 -4.251545  2.9366100155  2.8875971
plot(x)

Ejemplo Cardiological

datos <- Cardiological
datos
#> # A tibble: 11 × 3
#>               Pulse              Syst             Diast
#>          <symblc_n>        <symblc_n>        <symblc_n>
#>  1  [44.00 : 68.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  2  [60.00 : 72.00]  [90.00 : 130.00]   [70.00 : 90.00]
#>  3  [56.00 : 90.00] [140.00 : 180.00]  [90.00 : 100.00]
#>  4 [70.00 : 112.00] [110.00 : 142.00]  [80.00 : 108.00]
#>  5  [54.00 : 72.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  6 [70.00 : 100.00] [130.00 : 160.00]  [80.00 : 110.00]
#>  7  [63.00 : 75.00]  [60.00 : 100.00] [140.00 : 150.00]
#>  8 [72.00 : 100.00] [130.00 : 160.00]   [76.00 : 90.00]
#>  9  [76.00 : 98.00] [110.00 : 190.00]  [70.00 : 110.00]
#> 10  [86.00 : 96.00] [138.00 : 180.00]  [90.00 : 110.00]
#> 11 [86.00 : 100.00] [110.00 : 150.00]  [78.00 : 100.00]
x <- sym.umap(datos)
x
#>            V1           V2          V3
#> 1   2.6707749  2.378424158  3.21093975
#> 2   1.8850966  2.696632703  2.89234388
#> 3   2.7923122  2.514838049  3.34951522
#> 4   1.7930687  2.711165938  2.87515964
#> 5   2.7067429  2.138471351  3.20128669
#> 6   1.6603480  2.055358836  2.38842536
#> 7   2.8287208  2.063880902  3.14105591
#> 8   1.7955775  2.203681276  2.28966815
#> 9   2.2615552  2.038225735  2.78412650
#> 10  1.5469554  2.089044999  2.30305803
#> 11  1.2553123  0.872866720  0.71638615
#> 12  1.1455436  1.033390269  0.67569766
#> 13  1.9821104  1.662419993  2.51295605
#> 14  1.6024629  1.666793018  2.15022198
#> 15  1.0145824  0.225317981  0.49393962
#> 16  0.6277037  0.291477494  0.75532425
#> 17  1.0318395  0.171087106  0.30201628
#> 18 -1.7733267 -1.169490295 -1.64659448
#> 19  0.6433475  0.125432642 -0.17375102
#> 20 -1.9101500  0.105164818 -2.27495435
#> 21  0.8383611 -0.077148867  0.35934373
#> 22 -1.4049879 -1.445019819 -1.90043132
#> 23  0.5628641  0.052508832 -0.09193352
#> 24 -1.5937678  0.329762629 -2.23765444
#> 25  1.5131216  1.481059117  1.60928208
#> 26 -2.9700599 -1.423523426 -1.61791033
#> 27  0.9026259  0.785356139  0.32722826
#> 28 -2.8237258 -0.790182164 -2.06596903
#> 29  0.6513117 -1.070591604  0.66879835
#> 30 -1.9670270 -2.169820351 -1.90626315
#> 31  0.5359765 -0.432352794  0.30943074
#> 32 -1.8124219 -1.749597808 -2.32462457
#> 33  2.2021201  2.555201427  2.94876296
#> 34  1.6001340  2.883410662  2.80244271
#> 35  2.4609639  2.679228638  3.24168046
#> 36  1.6390634  2.673810537  2.74785492
#> 37  2.4532386  1.902213355  2.95123736
#> 38  1.3525421  1.947112256  2.20921467
#> 39  2.5044713  1.877888721  3.10360383
#> 40  1.3364061  1.795848241  2.13112115
#> 41  0.9575777  0.709596412  0.69929780
#> 42 -2.5533842 -1.322257389 -1.94921698
#> 43  0.8641083  0.746595260 -0.19797973
#> 44 -2.4321860 -0.542921730 -2.39000175
#> 45  0.5931292 -0.793906728  0.49947450
#> 46 -1.6929393 -1.967947987 -2.12879806
#> 47  0.3458569 -0.276151234  0.20751252
#> 48 -1.5114733 -1.081612632 -2.53718755
#> 49  1.1949740 -2.201787549  1.13548076
#> 50  1.3437493 -2.487541813  1.34935441
#> 51  1.3130187 -2.157682945  0.99045799
#> 52  1.4175999 -2.188901430  1.13858769
#> 53  1.3435420 -2.058725011  1.18053161
#> 54  1.1579644 -2.263880171  1.13577106
#> 55  1.0168605 -2.373924643  0.85946563
#> 56  1.7369940 -2.221476365  0.96995082
#> 57  1.2662252  0.684751793  0.78822557
#> 58 -2.8093831 -1.088367898 -1.91544336
#> 59  0.6506356  0.870546480 -0.34509026
#> 60 -2.4417826 -0.432314059 -2.32970325
#> 61  0.7981213  0.376720317  0.74512234
#> 62 -2.2203625 -1.516621177 -2.06849476
#> 63  0.7518403  0.541099174 -0.17868167
#> 64 -2.0693686 -0.739745555 -2.47555668
#> 65  1.3721112  1.847353909  1.64893890
#> 66 -2.7820302 -1.267031383 -1.39745166
#> 67  0.5073791  0.858187211 -0.60701728
#> 68 -2.0640470 -0.003760756 -2.31295714
#> 69  0.4391433 -1.273541704  0.49522764
#> 70 -1.5945386 -2.231840943 -1.83916804
#> 71 -1.2610826  0.466774554 -2.07549395
#> 72 -1.4173317  0.029665677 -2.46045592
#> 73 -1.4501013 -1.085793674 -1.46185036
#> 74 -2.1380852 -1.152777474 -1.88553121
#> 75 -1.6856534  0.545274483 -2.15486224
#> 76 -1.7395080  0.096082270 -2.50820193
#> 77 -1.1021212 -1.619362358 -1.78462120
#> 78 -1.4664541 -1.679024755 -2.27211796
#> 79 -1.2558439  0.524055920 -2.02363907
#> 80 -1.3403215  0.106974352 -2.41453033
#> 81 -2.4963286 -1.181765981 -0.97113403
#> 82 -2.6623542 -1.147098099 -1.63704332
#> 83 -1.7769115 -0.812687933 -1.43300848
#> 84 -2.5019272 -0.659377111 -2.10937351
#> 85 -1.3334975 -1.964111870 -1.17027113
#> 86 -1.8640101 -2.089941328 -1.69157003
#> 87 -1.2311496 -0.998260579 -1.84558972
#> 88 -1.7184424 -1.200882963 -2.48339280
plot(x)

Length of intervals

data(oils)
datos <- oils
interval.length(datos)
#>      GRA FRE IOD SAP
#> L  0.005   9  34  78
#> P  0.007   1  16   9
#> Co 0.002   5  14   9
#> S  0.006   2  12   6
#> Ca 0.001  10   2   4
#> O  0.005   6  11   9
#> B  0.010   8   8   9
#> H  0.006  10  24  12

PCA Histogram

Hardwood Data

data("hardwoodBrito")
Hardwood.histogram<-hardwoodBrito
Hardwood.cols<-colnames(Hardwood.histogram)
Hardwood.names<-row.names(Hardwood.histogram)
Hardwood.histogram
#> # A tibble: 5 × 4
#>         ANNT       JULT       ANNP       MITM
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1     <hist>     <hist>     <hist>     <hist>
#> 2     <hist>     <hist>     <hist>     <hist>
#> 3     <hist>     <hist>     <hist>     <hist>
#> 4     <hist>     <hist>     <hist>     <hist>
#> 5     <hist>     <hist>     <hist>     <hist>

Hardwood.histogram[[1]][[1]]
#> $breaks
#> [1] -3.9  4.2 10.3 20.6
#> 
#> $props
#> [1] 0.5 0.4 0.1

Weighted Center Matrix

weighted.center<-weighted.center.Hist.RSDA(Hardwood.histogram)

Bin Matrix

BIN.Matrix<-matrix(rep(3,length(Hardwood.cols)*length(Hardwood.names)),nrow = length(Hardwood.names))

PCA

pca.hist<-sym.histogram.pca(Hardwood.histogram,BIN.Matrix)
#> Warning: Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
pca.hist$classic.PCA
#> **Results for the Principal Component Analysis (PCA)**
#> The analysis was performed on 85 individuals, described by 4 variables
#> *The results are available in the following objects:
#> 
#>    name               description                                
#> 1  "$eig"             "eigenvalues"                              
#> 2  "$var"             "results for the variables"                
#> 3  "$var$coord"       "coord. for the variables"                 
#> 4  "$var$cor"         "correlations variables - dimensions"      
#> 5  "$var$cos2"        "cos2 for the variables"                   
#> 6  "$var$contrib"     "contributions of the variables"           
#> 7  "$ind"             "results for the individuals"              
#> 8  "$ind$coord"       "coord. for the individuals"               
#> 9  "$ind$cos2"        "cos2 for the individuals"                 
#> 10 "$ind$contrib"     "contributions of the individuals"         
#> 11 "$ind.sup"         "results for the supplementary individuals"
#> 12 "$ind.sup$coord"   "coord. for the supplementary individuals" 
#> 13 "$ind.sup$cos2"    "cos2 for the supplementary individuals"   
#> 14 "$call"            "summary statistics"                       
#> 15 "$call$centre"     "mean of the variables"                    
#> 16 "$call$ecart.type" "standard error of the variables"          
#> 17 "$call$row.w"      "weights for the individuals"              
#> 18 "$call$col.w"      "weights for the variables"
pca.hist$sym.hist.matrix.PCA
#> # A tibble: 5 × 4
#>         PC.1       PC.2       PC.3       PC.4
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1     <hist>     <hist>     <hist>     <hist>
#> 2     <hist>     <hist>     <hist>     <hist>
#> 3     <hist>     <hist>     <hist>     <hist>
#> 4     <hist>     <hist>     <hist>     <hist>
#> 5     <hist>     <hist>     <hist>     <hist>

Plots

ACER.p1<-Sym.PCA.Hist.PCA.k.plot(data.sym.df = pca.hist$Bins.df,
                             title.graph = " ",
                             concepts.name = c("ACER"),
                             title.x = "First Principal Component (84.83%)",
                             title.y = "Frequency",
                             pca.axes = 1)

ACER.p1

ALL.p1<-Sym.PCA.Hist.PCA.k.plot(data.sym.df = pca.hist$Bins.df,
                    title.graph = " ",
                    concepts.name = unique(pca.hist$Bins.df$Object.Name),
                    title.x = "First Principal Component (84.83%)",
                    title.y = "Frequency",
                    pca.axes = 1)

ALL.p1
#> Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps

Hardwood.quantiles.PCA<-quantiles.RSDA(pca.hist$sym.hist.matrix.PCA,3)
#> Warning in min(which(props.cum >= percentils.RSDA[i])): no non-missing
#> arguments to min; returning Inf
#> Warning: Setting row names on a tibble is deprecated.

label.name<-"Hard Wood"
Title<-"First Principal Plane"
axes.x.label<- "First Principal Component (84.83%)"
axes.y.label<- "Second Principal Component (9.70%)"
concept.names<-c("ACER")
var.names<-c("PC.1","PC.2")

quantile.ACER.plot<-Percentil.Arrow.plot(Hardwood.quantiles.PCA,
                     concept.names,
                     var.names,
                     Title,
                     axes.x.label,
                     axes.y.label,
                     label.name
                     )

quantile.ACER.plot

label.name<-"Hard Wood"
Title<-"First Principal Plane"
axes.x.label<- "First Principal Component (84.83%)"
axes.y.label<- "Second Principal Component (9.70%)"
concept.names<-row.names(Hardwood.quantiles.PCA)
var.names<-c("PC.1","PC.2")

quantile.plot<-Percentil.Arrow.plot(Hardwood.quantiles.PCA,
                     concept.names,
                     var.names,
                     Title,
                     axes.x.label,
                     axes.y.label,
                     label.name
                     )

quantile.plot
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_point()`).
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_segment()`).

label.name<-"Hard Wood"
Title<-"First Principal Plane"
axes.x.label<- "PC 1 (84.83%)"
axes.y.label<- "PC 2 (9.70%)"
concept.names<-c("ACER")
var.names<-c("PC.1","PC.2")

plot.3D.HW<-sym.quantiles.PCA.plot(Hardwood.quantiles.PCA,
                               concept.names,
                               var.names,
                               Title,
                               axes.x.label,
                               axes.y.label,
                               label.name)

plot.3D.HW
concept.names<-row.names(Hardwood.quantiles.PCA)
sym.all.quantiles.plot(Hardwood.quantiles.PCA,
                               concept.names,
                               var.names,
                               Title,
                               axes.x.label,
                               axes.y.label,
                               label.name)
#> Warning: Ignoring 4 observations
sym.all.quantiles.mesh3D.plot(Hardwood.quantiles.PCA,
                               concept.names,
                               var.names,
                               Title,
                               axes.x.label,
                               axes.y.label,
                               label.name)

KS

Hardwood.quantiles.PCA.2<-quantiles.RSDA.KS(pca.hist$sym.hist.matrix.PCA,100)
#> Warning: Setting row names on a tibble is deprecated.
h<-Hardwood.quantiles.PCA.2[[1]][[1]]
tmp<-HistRSDAToEcdf(h)

h2<-Hardwood.quantiles.PCA.2[[1]][[2]]
tmp2<-HistRSDAToEcdf(h2)

h3<-Hardwood.quantiles.PCA.2[[1]][[3]]
tmp3<-HistRSDAToEcdf(h3)

h4<-Hardwood.quantiles.PCA.2[[1]][[4]]
tmp4<-HistRSDAToEcdf(h4)

h5<-Hardwood.quantiles.PCA.2[[1]][[5]]
tmp5<-HistRSDAToEcdf(h5)

breaks.unique<-unique(c(h$breaks,h2$breaks,h3$breaks,h4$breaks,h5$breaks))
tmp.unique<-breaks.unique[order(breaks.unique)]

tmp<-tmp(v = tmp.unique)
tmp2<-tmp2(v = tmp.unique)
tmp3<-tmp3(v = tmp.unique)
tmp4<-tmp4(v = tmp.unique)
tmp5<-tmp5(v = tmp.unique)
abs_dif <-  abs(tmp2 - tmp)
# La distancia Kolmogorov–Smirnov es el máximo de las distancias absolutas.
distancia_ks <- max(abs_dif)
distancia_ks
#> [1] 0.05857869
library(tidyr)
# Se unen los valores calculados en un dataframe.
df.HW <- data.frame(
  PC.1 = tmp.unique,
  ACER = tmp,
  ALNUS = tmp2,
  FRAXINUS = tmp3,
  JUGLANS = tmp4,
  QUERCUS = tmp5
) %>%
  pivot_longer(
    cols = c(ACER, ALNUS,FRAXINUS,JUGLANS,QUERCUS),
    names_to = "HardWood",
    values_to = "ecdf"
  )

grafico_ecdf <- ggplot(data = df.HW,
                       aes(x = PC.1, y = ecdf, color = HardWood)) +
  geom_line(size = 1) +
  labs(
    color = "Hardwood",
    y = "Empirical Cumulative Distribution "
  ) +
  theme_bw() +
  theme(legend.position = "bottom",
        plot.title = element_text(size = 12))+geom_line()

grafico_ecdf