How to use RSDA 3.2.1

RSDA Package version 3.3

Oldemar Rodríguez R.

Installing the package

CRAN

install.packages("RSDA", dependencies=TRUE)

Github

devtools::install_github("PROMiDAT/RSDA")

How to read a Symbolic Table from a CSV file with RSDA?

ex3 <- read.sym.table(file = 'tsym1.csv', header=TRUE, sep=';',dec='.', row.names=1)
ex3
#> # A tibble: 7 × 7
#>      F1              F2      F3    F4        F5               F6              F7
#>   <dbl>      <symblc_n> <symbl> <dbl> <symblc_>       <symblc_n>      <symblc_n>
#> 1   2.8   [1.00 : 2.00]  <hist>   6       {a,d}   [0.00 : 90.00]  [9.00 : 24.00]
#> 2   1.4   [3.00 : 9.00]  <hist>   8     {b,c,d} [-90.00 : 98.00]  [-9.00 : 9.00]
#> 3   3.2  [-1.00 : 4.00]  <hist>  -7       {a,b}  [65.00 : 90.00] [65.00 : 70.00]
#> 4  -2.1   [0.00 : 2.00]  <hist>   0   {a,b,c,d}  [45.00 : 89.00] [25.00 : 67.00]
#> 5  -3   [-4.00 : -2.00]  <hist>  -9.5       {b}  [20.00 : 40.00]  [9.00 : 40.00]
#> 6   0.1 [10.00 : 21.00]  <hist>  -1       {a,d}    [5.00 : 8.00]   [5.00 : 8.00]
#> 7   9    [4.00 : 21.00]  <hist>   0.5       {a}    [3.14 : 6.76]   [4.00 : 6.00]

How to save a Symbolic Table in a CSV file with RSDA?

write.sym.table(ex3, file = 'tsymtemp.csv', sep = ';',dec = '.',
                row.names = TRUE, col.names = TRUE)

Symbolic Data Frame Example in RSDA

data(example3)
example3
#> # A tibble: 7 × 7
#>      F1              F2                      F3    F4        F5               F6
#>   <dbl>      <symblc_n>              <symblc_m> <dbl> <symblc_>       <symblc_n>
#> 1   2.8   [1.00 : 2.00] M1:0.10 M2:0.70 M3:0.20   6   {e,g,i,k}   [0.00 : 90.00]
#> 2   1.4   [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10   8   {a,b,c,d} [-90.00 : 98.00]
#> 3   3.2  [-1.00 : 4.00] M1:0.20 M2:0.20 M3:0.60  -7   {2,b,1,c}  [65.00 : 90.00]
#> 4  -2.1   [0.00 : 2.00] M1:0.90 M2:0.00 M3:0.10   0   {a,3,4,c}  [45.00 : 89.00]
#> 5  -3   [-4.00 : -2.00] M1:0.60 M2:0.00 M3:0.40  -9.5 {e,g,i,k}  [20.00 : 40.00]
#> 6   0.1 [10.00 : 21.00] M1:0.00 M2:0.70 M3:0.30  -1     {e,1,i}    [5.00 : 8.00]
#> 7   9    [4.00 : 21.00] M1:0.20 M2:0.20 M3:0.60   0.5   {e,a,2}    [3.14 : 6.76]
#> # ℹ 1 more variable: F7 <symblc_n>
example3[2,]
#> # A tibble: 1 × 7
#>      F1            F2                      F3    F4         F5               F6
#>   <dbl>    <symblc_n>              <symblc_m> <dbl> <symblc_s>       <symblc_n>
#> 1   1.4 [3.00 : 9.00] M1:0.60 M2:0.30 M3:0.10     8  {a,b,c,d} [-90.00 : 98.00]
#> # ℹ 1 more variable: F7 <symblc_n>
example3[,3]
#> # A tibble: 7 × 1
#>                        F3
#>                <symblc_m>
#> 1 M1:0.10 M2:0.70 M3:0.20
#> 2 M1:0.60 M2:0.30 M3:0.10
#> 3 M1:0.20 M2:0.20 M3:0.60
#> 4 M1:0.90 M2:0.00 M3:0.10
#> 5 M1:0.60 M2:0.00 M3:0.40
#> 6 M1:0.00 M2:0.70 M3:0.30
#> 7 M1:0.20 M2:0.20 M3:0.60
example3[2:3,5]
#> # A tibble: 2 × 1
#>           F5
#>   <symblc_s>
#> 1  {a,b,c,d}
#> 2  {2,b,1,c}
example3$F1
#> [1]  2.8  1.4  3.2 -2.1 -3.0  0.1  9.0

How to generate a symbolic data table from a classic data table in RSDA?

data(ex1_db2so)
ex1_db2so
#>         state sex county group age
#> 1     Florida   M      2     6   3
#> 2  California   F      4     3   4
#> 3       Texas   M     12     3   4
#> 4     Florida   F      2     3   4
#> 5       Texas   M      4     6   4
#> 6       Texas   F      2     3   3
#> 7     Florida   M      6     3   4
#> 8     Florida   F      2     6   4
#> 9  California   M      2     3   6
#> 10 California   F     21     3   4
#> 11 California   M      2     3   4
#> 12 California   M      2     6   7
#> 13      Texas   F     23     3   4
#> 14    Florida   M      2     3   4
#> 15    Florida   F     12     7   4
#> 16      Texas   M      2     3   8
#> 17 California   F      3     7   9
#> 18 California   M      2     3  11
#> 19 California   M      1     3  11

The classic.to.sym function converts a traditional (classic) table into a symbolic one. To do this, we must specify the following parameters:

  • x = a data.frame
  • concept = variables to be used as a concept
  • variables = variables to be used, compatible with tidyselect options
  • default.numeric = function that will be used by default for numerical values (sym.interval)
  • default.categorical = function that will be used by default for categorical values (sym.modal)

Example 1

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c(state, sex),
                         variables = c(county, group, age))
result
#> # A tibble: 6 × 3
#>           county         group            age
#>       <symblc_n>    <symblc_n>     <symblc_n>
#> 1 [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3 [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5 [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6 [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

We can add new variables indicating the type we want them to be.

result <- classic.to.sym(x = ex1_db2so, 
                         concept = c("state", "sex"),
                         variables = c(county, group, age),
                         age_hist = sym.histogram(age, breaks = pretty(ex1_db2so$age, 5)))
result
#> # A tibble: 6 × 4
#>     age_hist         county         group            age
#>   <symblc_h>     <symblc_n>    <symblc_n>     <symblc_n>
#> 1     <hist> [3.00 : 21.00] [3.00 : 7.00]  [4.00 : 9.00]
#> 2     <hist>  [1.00 : 2.00] [3.00 : 6.00] [4.00 : 11.00]
#> 3     <hist> [2.00 : 12.00] [3.00 : 7.00]  [4.00 : 4.00]
#> 4     <hist>  [2.00 : 6.00] [3.00 : 6.00]  [3.00 : 4.00]
#> 5     <hist> [2.00 : 23.00] [3.00 : 3.00]  [3.00 : 4.00]
#> 6     <hist> [2.00 : 12.00] [3.00 : 6.00]  [4.00 : 8.00]

Example 2

data(USCrime)
head(USCrime)
#>   state fold population householdsize racepctblack racePctWhite racePctAsian
#> 1     8    1       0.19          0.33         0.02         0.90         0.12
#> 2    53    1       0.00          0.16         0.12         0.74         0.45
#> 3    24    1       0.00          0.42         0.49         0.56         0.17
#> 4    34    1       0.04          0.77         1.00         0.08         0.12
#> 5    42    1       0.01          0.55         0.02         0.95         0.09
#> 6     6    1       0.02          0.28         0.06         0.54         1.00
#>   racePctHisp agePct12t21 agePct12t29 agePct16t24 agePct65up numbUrban pctUrban
#> 1        0.17        0.34        0.47        0.29       0.32      0.20      1.0
#> 2        0.07        0.26        0.59        0.35       0.27      0.02      1.0
#> 3        0.04        0.39        0.47        0.28       0.32      0.00      0.0
#> 4        0.10        0.51        0.50        0.34       0.21      0.06      1.0
#> 5        0.05        0.38        0.38        0.23       0.36      0.02      0.9
#> 6        0.25        0.31        0.48        0.27       0.37      0.04      1.0
#>   medIncome pctWWage pctWFarmSelf pctWInvInc pctWSocSec pctWPubAsst pctWRetire
#> 1      0.37     0.72         0.34       0.60       0.29        0.15       0.43
#> 2      0.31     0.72         0.11       0.45       0.25        0.29       0.39
#> 3      0.30     0.58         0.19       0.39       0.38        0.40       0.84
#> 4      0.58     0.89         0.21       0.43       0.36        0.20       0.82
#> 5      0.50     0.72         0.16       0.68       0.44        0.11       0.71
#> 6      0.52     0.68         0.20       0.61       0.28        0.15       0.25
#>   medFamInc perCapInc whitePerCap blackPerCap indianPerCap AsianPerCap
#> 1      0.39      0.40        0.39        0.32         0.27        0.27
#> 2      0.29      0.37        0.38        0.33         0.16        0.30
#> 3      0.28      0.27        0.29        0.27         0.07        0.29
#> 4      0.51      0.36        0.40        0.39         0.16        0.25
#> 5      0.46      0.43        0.41        0.28         0.00        0.74
#> 6      0.62      0.72        0.76        0.77         0.28        0.52
#>   OtherPerCap HispPerCap NumUnderPov PctPopUnderPov PctLess9thGrade
#> 1        0.36       0.41        0.08           0.19            0.10
#> 2        0.22       0.35        0.01           0.24            0.14
#> 3        0.28       0.39        0.01           0.27            0.27
#> 4        0.36       0.44        0.01           0.10            0.09
#> 5        0.51       0.48        0.00           0.06            0.25
#> 6        0.48       0.60        0.01           0.12            0.13
#>   PctNotHSGrad PctBSorMore PctUnemployed PctEmploy PctEmplManu PctEmplProfServ
#> 1         0.18        0.48          0.27      0.68        0.23            0.41
#> 2         0.24        0.30          0.27      0.73        0.57            0.15
#> 3         0.43        0.19          0.36      0.58        0.32            0.29
#> 4         0.25        0.31          0.33      0.71        0.36            0.45
#> 5         0.30        0.33          0.12      0.65        0.67            0.38
#> 6         0.12        0.80          0.10      0.65        0.19            0.77
#>   PctOccupManu PctOccupMgmtProf MalePctDivorce MalePctNevMarr FemalePctDiv
#> 1         0.25             0.52           0.68           0.40         0.75
#> 2         0.42             0.36           1.00           0.63         0.91
#> 3         0.49             0.32           0.63           0.41         0.71
#> 4         0.37             0.39           0.34           0.45         0.49
#> 5         0.42             0.46           0.22           0.27         0.20
#> 6         0.06             0.91           0.49           0.57         0.61
#>   TotalPctDiv PersPerFam PctFam2Par PctKids2Par PctYoungKids2Par PctTeen2Par
#> 1        0.75       0.35       0.55        0.59             0.61        0.56
#> 2        1.00       0.29       0.43        0.47             0.60        0.39
#> 3        0.70       0.45       0.42        0.44             0.43        0.43
#> 4        0.44       0.75       0.65        0.54             0.83        0.65
#> 5        0.21       0.51       0.91        0.91             0.89        0.85
#> 6        0.58       0.44       0.62        0.69             0.87        0.53
#>   PctWorkMomYoungKids PctWorkMom NumIlleg PctIlleg NumImmig PctImmigRecent
#> 1                0.74       0.76     0.04     0.14     0.03           0.24
#> 2                0.46       0.53     0.00     0.24     0.01           0.52
#> 3                0.71       0.67     0.01     0.46     0.00           0.07
#> 4                0.85       0.86     0.03     0.33     0.02           0.11
#> 5                0.40       0.60     0.00     0.06     0.00           0.03
#> 6                0.30       0.43     0.00     0.11     0.04           0.30
#>   PctImmigRec5 PctImmigRec8 PctImmigRec10 PctRecentImmig PctRecImmig5
#> 1         0.27         0.37          0.39           0.07         0.07
#> 2         0.62         0.64          0.63           0.25         0.27
#> 3         0.06         0.15          0.19           0.02         0.02
#> 4         0.20         0.30          0.31           0.05         0.08
#> 5         0.07         0.20          0.27           0.01         0.02
#> 6         0.35         0.43          0.47           0.50         0.50
#>   PctRecImmig8 PctRecImmig10 PctSpeakEnglOnly PctNotSpeakEnglWell
#> 1         0.08          0.08             0.89                0.06
#> 2         0.25          0.23             0.84                0.10
#> 3         0.04          0.05             0.88                0.04
#> 4         0.11          0.11             0.81                0.08
#> 5         0.04          0.05             0.88                0.05
#> 6         0.56          0.57             0.45                0.28
#>   PctLargHouseFam PctLargHouseOccup PersPerOccupHous PersPerOwnOccHous
#> 1            0.14              0.13             0.33              0.39
#> 2            0.16              0.10             0.17              0.29
#> 3            0.20              0.20             0.46              0.52
#> 4            0.56              0.62             0.85              0.77
#> 5            0.16              0.19             0.59              0.60
#> 6            0.25              0.19             0.29              0.53
#>   PersPerRentOccHous PctPersOwnOccup PctPersDenseHous PctHousLess3BR MedNumBR
#> 1               0.28            0.55             0.09           0.51      0.5
#> 2               0.17            0.26             0.20           0.82      0.0
#> 3               0.43            0.42             0.15           0.51      0.5
#> 4               1.00            0.94             0.12           0.01      0.5
#> 5               0.37            0.89             0.02           0.19      0.5
#> 6               0.18            0.39             0.26           0.73      0.0
#>   HousVacant PctHousOccup PctHousOwnOcc PctVacantBoarded PctVacMore6Mos
#> 1       0.21         0.71          0.52             0.05           0.26
#> 2       0.02         0.79          0.24             0.02           0.25
#> 3       0.01         0.86          0.41             0.29           0.30
#> 4       0.01         0.97          0.96             0.60           0.47
#> 5       0.01         0.89          0.87             0.04           0.55
#> 6       0.02         0.84          0.30             0.16           0.28
#>   MedYrHousBuilt PctHousNoPhone PctWOFullPlumb OwnOccLowQuart OwnOccMedVal
#> 1           0.65           0.14           0.06           0.22         0.19
#> 2           0.65           0.16           0.00           0.21         0.20
#> 3           0.52           0.47           0.45           0.18         0.17
#> 4           0.52           0.11           0.11           0.24         0.21
#> 5           0.73           0.05           0.14           0.31         0.31
#> 6           0.25           0.02           0.05           0.94         1.00
#>   OwnOccHiQuart RentLowQ RentMedian RentHighQ MedRent MedRentPctHousInc
#> 1          0.18     0.36       0.35      0.38    0.34              0.38
#> 2          0.21     0.42       0.38      0.40    0.37              0.29
#> 3          0.16     0.27       0.29      0.27    0.31              0.48
#> 4          0.19     0.75       0.70      0.77    0.89              0.63
#> 5          0.30     0.40       0.36      0.38    0.38              0.22
#> 6          1.00     0.67       0.63      0.68    0.62              0.47
#>   MedOwnCostPctInc MedOwnCostPctIncNoMtg NumInShelters NumStreet PctForeignBorn
#> 1             0.46                  0.25          0.04         0           0.12
#> 2             0.32                  0.18          0.00         0           0.21
#> 3             0.39                  0.28          0.00         0           0.14
#> 4             0.51                  0.47          0.00         0           0.19
#> 5             0.51                  0.21          0.00         0           0.11
#> 6             0.59                  0.11          0.00         0           0.70
#>   PctBornSameState PctSameHouse85 PctSameCity85 PctSameState85 LandArea PopDens
#> 1             0.42           0.50          0.51           0.64     0.12    0.26
#> 2             0.50           0.34          0.60           0.52     0.02    0.12
#> 3             0.49           0.54          0.67           0.56     0.01    0.21
#> 4             0.30           0.73          0.64           0.65     0.02    0.39
#> 5             0.72           0.64          0.61           0.53     0.04    0.09
#> 6             0.42           0.49          0.73           0.64     0.01    0.58
#>   PctUsePubTrans LemasPctOfficDrugUn ViolentCrimesPerPop
#> 1           0.20                0.32                0.20
#> 2           0.45                0.00                0.67
#> 3           0.02                0.00                0.43
#> 4           0.28                0.00                0.12
#> 5           0.02                0.00                0.03
#> 6           0.10                0.00                0.14
result  <- classic.to.sym(x = USCrime,
                          concept = state, 
                          variables= c(NumInShelters,
                                       NumImmig,
                                       ViolentCrimesPerPop),
                          ViolentCrimesPerPop_hist = sym.histogram(ViolentCrimesPerPop,
                                                                   breaks = pretty(USCrime$ViolentCrimesPerPop,5)))
result
#> # A tibble: 46 × 4
#>    ViolentCrimesPerPop_hist NumInShelters      NumImmig ViolentCrimesPerPop
#>                  <symblc_h>    <symblc_n>    <symblc_n>          <symblc_n>
#>  1                   <hist> [0.00 : 0.32] [0.00 : 0.04]       [0.01 : 1.00]
#>  2                   <hist> [0.01 : 0.18] [0.01 : 0.09]       [0.05 : 0.36]
#>  3                   <hist> [0.00 : 1.00] [0.00 : 0.57]       [0.05 : 0.57]
#>  4                   <hist> [0.00 : 0.08] [0.00 : 0.02]       [0.02 : 1.00]
#>  5                   <hist> [0.00 : 1.00] [0.00 : 1.00]       [0.01 : 1.00]
#>  6                   <hist> [0.00 : 0.68] [0.00 : 0.23]       [0.07 : 0.75]
#>  7                   <hist> [0.00 : 0.79] [0.00 : 0.14]       [0.00 : 0.94]
#>  8                   <hist> [0.01 : 0.01] [0.01 : 0.01]       [0.37 : 0.37]
#>  9                   <hist> [1.00 : 1.00] [0.39 : 0.39]       [1.00 : 1.00]
#> 10                   <hist> [0.00 : 0.52] [0.00 : 1.00]       [0.06 : 1.00]
#> # ℹ 36 more rows

Example 3

data("ex_mcfa1") 
head(ex_mcfa1)
#>   suspect age     hair    eyes    region
#> 1       1  42    h_red e_brown     Bronx
#> 2       2  20  h_black e_green     Bronx
#> 3       3  64  h_brown e_brown  Brooklyn
#> 4       4  55 h_blonde e_brown     Bronx
#> 5       5   4  h_brown e_green Manhattan
#> 6       6  61 h_blonde e_green     Bronx
sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect, 
                            variables=c(hair,
                                        eyes,
                                        region),
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 3
#>                  hair              eyes               region
#>            <symblc_s>        <symblc_s>           <symblc_s>
#>  1            {h_red} {e_brown,e_black}              {Bronx}
#>  2 {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8 {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows

Example 4

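We can change the function applied by default to the categorical variables through the default.categorical argument. Here sym.set is used, and because the variables argument is omitted, all remaining columns (including age) are converted.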

sym.table <- classic.to.sym(x = ex_mcfa1, 
                            concept = suspect,
                            default.categorical = sym.set)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows

Converting SODAS 1.0 *.SDS files to RSDA files

hani3101 <- SDS.to.RSDA(file.path = "hani3101.sds")
#> Preprocessing file
#> Converting data to JSON format
#> Processing variable 1: R3101
#> Processing variable 2: RNINO12
#> Processing variable 3: RNINO3
#> Processing variable 4: RNINO4
#> Processing variable 5: RNINO34
#> Processing variable 6: RSOI
hani3101
#> # A tibble: 32 × 6
#>                             R3101                 RNINO12
#>                        <symblc_m>              <symblc_m>
#>  1 X2:0.21 X4:0.18 X3:0.15 X5:... X1:0.17 X2:0.83 X3:0.00
#>  2 X2:0.30 X4:0.14 X3:0.19 X5:... X1:0.00 X2:0.25 X3:0.75
#>  3 X2:0.16 X4:0.12 X3:0.20 X5:... X1:0.67 X2:0.33 X3:0.00
#>  4 X2:0.13 X4:0.15 X3:0.22 X5:... X1:0.17 X2:0.83 X3:0.00
#>  5 X2:0.14 X4:0.14 X3:0.18 X5:... X1:0.42 X2:0.58 X3:0.00
#>  6 X2:0.26 X4:0.06 X3:0.23 X5:... X1:0.00 X2:0.67 X3:0.33
#>  7 X2:0.28 X4:0.14 X3:0.10 X5:... X1:0.00 X2:1.00 X3:0.00
#>  8 X2:0.25 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#>  9 X2:0.20 X4:0.15 X3:0.19 X5:... X1:0.00 X2:1.00 X3:0.00
#> 10 X2:0.21 X4:0.16 X3:0.31 X5:... X1:0.08 X2:0.92 X3:0.00
#> # ℹ 22 more rows
#> # ℹ 4 more variables: RNINO3 <symblc_m>, RNINO4 <symblc_m>, RNINO34 <symblc_m>,
#> #   RSOI <symblc_m>
# We can save the symbolic table as a CSV file in RSDA format as follows:
write.sym.table(hani3101,
                file='hani3101.csv',
                sep=';',
                dec='.',
                row.names=TRUE,
                col.names=TRUE)

Converting SODAS 2.0 *.XML files to RSDA files

abalone <- SODAS.to.RSDA("abalone.xml")
#> Processing variable 1: LENGTH
#> Processing variable 2: DIAMETER
#> Processing variable 3: HEIGHT
#> Processing variable 4: WHOLE_WEIGHT
#> Processing variable 5: SHUCKED_WEIGHT
#> Processing variable 6: VISCERA_WEIGHT
#> Processing variable 7: SHELL_WEIGHT
abalone
#> # A tibble: 24 × 7
#>           LENGTH      DIAMETER        HEIGHT  WHOLE_WEIGHT SHUCKED_WEIGHT
#>       <symblc_n>    <symblc_n>    <symblc_n>    <symblc_n>     <symblc_n>
#>  1 [0.28 : 0.66] [0.20 : 0.48] [0.07 : 0.18] [0.08 : 1.37]  [0.03 : 0.64]
#>  2 [0.30 : 0.74] [0.22 : 0.58] [0.02 : 1.13] [0.15 : 2.25]  [0.06 : 1.16]
#>  3 [0.34 : 0.78] [0.26 : 0.63] [0.06 : 0.23] [0.20 : 2.66]  [0.07 : 1.49]
#>  4 [0.39 : 0.82] [0.30 : 0.65] [0.10 : 0.25] [0.26 : 2.51]  [0.11 : 1.23]
#>  5 [0.40 : 0.74] [0.32 : 0.60] [0.10 : 0.24] [0.35 : 2.20]  [0.12 : 0.84]
#>  6 [0.45 : 0.80] [0.38 : 0.63] [0.14 : 0.22] [0.64 : 2.53]  [0.16 : 0.93]
#>  7 [0.49 : 0.72] [0.36 : 0.58] [0.12 : 0.21] [0.68 : 2.12]  [0.16 : 0.82]
#>  8 [0.55 : 0.70] [0.46 : 0.58] [0.18 : 0.22] [1.21 : 1.81]  [0.32 : 0.71]
#>  9 [0.08 : 0.24] [0.06 : 0.18] [0.01 : 0.06] [0.00 : 0.07]  [0.00 : 0.03]
#> 10 [0.13 : 0.58] [0.10 : 0.45] [0.00 : 0.15] [0.01 : 0.89]  [0.00 : 0.50]
#> # ℹ 14 more rows
#> # ℹ 2 more variables: VISCERA_WEIGHT <symblc_n>, SHELL_WEIGHT <symblc_n>
write.sym.table(abalone,
                file='abalone.csv',
                sep=';',
                dec='.',
                row.names = TRUE,
                col.names = TRUE)

Basic statistics

Symbolic Mean

data(example3)
mean(example3$F1)
#> [1] 1.628571
mean(example3[,1])
#> [1] 1.628571
mean(example3$F2)
#> [1] 5
mean(example3[,2])
#> [1] 5
mean(example3$F2,method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]
mean(example3[,2],method = "interval")
#> <symbolic_interval[1]>
#> [1] [1.86 : 8.14]

Symbolic median

median(example3$F1)
#> [1] 1.4
median(example3[,1])
#> [1] 1.4
median(example3$F2)
#> [1] 1.5
median(example3[,2])
#> [1] 1.5
median(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]
median(example3[,6], method = 'interval')
#> <symbolic_interval[1]>
#> [1] [5.00 : 89.00]

Variance and standard deviation

var(example3[,1])
#> [1] 15.98238
var(example3[,2])
#> [1] 90.66667
var(example3$F6)
#> [1] 1872.358
var(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [2,408.97 : 1,670.51]
var(example3$F6, method = 'billard')
#> [1] 1355.143
sd(example3$F1)
#> [1] 3.997797
sd(example3$F2)
#> [1] 6.733003
sd(example3$F6)
#> [1] 30.59704
sd(example3$F6, method = 'interval')
#> <symbolic_interval[1]>
#> [1] [49.08 : 40.87]
sd(example3$F6, method = 'billard')
#> [1] 36.81226

Symbolic correlation

cor(example3$F1, example3$F4)
#> [1] 0.2864553
cor(example3[,1], example3[,4])
#>           [,1]
#> [1,] 0.2864553
cor(example3$F2, example3$F6, method = 'centers')
#> [1] -0.6693648
cor(example3$F2, example3$F6, method = 'billard')
#> [1] -0.6020041

Radar plot for intervals

library(ggpolypath)
#> Loading required package: ggplot2

data(oils)
oils <- RSDA:::to.v3(RSDA:::to.v2(oils))
sym.radar.plot(oils[2:3,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 20 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.

sym.radar.plot(oils[2:5,])
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
#> the caller; using TRUE
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0, label = round(min(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.25, label = inverse.rescale(0.25, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.5, label = inverse.rescale(0.5, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 0.75, label = inverse.rescale(0.75, : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.
#> Warning in ggplot2::geom_text(ggplot2::aes(x = 0.5, y = 1, label = round(max(real.value), : All aesthetics have length 1, but the data has 40 rows.
#> ℹ Please consider using `annotate()` or provide this layer with data containing
#>   a single row.


res <- interval.histogram.plot(oils[,2],
                               n.bins = 4,
                               col = c(2,3,4,5))

res
#> $frequency
#> [1] 25 49  1 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1
#> [4,]  4.3

res <- interval.histogram.plot(oils[,3],
                               n.bins = 3,
                               main = "Histogram",
                               col = c(2, 3, 4))

res
#> $frequency
#> [1] 50 25 25
#> 
#> $histogram
#>      [,1]
#> [1,]  0.7
#> [2,]  1.9
#> [3,]  3.1

Distances for intervals

Gowda-Diday

data("oils")
DM <- sym.dist.interval(sym.data = oils[,1:4],
                        method = "Gowda.Diday")
model <- hclust(DM)
plot(model, hang = -1)

Ichino

DM <- sym.dist.interval(sym.data= oils[,1:4],
                        method = "Ichino")
model <- hclust(DM)
plot(model, hang = -1)

Hausdorff

DM <- sym.dist.interval(sym.data = oils[,c(1,2,4)],
                        gamma = 0.5,
                        method = "Hausdorff",
                        normalize = FALSE,
                        SpanNormalize = TRUE,
                        euclidea = TRUE,
                        q = 2)
model <- hclust(DM)
plot(model, hang = -1)

Linear regression for intervals

Training

data(int_prost_train)
data(int_prost_test)
res.cm <- sym.lm(formula = lpsa~., sym.data = int_prost_train, method = 'cm')
res.cm
#> 
#> Call:
#> stats::lm(formula = formula, data = centers)
#> 
#> Coefficients:
#> (Intercept)       lcavol      lweight          age         lbph          svi  
#>    0.411537     0.579327     0.614128    -0.018659     0.143918     0.730937  
#>         lcp      gleason        pgg45  
#>   -0.205536    -0.030924     0.009507

Prediction

pred.cm <- sym.predict(model = res.cm, new.sym.data = int_prost_test)

Testing

RMSE.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7229999
RMSE.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.7192467
R2.L(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.501419
R2.U(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.5058389
deter.coefficient(int_prost_test$lpsa, pred.cm$Fitted)
#> [1] 0.4962964

LASSO regression for intervals

data(int_prost_train)
data(int_prost_test)

Training

res.cm.lasso <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 1,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.lasso <- sym.predict(res.cm.lasso,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.lasso)

plot(res.cm.lasso$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa,pred.cm.lasso)
#> [1] 0.6998882
RMSE.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.6966652
R2.L(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.533294
R2.U(int_prost_test$lpsa,pred.cm.lasso) 
#> [1] 0.5370762
deter.coefficient(int_prost_test$lpsa, pred.cm.lasso)
#> [1] 0.4887537

RIDGE regression for intervals

Training

data(int_prost_train)
data(int_prost_test)

res.cm.ridge <- sym.glm(sym.data = int_prost_train,
                        response = 9,
                        method = 'cm',
                        alpha = 0,
                        nfolds = 10,
                        grouped = TRUE)

Prediction

pred.cm.ridge <- sym.predict(res.cm.ridge,
                             response = 9,
                             int_prost_test,
                             method = 'cm')

Testing

plot(res.cm.ridge)

plot(res.cm.ridge$glmnet.fit, "lambda", label=TRUE)

RMSE.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.6996928
RMSE.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.6967989
R2.L(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.534621
R2.U(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.5379673
deter.coefficient(int_prost_test$lpsa, pred.cm.ridge)
#> [1] 0.4722093

PCA for intervals

Example 1

# Interval PCA of the oils data with the "centers" method.
data("oils")
res <- sym.pca(oils, "centers")

# Plot for the individuals.
plot(res, choix = "ind")

# Plot for the variables.
plot(res, choix = "var")

Example 2

# Interval PCA of the oils data with the "tops" method; plot the individuals.
res <- sym.pca(oils, "tops")
plot(res, choix = "ind")

Example 3

# Interval PCA of the oils data with the "principal.curves" method;
# plot the individuals.
res <- sym.pca(oils, "principal.curves")
plot(res, choix = "ind")

Example 4

# Interval PCA of the oils data with the "optimized.distance" method.
res <- sym.pca(oils, "optimized.distance")

# Plot for the individuals.
plot(res, choix = "ind")

# Plot for the variables.
plot(res, choix = "var")

Example 5

# Interval PCA of the oils data with the "optimized.variance" method.
res <- sym.pca(oils, "optimized.variance")

# Plot for the individuals.
plot(res, choix = "ind")

# Plot for the variables.
plot(res, choix = "var")

Symbolic Multiple Correspondence Analysis

Example 1

data("ex_mcfa1") 
ex_mcfa1
#>     suspect age     hair    eyes    region
#> 1         1  42    h_red e_brown     Bronx
#> 2         2  20  h_black e_green     Bronx
#> 3         3  64  h_brown e_brown  Brooklyn
#> 4         4  55 h_blonde e_brown     Bronx
#> 5         5   4  h_brown e_green Manhattan
#> 6         6  61 h_blonde e_green     Bronx
#> 7         7  61  h_white e_black    Queens
#> 8         8  32 h_blonde e_brown Manhattan
#> 9         9  39 h_blonde e_black  Brooklyn
#> 10       10  50  h_brown e_brown Manhattan
#> 11       11  41    h_red  e_blue Manhattan
#> 12       12  35 h_blonde e_green  Brooklyn
#> 13       13  56 h_blonde e_brown     Bronx
#> 14       14  52    h_red e_brown    Queens
#> 15       15  55    h_red e_green  Brooklyn
#> 16       16  25  h_brown e_brown    Queens
#> 17       17  52 h_blonde e_brown  Brooklyn
#> 18       18  28    h_red e_brown Manhattan
#> 19       19  21  h_white  e_blue Manhattan
#> 20       20  66  h_black e_black  Brooklyn
#> 21       21  67 h_blonde e_brown    Queens
#> 22       22  13  h_white  e_blue  Brooklyn
#> 23       23  39  h_brown e_green Manhattan
#> 24       24  47  h_black e_green  Brooklyn
#> 25       25  54 h_blonde e_brown     Bronx
#> 26       26  75  h_brown  e_blue  Brooklyn
#> 27       27   3  h_white e_green Manhattan
#> 28       28  40  h_white e_green Manhattan
#> 29       29  58    h_red  e_blue    Queens
#> 30       30  41  h_brown e_green     Bronx
#> 31       31  25  h_white e_black  Brooklyn
#> 32       32  75 h_blonde  e_blue Manhattan
#> 33       33  58  h_white e_brown     Bronx
#> 34       34  61  h_white e_brown Manhattan
#> 35       35  52  h_white  e_blue     Bronx
#> 36       36  19    h_red e_black    Queens
#> 37       37  58    h_red e_black     Bronx
#> 38       38  46  h_black e_green Manhattan
#> 39       39  74  h_brown e_black Manhattan
#> 40       40  26 h_blonde e_brown  Brooklyn
#> 41       41  63 h_blonde  e_blue    Queens
#> 42       42  40  h_brown e_black    Queens
#> 43       43  65  h_black e_brown  Brooklyn
#> 44       44  51 h_blonde e_brown  Brooklyn
#> 45       45  15  h_white e_black  Brooklyn
#> 46       46  32 h_blonde e_brown     Bronx
#> 47       47  68  h_white e_black Manhattan
#> 48       48  51  h_white e_black    Queens
#> 49       49  14    h_red e_green    Queens
#> 50       50  72  h_white e_brown  Brooklyn
#> 51       51   7    h_red  e_blue  Brooklyn
#> 52       52  22    h_red e_brown     Bronx
#> 53       53  52    h_red e_brown  Brooklyn
#> 54       54  62  h_brown e_green     Bronx
#> 55       55  41  h_black e_brown    Queens
#> 56       56  32  h_black e_black Manhattan
#> 57       57  58  h_brown e_brown    Queens
#> 58       58  25  h_black e_brown    Queens
#> 59       59  70 h_blonde e_green  Brooklyn
#> 60       60  64  h_brown  e_blue    Queens
#> 61       61  25  h_white  e_blue     Bronx
#> 62       62  42  h_black e_black  Brooklyn
#> 63       63  56    h_red e_black  Brooklyn
#> 64       64  41 h_blonde e_black  Brooklyn
#> 65       65   8  h_white e_black Manhattan
#> 66       66   7  h_black e_green  Brooklyn
#> 67       67  42  h_white e_brown    Queens
#> 68       68  10  h_white  e_blue Manhattan
#> 69       69  60  h_brown e_black     Bronx
#> 70       70  52 h_blonde e_brown  Brooklyn
#> 71       71  39  h_brown  e_blue Manhattan
#> 72       72  69  h_brown e_green    Queens
#> 73       73  67 h_blonde e_green Manhattan
#> 74       74  46    h_red e_black  Brooklyn
#> 75       75  72  h_black e_black    Queens
#> 76       76  66    h_red  e_blue    Queens
#> 77       77   4  h_black  e_blue Manhattan
#> 78       78  62  h_black e_green  Brooklyn
#> 79       79  10 h_blonde  e_blue     Bronx
#> 80       80  16 h_blonde e_black Manhattan
#> 81       81  59 h_blonde e_brown     Bronx
#> 82       82  63 h_blonde  e_blue Manhattan
#> 83       83  54    h_red  e_blue    Queens
#> 84       84  14  h_brown  e_blue  Brooklyn
#> 85       85  48  h_black e_green Manhattan
#> 86       86  59 h_blonde e_black     Bronx
#> 87       87  73 h_blonde e_black     Bronx
#> 88       88  51  h_brown e_brown     Bronx
#> 89       89  14  h_white e_black     Bronx
#> 90       90  58 h_blonde e_black    Queens
#> 91       91  56    h_red e_green Manhattan
#> 92       92  26    h_red  e_blue  Brooklyn
#> 93       93  59  h_brown e_black Manhattan
#> 94       94  27  h_white e_green Manhattan
#> 95       95  38  h_black e_green Manhattan
#> 96       96   5 h_blonde e_green     Bronx
#> 97       97  14  h_black  e_blue    Queens
#> 98       98  13  h_black e_brown Manhattan
#> 99       99  54  h_white  e_blue  Brooklyn
#> 100     100  66  h_white e_green Manhattan
#> 101       1  22    h_red e_black     Bronx
#> 102       2  57 h_blonde e_black Manhattan
#> 103       3  29  h_white e_green    Queens
#> 104       4  14 h_blonde e_black Manhattan
#> 105       5  47    h_red e_green     Bronx
#> 106       6  32  h_white  e_blue    Queens
#> 107       7  49    h_red  e_blue     Bronx
#> 108       8   8  h_white e_black  Brooklyn
#> 109       9  67  h_white e_brown     Bronx
#> 110      10  68  h_black e_green     Bronx
#> 111      11  15  h_black e_brown Manhattan
#> 112      12  46  h_white e_brown     Bronx
#> 113      13  68  h_white e_black Manhattan
#> 114      14  55 h_blonde  e_blue Manhattan
#> 115      15   7  h_white e_green     Bronx
#> 116      16  10  h_black e_brown  Brooklyn
#> 117      17  49    h_red  e_blue Manhattan
#> 118      18  12  h_brown  e_blue  Brooklyn
#> 119      19  41  h_white  e_blue     Bronx
#> 120      20  10  h_brown  e_blue     Bronx
#> 121      21  12  h_white e_green Manhattan
#> 122      22  53  h_white  e_blue Manhattan
#> 123      23   5  h_black e_black Manhattan
#> 124      24  46  h_brown e_black    Queens
#> 125      25  14  h_brown e_black    Queens
#> 126      26  55  h_white e_green  Brooklyn
#> 127      27  53    h_red e_brown Manhattan
#> 128      28  31  h_black e_brown Manhattan
#> 129      29  31 h_blonde e_brown    Queens
#> 130      30  55  h_brown e_black  Brooklyn
# Aggregate the classic table into a symbolic table with `suspect` as the
# concept; categorical columns are summarised as sets (sym.set).
sym.table <- classic.to.sym(
  x = ex_mcfa1,
  concept = suspect,
  default.categorical = sym.set
)
sym.table
#> # A tibble: 100 × 4
#>                age               hair              eyes               region
#>         <symblc_n>         <symblc_s>        <symblc_s>           <symblc_s>
#>  1 [22.00 : 42.00]            {h_red} {e_brown,e_black}              {Bronx}
#>  2 [20.00 : 57.00] {h_black,h_blonde} {e_green,e_black}    {Bronx,Manhattan}
#>  3 [29.00 : 64.00]  {h_brown,h_white} {e_brown,e_green}    {Brooklyn,Queens}
#>  4 [14.00 : 55.00]         {h_blonde} {e_brown,e_black}    {Bronx,Manhattan}
#>  5  [4.00 : 47.00]    {h_brown,h_red}         {e_green}    {Manhattan,Bronx}
#>  6 [32.00 : 61.00] {h_blonde,h_white}  {e_green,e_blue}       {Bronx,Queens}
#>  7 [49.00 : 61.00]    {h_white,h_red}  {e_black,e_blue}       {Queens,Bronx}
#>  8  [8.00 : 32.00] {h_blonde,h_white} {e_brown,e_black} {Manhattan,Brooklyn}
#>  9 [39.00 : 67.00] {h_blonde,h_white} {e_black,e_brown}     {Brooklyn,Bronx}
#> 10 [50.00 : 68.00]  {h_brown,h_black} {e_brown,e_green}    {Manhattan,Bronx}
#> # ℹ 90 more rows
# Symbolic MCFA on the variables in positions 2 and 3, followed by the
# scatter plot of columns 2 and 3 of the result.
res <- sym.mcfa(sym.table, c(2, 3))
mcfa.scatterplot(
  res[, 2],
  res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3)
)

# Same analysis with the variable in position 4 added.
res <- sym.mcfa(sym.table, c(2, 3, 4))
mcfa.scatterplot(
  res[, 2],
  res[, 3],
  sym.data = sym.table,
  pos.var = c(2, 3, 4)
)

Symbolic UMAP

Oils Example

datos <- oils
datos
#> # A tibble: 8 × 4
#>             GRA               FRE               IOD               SAP
#> *    <symblc_n>        <symblc_n>        <symblc_n>        <symblc_n>
#> 1 [0.93 : 0.94] [-27.00 : -18.00] [170.00 : 204.00] [118.00 : 196.00]
#> 2 [0.93 : 0.94]   [-5.00 : -4.00] [192.00 : 208.00] [188.00 : 197.00]
#> 3 [0.92 : 0.92]   [-6.00 : -1.00]  [99.00 : 113.00] [189.00 : 198.00]
#> 4 [0.92 : 0.93]   [-6.00 : -4.00] [104.00 : 116.00] [187.00 : 193.00]
#> 5 [0.92 : 0.92] [-25.00 : -15.00]   [80.00 : 82.00] [189.00 : 193.00]
#> 6 [0.91 : 0.92]     [0.00 : 6.00]   [79.00 : 90.00] [187.00 : 196.00]
#> 7 [0.86 : 0.87]   [30.00 : 38.00]   [40.00 : 48.00] [190.00 : 199.00]
#> 8 [0.86 : 0.86]   [22.00 : 32.00]   [53.00 : 77.00] [190.00 : 202.00]
x <- sym.umap(datos)
x
#>              V1          V2          V3           V4
#> 1   -0.15459026 -6.47090399  8.50377018 -6.686813493
#> 2   -0.29031473 -6.33563285  8.64911436 -6.647808023
#> 3   -0.19885869 -6.42663180  8.54645568 -6.680111467
#> 4   -0.24142679 -6.38418054  8.59741275 -6.759887631
#> 5   -0.09842493 -6.52688481  8.43254233 -6.583837813
#> 6   -0.34158767 -6.28384961  8.67428096 -6.517706756
#> 7   -0.33854848 -6.28686722  8.67969187 -6.758823952
#> 8   -0.26770538 -6.35764306  8.60457876 -6.728718924
#> 9   -4.89097889 -2.20185004 -4.20159585 -5.026531973
#> 10  -4.74630514 -2.17973084 -4.18006815 -5.100371542
#> 11  -4.85724833 -2.07124079 -4.07623842 -5.081788003
#> 12  -4.68743934 -2.16148461 -4.11249129 -5.216458848
#> 13  -4.97426921 -2.26859348 -4.09472027 -4.861969023
#> 14  -4.96720145 -2.25011804 -4.13203685 -4.910821345
#> 15  -5.01011722 -2.25164997 -4.33711230 -5.013041826
#> 16  -4.76471724 -2.36954136 -4.26570433 -4.865507871
#> 17  -5.30891927 -1.50356034 -3.57243436 -5.246664286
#> 18  -5.17521355 -1.34908383 -3.65717628 -5.183917059
#> 19  -5.40373874 -1.30786374 -3.56635441 -5.070057610
#> 20  -5.21460203 -1.27824279 -3.78393460 -5.144073009
#> 21  -5.41567724 -1.36092221 -3.49980430 -5.032790196
#> 22  -5.35501159 -1.02378189 -3.78579067 -5.270017494
#> 23  -5.35324239 -1.22696787 -3.44479178 -4.927007053
#> 24  -5.45526025 -1.13106965 -3.63686995 -5.201280290
#> 25  -5.26738225 -1.63865737 -3.87278221 -5.152018227
#> 26  -5.40674612 -1.57022804 -4.04558842 -5.450423505
#> 27  -5.30950624 -1.67552234 -3.97576192 -5.192511815
#> 28  -5.59448989 -1.44671023 -4.02451286 -5.309971442
#> 29  -5.58244607 -1.68272816 -3.86572876 -5.439358035
#> 30  -5.63395709 -1.63530978 -4.09860350 -5.481166546
#> 31  -5.59471562 -1.72534385 -3.92191437 -5.445386331
#> 32  -5.68743456 -1.56953132 -3.94087107 -5.614123825
#> 33  -5.60740532  0.79346001  1.21001576  4.143416672
#> 34  -5.51650657  0.99662415  1.50886571  4.191259915
#> 35  -5.71300369  0.59178084  1.22622536  3.861680580
#> 36  -5.65314082  0.45151241  1.14168941  3.811896100
#> 37  -5.30388316  1.04579290  1.56267477  4.115689070
#> 38  -5.36228013  1.01717474  1.65696257  4.095940104
#> 39  -5.52028253  0.91712705  1.33783286  4.017407578
#> 40  -5.50242348  1.09299538  1.42818990  3.889458453
#> 41  -4.49043411  0.19390181  1.89643891  4.450410372
#> 42  -4.33340060  0.14590455  1.85398949  4.580533631
#> 43  -4.38213681 -0.34811543  1.70800796  4.343073081
#> 44  -4.31849292 -0.26587185  1.76806457  4.461311559
#> 45  -4.50607947  0.29435279  1.76983539  4.606143219
#> 46  -4.62691171  0.36131702  1.69678104  4.707409301
#> 47  -4.54834060  0.02698089  1.59920783  4.810279907
#> 48  -4.41381854  0.21339317  1.73670619  4.836658972
#> 49  -5.63696762  1.29670275  1.40073766  4.242572070
#> 50  -5.80599327  1.37971258  1.73391179  4.163685159
#> 51  -5.74834839  1.27735946  1.43870370  4.047844481
#> 52  -5.87659392  1.32604531  1.86165053  4.300720953
#> 53  -5.66301113  1.18915691  1.64967553  4.103660197
#> 54  -5.40432326  1.46408718  1.82461902  4.352841358
#> 55  -5.92481123  1.15349977  1.78045218  4.323304657
#> 56  -5.63769201  1.42901048  2.02075456  4.417365316
#> 57  -4.85272355  0.84591634  1.75115322  4.424146223
#> 58  -4.95758084  1.07531115  2.00971453  4.493433335
#> 59  -4.88524345  0.80674640  1.90690747  4.370137410
#> 60  -4.86311932  1.12742711  1.97923942  4.528024211
#> 61  -4.82902875  0.78857067  1.73121271  4.736534239
#> 62  -4.78160796  1.24221451  2.03285289  4.687108704
#> 63  -4.75665312  1.01371799  1.85590743  4.697464152
#> 64  -4.78053025  1.21788678  2.02612075  4.673653036
#> 65  -5.81498823 16.75265984 -1.11786411 -2.068297814
#> 66  -5.61165243 16.61136249 -0.95290165 -2.137548870
#> 67  -7.49069999 16.13198966 -2.78103236 -3.106854167
#> 68  -7.53932532 16.14226608 -2.88895729 -2.952842739
#> 69  -5.55292176 16.58526968 -0.82037517 -2.244252708
#> 70  -5.69059845 16.71453329 -1.02376326 -2.048331574
#> 71  -7.29046052 15.97108896 -2.70467118 -2.965078967
#> 72  -7.60638904 16.18508755 -2.95081321 -3.016409501
#> 73  -5.57130463 16.49164444 -0.88735448 -2.248651802
#> 74  -5.53303279 16.55210709 -0.87550886 -2.174379978
#> 75  -7.46117655 16.25751187 -2.81607916 -2.971205594
#> 76  -7.34985903 16.34548597 -2.59217102 -2.642958749
#> 77  -5.56956314 16.64620804 -0.93771176 -2.089406454
#> 78  -5.61979155 16.68353765 -0.98291174 -2.049995300
#> 79  -7.43290608 16.23350061 -2.76784757 -2.750028636
#> 80  -7.30440297 16.12266686 -2.64288948 -2.869126353
#> 81  -5.69861547 -0.15087843  1.14346666  3.533176802
#> 82  -5.86802501 -0.27238125  1.08553191  3.633885800
#> 83  -5.87701130 -0.33236040  1.02117098  3.723747559
#> 84  -5.88151480 -0.42055417  1.12805201  3.622069041
#> 85  -5.75939576  0.02425503  1.06209320  3.692479918
#> 86  -5.93957847  0.16476254  1.04895947  3.645395155
#> 87  -5.77623907 -0.12609876  0.83442648  3.489680426
#> 88  -5.82082976 -0.21330490  0.96030402  3.597751569
#> 89  -4.43754695 -0.83639306  1.53689474  4.026710612
#> 90  -4.34658504 -0.91491233  1.46497711  3.949656099
#> 91  -4.61622041 -1.06463121  1.37373428  3.788925345
#> 92  -4.60971800 -1.15619346  1.28025449  3.772218171
#> 93  -4.34241633 -0.70945638  1.65386642  4.095282730
#> 94  -4.36564359 -0.77550109  1.60304449  4.063926130
#> 95  -4.75199169 -0.93301709  1.48190239  3.703708641
#> 96  -4.67261991 -1.11435010  1.30626351  3.733837098
#> 97  14.87634839 -5.77569791 -0.69968712 -0.017718648
#> 98  14.98799901 -5.57359296 -0.55853274 -0.017438347
#> 99  14.70056912 -5.77204462 -0.38254840  0.194253090
#> 100 14.76983695 -5.62863845 -0.25645904  0.162055541
#> 101 14.84184512 -5.96710749 -0.58511903  0.119060085
#> 102 14.88468727 -5.54426764 -0.84027353 -0.063887370
#> 103 14.59407031 -5.87184441 -0.31355468  0.262838427
#> 104 14.73336043 -5.73495705 -0.46446254 -0.006295001
#> 105 14.97521905 -5.98786447 -0.33137702  0.937724713
#> 106 15.13498383 -5.57717686 -0.29748813  0.929244770
#> 107 14.78337181 -5.85116461 -0.15436208  0.699014262
#> 108 14.90057466 -5.53862544  0.02584558  0.785586331
#> 109 14.94256809 -5.75961971 -0.52504251  0.989186445
#> 110 15.18547034 -5.76643728 -0.30469930  1.070868705
#> 111 14.78736632 -5.79638579  0.11173033  0.904735882
#> 112 14.90642503 -5.61015872  0.05170102  0.836379325
#> 113 15.25194274 -6.08797491 -0.94290382  0.224506008
#> 114 15.05312810 -5.88524551 -0.92402076  0.072535723
#> 115 14.84290899 -5.68696842 -0.83608150  0.126496658
#> 116 14.79609925 -5.78170552 -0.67434504 -0.092047567
#> 117 15.22409111 -6.14546405 -1.12160808  0.184048283
#> 118 15.08011592 -5.98692165 -1.30103625  0.128366702
#> 119 14.90602390 -5.96057391 -1.21006088  0.222626366
#> 120 14.83958564 -6.04161401 -1.11312674  0.144582418
#> 121 15.17756881 -6.02558224 -0.74119933  1.060093875
#> 122 15.10616146 -5.85080110 -0.66058053  1.143606641
#> 123 14.91400685 -6.07439349 -0.31527754  1.254347207
#> 124 15.08211362 -5.95003706 -0.35306463  1.303926481
#> 125 15.23985593 -6.08346403 -0.91646012  1.003421551
#> 126 15.31495873 -6.16240428 -1.03016783  1.080000235
#> 127 15.05270158 -6.19092724 -0.66734099  1.366835571
#> 128 15.10791084 -6.13561362 -0.61054769  1.369864943
plot(x)

Cardiological Example

datos <- Cardiological
datos
#> # A tibble: 11 × 3
#>               Pulse              Syst             Diast
#>          <symblc_n>        <symblc_n>        <symblc_n>
#>  1  [44.00 : 68.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  2  [60.00 : 72.00]  [90.00 : 130.00]   [70.00 : 90.00]
#>  3  [56.00 : 90.00] [140.00 : 180.00]  [90.00 : 100.00]
#>  4 [70.00 : 112.00] [110.00 : 142.00]  [80.00 : 108.00]
#>  5  [54.00 : 72.00]  [90.00 : 100.00]   [50.00 : 70.00]
#>  6 [70.00 : 100.00] [130.00 : 160.00]  [80.00 : 110.00]
#>  7  [63.00 : 75.00]  [60.00 : 100.00] [140.00 : 150.00]
#>  8 [72.00 : 100.00] [130.00 : 160.00]   [76.00 : 90.00]
#>  9  [76.00 : 98.00] [110.00 : 190.00]  [70.00 : 110.00]
#> 10  [86.00 : 96.00] [138.00 : 180.00]  [90.00 : 110.00]
#> 11 [86.00 : 100.00] [110.00 : 150.00]  [78.00 : 100.00]
x <- sym.umap(datos)
x
#>              V1          V2           V3
#> 1   1.492374229 -2.82369547  3.316174658
#> 2   2.021402799 -2.72194513  2.248225363
#> 3   1.467476438 -3.03724089  3.537436813
#> 4   2.036350799 -2.80243894  2.399044173
#> 5   1.281012559 -2.67019203  3.356764190
#> 6   1.559888070 -2.42920193  2.348111094
#> 7   1.276640903 -2.71121778  3.189343404
#> 8   1.380016251 -2.36589795  2.035990367
#> 9   1.463493149 -2.44106116  2.827551810
#> 10  1.739361293 -2.39932908  1.985818581
#> 11 -0.001316018 -1.30974390  0.787640839
#> 12  0.325466510 -1.48490195  1.017944209
#> 13  1.235953318 -2.02909114  2.411042541
#> 14  1.288848293 -1.87729322  2.004306817
#> 15  0.117519092 -0.89912487  0.375067896
#> 16  0.407814720 -1.00786460  0.593068730
#> 17 -0.077523965 -0.67547596  0.263615476
#> 18 -1.386604500  1.92182957 -1.587407288
#> 19 -0.376074344 -0.36553328  0.139285637
#> 20 -1.649887323  1.87858786 -0.237545170
#> 21  0.127510970 -0.60249723  0.244101332
#> 22 -0.728582446  2.09776374 -1.649419843
#> 23 -0.288885462  0.02791155  0.127990100
#> 24 -1.391654904  1.90353583 -0.085363675
#> 25  1.004964917 -1.87593802  1.601291066
#> 26 -2.068344445  2.27783243 -2.272780626
#> 27 -0.278321672 -0.96025471  0.629724461
#> 28 -2.242492463  2.34977955 -1.805659327
#> 29  1.035161682 -0.69310131  0.029947763
#> 30 -1.238303705  2.43422069 -2.416402087
#> 31  0.550144560 -0.32363345  0.051618241
#> 32 -1.205098227  2.79217851 -1.733691819
#> 33  1.690162335 -2.91771718  3.237367089
#> 34  1.825509777 -2.87287801  2.277533139
#> 35  1.798554078 -2.85987247  3.230330051
#> 36  1.800938357 -2.88601616  2.177772236
#> 37  1.203078964 -2.72475722  3.207792242
#> 38  1.576212482 -2.27744275  1.841339596
#> 39  1.296139403 -2.58084302  3.134396880
#> 40  1.682355967 -2.21216814  1.910920130
#> 41  0.067145786 -1.15829648  0.784797886
#> 42 -2.219954922  2.00522225 -2.105293857
#> 43 -0.446852873 -0.32467975  0.523082178
#> 44 -2.263169895  2.38474311 -1.211414891
#> 45  0.858803941 -0.53192561  0.060954396
#> 46 -1.070608911  2.56183267 -2.070492226
#> 47  0.045618846  0.02942103  0.180168512
#> 48 -1.080132764  2.53327217 -1.068912540
#> 49  2.719314295 -1.02911328 -1.480344983
#> 50  2.998315896 -1.21110819 -1.608777853
#> 51  2.547654664 -1.16432723 -1.625476623
#> 52  2.539614525 -0.91519038 -1.686051389
#> 53  2.735221325 -0.76211759 -1.329158592
#> 54  2.782409196 -0.93820027 -1.594115781
#> 55  2.382103136 -1.15499828 -1.421644243
#> 56  2.580627342 -1.16062040 -1.589318713
#> 57  0.034836849 -1.38345419  0.931883129
#> 58 -2.428925688  2.24307554 -2.273602821
#> 59 -0.730476220 -0.42620275  0.558619350
#> 60 -2.162328256  2.15167360 -1.221672556
#> 61  0.313391569 -0.85117843  0.745287116
#> 62 -1.803460683  2.39948064 -1.862648555
#> 63 -0.289715843 -0.21216785  0.400770173
#> 64 -1.831645426  2.34532164 -1.037034336
#> 65  1.152850733 -2.01888967  1.576329689
#> 66 -2.280955216  1.96257931 -2.427155290
#> 67 -0.935731763 -0.02189128  0.286877983
#> 68 -2.066059922  2.07155262 -0.474164400
#> 69  1.197929708 -0.57054480 -0.251514543
#> 70 -0.982038900  2.25733636 -2.414028421
#> 71 -1.034108559  1.40751168  0.085185199
#> 72 -1.229743050  2.00702736  0.006488224
#> 73 -1.036507854  1.65988493 -1.648451806
#> 74 -1.601361242  2.16904116 -1.706782517
#> 75 -1.485746046  1.67362785 -0.095622159
#> 76 -1.764801750  2.03653429 -0.156642657
#> 77 -0.565673166  2.00639815 -1.707147680
#> 78 -0.828052309  2.49188170 -1.784227803
#> 79 -1.098815011  1.62343049  0.011943884
#> 80 -1.392091949  1.81024812  0.193818287
#> 81 -2.140023103  1.60073132 -2.385532693
#> 82 -2.131333242  2.08541148 -2.440900401
#> 83 -1.363594282  1.50084909 -1.376940606
#> 84 -2.293691579  2.33143089 -1.563110712
#> 85 -0.887569607  1.89923006 -2.329397576
#> 86 -1.299613447  2.36501446 -2.415721285
#> 87 -0.737739209  1.77375000 -1.430482713
#> 88 -1.224577570  2.60212167 -1.302709875
plot(x)

Length of intervals

# Load the oils interval data and compute the length of every interval
# (one row per concept, one column per variable in the printed result).
data(oils)
datos <- oils
interval.length(datos)
#>      GRA FRE IOD SAP
#> L  0.005   9  34  78
#> P  0.007   1  16   9
#> Co 0.002   5  14   9
#> S  0.006   2  12   6
#> Ca 0.001  10   2   4
#> O  0.005   6  11   9
#> B  0.010   8   8   9
#> H  0.006  10  24  12

PCA Histogram

Hardwood Data

# Load the hardwood histogram data and keep its row and column names,
# which are used below to size the bin matrix.
data("hardwoodBrito")
Hardwood.histogram <- hardwoodBrito
Hardwood.names <- row.names(Hardwood.histogram)
Hardwood.cols <- colnames(Hardwood.histogram)
Hardwood.histogram
#> # A tibble: 5 × 4
#>         ANNT       JULT       ANNP       MITM
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1     <hist>     <hist>     <hist>     <hist>
#> 2     <hist>     <hist>     <hist>     <hist>
#> 3     <hist>     <hist>     <hist>     <hist>
#> 4     <hist>     <hist>     <hist>     <hist>
#> 5     <hist>     <hist>     <hist>     <hist>

Hardwood.histogram[[1]][[1]]
#> $breaks
#> [1] -3.9  4.2 10.3 20.6
#> 
#> $props
#> [1] 0.5 0.4 0.1

Weighted Center Matrix

weighted.center<-weighted.center.Hist.RSDA(Hardwood.histogram)

Bin Matrix

# One cell per (concept, variable) pair, every cell set to 3.
# NOTE(review): presumably the number of bins used for each histogram -- confirm.
BIN.Matrix <- matrix(
  3,
  nrow = length(Hardwood.names),
  ncol = length(Hardwood.cols)
)

PCA

pca.hist<-sym.histogram.pca(Hardwood.histogram,BIN.Matrix)
#> Warning: Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
#> Setting row names on a tibble is deprecated.
pca.hist$classic.PCA
#> **Results for the Principal Component Analysis (PCA)**
#> The analysis was performed on 85 individuals, described by 4 variables
#> *The results are available in the following objects:
#> 
#>    name               description                                
#> 1  "$eig"             "eigenvalues"                              
#> 2  "$var"             "results for the variables"                
#> 3  "$var$coord"       "coord. for the variables"                 
#> 4  "$var$cor"         "correlations variables - dimensions"      
#> 5  "$var$cos2"        "cos2 for the variables"                   
#> 6  "$var$contrib"     "contributions of the variables"           
#> 7  "$ind"             "results for the individuals"              
#> 8  "$ind$coord"       "coord. for the individuals"               
#> 9  "$ind$cos2"        "cos2 for the individuals"                 
#> 10 "$ind$contrib"     "contributions of the individuals"         
#> 11 "$ind.sup"         "results for the supplementary individuals"
#> 12 "$ind.sup$coord"   "coord. for the supplementary individuals" 
#> 13 "$ind.sup$cos2"    "cos2 for the supplementary individuals"   
#> 14 "$call"            "summary statistics"                       
#> 15 "$call$centre"     "mean of the variables"                    
#> 16 "$call$ecart.type" "standard error of the variables"          
#> 17 "$call$row.w"      "weights for the individuals"              
#> 18 "$call$col.w"      "weights for the variables"
pca.hist$sym.hist.matrix.PCA
#> # A tibble: 5 × 4
#>         PC.1       PC.2       PC.3       PC.4
#> * <symblc_h> <symblc_h> <symblc_h> <symblc_h>
#> 1     <hist>     <hist>     <hist>     <hist>
#> 2     <hist>     <hist>     <hist>     <hist>
#> 3     <hist>     <hist>     <hist>     <hist>
#> 4     <hist>     <hist>     <hist>     <hist>
#> 5     <hist>     <hist>     <hist>     <hist>

Plots

# Plot for the ACER concept on the first principal axis (pca.axes = 1).
ACER.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = "ACER",
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)

ACER.p1

# Same plot for every concept present in the bins data frame.
ALL.p1 <- Sym.PCA.Hist.PCA.k.plot(
  data.sym.df = pca.hist$Bins.df,
  title.graph = " ",
  concepts.name = unique(pca.hist$Bins.df$Object.Name),
  title.x = "First Principal Component (84.83%)",
  title.y = "Frequency",
  pca.axes = 1
)

ALL.p1
#> Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps

Hardwood.quantiles.PCA<-quantiles.RSDA(pca.hist$sym.hist.matrix.PCA,3)
#> Warning in min(which(props.cum >= percentils.RSDA[i])): no non-missing
#> arguments to min; returning Inf
#> Warning: Setting row names on a tibble is deprecated.

# Labels and selections for the arrow plot of the ACER concept on PC.1 / PC.2.
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"
concept.names <- "ACER"
var.names <- c("PC.1", "PC.2")

quantile.ACER.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

quantile.ACER.plot

# Same arrow plot, now for every concept (all row names of the quantile table).
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "First Principal Component (84.83%)"
axes.y.label <- "Second Principal Component (9.70%)"
concept.names <- row.names(Hardwood.quantiles.PCA)
var.names <- c("PC.1", "PC.2")

quantile.plot <- Percentil.Arrow.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

quantile.plot
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_point()`).
#> Warning: Removed 1 row containing missing values or values outside the scale range
#> (`geom_segment()`).

# Labels and selections for the quantile PCA plot of the ACER concept.
label.name <- "Hard Wood"
Title <- "First Principal Plane"
axes.x.label <- "PC 1 (84.83%)"
axes.y.label <- "PC 2 (9.70%)"
concept.names <- "ACER"
var.names <- c("PC.1", "PC.2")

plot.3D.HW <- sym.quantiles.PCA.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

plot.3D.HW
# Switch to all concepts, then draw the two all-quantiles plots with the
# labels defined above.
concept.names <- row.names(Hardwood.quantiles.PCA)
sym.all.quantiles.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)
#> Warning: Ignoring 4 observations
sym.all.quantiles.mesh3D.plot(
  Hardwood.quantiles.PCA,
  concept.names,
  var.names,
  Title,
  axes.x.label,
  axes.y.label,
  label.name
)

Kolmogorov–Smirnov (KS) distance

# NOTE(review): the second argument (100) is presumably the number of
# quantiles computed per histogram -- confirm against quantiles.RSDA.KS.
Hardwood.quantiles.PCA.2 <- quantiles.RSDA.KS(pca.hist$sym.hist.matrix.PCA, 100)
#> Warning: Setting row names on a tibble is deprecated.

# Histograms in the first column of the result, rows 1 to 5.
h  <- Hardwood.quantiles.PCA.2[[1]][[1]]
h2 <- Hardwood.quantiles.PCA.2[[1]][[2]]
h3 <- Hardwood.quantiles.PCA.2[[1]][[3]]
h4 <- Hardwood.quantiles.PCA.2[[1]][[4]]
h5 <- Hardwood.quantiles.PCA.2[[1]][[5]]

# Sorted union of the break points of the five histograms; every ECDF is
# evaluated on this common grid so the curves can be compared point by point.
breaks.unique <- unique(
  c(h$breaks, h2$breaks, h3$breaks, h4$breaks, h5$breaks)
)
tmp.unique <- sort(breaks.unique, na.last = TRUE)

# HistRSDAToEcdf() returns a function, which is called directly on the grid.
tmp  <- HistRSDAToEcdf(h)(v = tmp.unique)
tmp2 <- HistRSDAToEcdf(h2)(v = tmp.unique)
tmp3 <- HistRSDAToEcdf(h3)(v = tmp.unique)
tmp4 <- HistRSDAToEcdf(h4)(v = tmp.unique)
tmp5 <- HistRSDAToEcdf(h5)(v = tmp.unique)

# The Kolmogorov-Smirnov distance is the maximum of the absolute differences
# between two ECDFs (here the histograms in rows 1 and 2).
abs_dif <- abs(tmp2 - tmp)
distancia_ks <- max(abs_dif)
distancia_ks
#> [1] 0.05857869
library(tidyr)
# Gather the evaluated ECDFs in one data frame (one column per hardwood) and
# reshape it to long format: one row per (PC.1 value, hardwood) pair.
df.HW <- pivot_longer(
  data.frame(
    PC.1 = tmp.unique,
    ACER = tmp,
    ALNUS = tmp2,
    FRAXINUS = tmp3,
    JUGLANS = tmp4,
    QUERCUS = tmp5
  ),
  cols = -PC.1,
  names_to = "HardWood",
  values_to = "ecdf"
)

# Line plot of the ECDF of every hardwood along the first principal component.
grafico_ecdf <- ggplot(
  data = df.HW,
  aes(x = PC.1, y = ecdf, color = HardWood)
) +
  # `linewidth` replaces `size` for lines (`size` is deprecated for line
  # geoms since ggplot2 3.4.0). A single line layer is enough: the extra
  # default-width geom_line() only redrew the same curves on top.
  geom_line(linewidth = 1) +
  labs(
    color = "Hardwood",
    y = "Empirical Cumulative Distribution "
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 12)
  )

grafico_ecdf