Title: | Tools and Methods for Primatological Data Science |
---|---|
Description: | Data from All the World's Primates relational SQL database and other tabular datasets are made available via drivers and connection functions. Additionally we provide several functions and examples to facilitate the merging and aggregation of these tabular inputs. |
Authors: | David Schruth [aut][cre], Marc Myers [ctb], Noel Rowe [aut] |
Maintainer: | David Schruth <[email protected]> |
License: | Apache License |
Version: | 0.2.0 |
Built: | 2024-12-22 06:23:13 UTC |
Source: | CRAN |
Adds a genus_species column to the specified dataframe
add.gnsp.clmn(df,gn="Genus",sp="Species",rownames=FALSE,new.col=TRUE, gnsp.col = "gn_sp")
add.gnsp.clmn(df,gn="Genus",sp="Species",rownames=FALSE,new.col=TRUE, gnsp.col = "gn_sp")
df |
input data.frame |
gn |
column name for genus |
sp |
column name for species |
rownames |
use the new gn_sp column to assign data.frame rownames |
new.col |
TRUE if gn_sp column is to be retained, FALSE if it is to be removed |
gnsp.col |
the name of the new column to add (default: "gn_sp") |
modified data.frame (with genus species info concatenated and added)
primates.tab <- AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn='MonkeyNumberGroves') primates.tab <- add.gnsp.clmn(primates.tab,gn="Genus",sp="Species",rownames=FALSE,new.col=TRUE)
primates.tab <- AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn='MonkeyNumberGroves') primates.tab <- add.gnsp.clmn(primates.tab,gn="Genus",sp="Species",rownames=FALSE,new.col=TRUE)
Connect to the All the World's Primates database
AWP.connect(drv=AWP.driver(), prefix='jdbc:jtds:sqlserver', server="s09.everleap.com", port=1433, db="DB_3918_atwpreview", user="DB_3918_atwpreview_user", pw="atwpreview_$_j")
AWP.connect(drv=AWP.driver(), prefix='jdbc:jtds:sqlserver', server="s09.everleap.com", port=1433, db="DB_3918_atwpreview", user="DB_3918_atwpreview_user", pw="atwpreview_$_j")
drv |
driver (output from AWP.driver()) |
prefix |
prefix to the URL (before "://") |
server |
domain name for the database server |
port |
port number used by the server's database |
db |
database name |
user |
database user name |
pw |
database user password |
a connection object for SQL
con <- AWP.connect(drv=AWP.driver())
con <- AWP.connect(drv=AWP.driver())
Load the driver to utilize the database software
AWP.driver(drv.name="net.sourceforge.jtds.jdbc.Driver", drv.file=system.file("drivers","jtds-1.2.8.jar", package='primate'))
AWP.driver(drv.name="net.sourceforge.jtds.jdbc.Driver", drv.file=system.file("drivers","jtds-1.2.8.jar", package='primate'))
drv.name |
The name of the driver |
drv.file |
The file name for the database driver |
driver argument to AWP.connect
AWP.driver()
AWP.driver()
Get the lookup table from the All the World's Primates SQL database
AWP.get.lookup.table(con=AWP.connect(),tab.nm="TextType")
AWP.get.lookup.table(con=AWP.connect(),tab.nm="TextType")
con |
connection object |
tab.nm |
table name (for the parent table) |
a data.frame corresponding to SQL table
AWP.get.lookup.table(con=AWP.connect(),tab.nm="TextType")
AWP.get.lookup.table(con=AWP.connect(),tab.nm="TextType")
Retrieve a table from the All the World's Primates SQL database
AWP.get.SQL.table(con=AWP.connect(), tab.nm="tblGrovesMonkeys", clmns=c('all'),xpnd=FALSE)
AWP.get.SQL.table(con=AWP.connect(), tab.nm="tblGrovesMonkeys", clmns=c('all'),xpnd=FALSE)
tab.nm |
table name (defaults to the main primate species list) |
con |
connection object |
clmns |
columns to return |
xpnd |
expand the lookup column codes into full text strings |
a data.frame corresponding to SQL table
online.version <- AWP.get.SQL.table(tab.nm='LMType') #a small example table
online.version <- AWP.get.SQL.table(tab.nm='LMType') #a small example table
List available tables from the All the World's Primates SQL database
AWP.list.SQL.tables(con=AWP.connect(), all=FALSE)
AWP.list.SQL.tables(con=AWP.connect(), all=FALSE)
con |
connection object from AWP.connect |
all |
list all tables available |
a list (vector) of SQL table names
AWP.list.SQL.tables(con=AWP.connect(), all=FALSE)
AWP.list.SQL.tables(con=AWP.connect(), all=FALSE)
Read an All the World's Primates table from the local package cache.
AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn=NA)
AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn=NA)
tab.nm |
table name |
id.clmn |
id column of table |
data.frame corresponding to SQL table
primates.tab <- AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn='MonkeyNumberGroves')
primates.tab <- AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn='MonkeyNumberGroves')
Run arbitrary SQL queries on the All the World's Primates database
AWP.run.SQL(con=AWP.connect(), sql=NULL)
AWP.run.SQL(con=AWP.connect(), sql=NULL)
con |
connection object |
sql |
SQL string |
results of query
AWP.run.SQL(con=AWP.connect(), sql=NULL)
AWP.run.SQL(con=AWP.connect(), sql=NULL)
Regroup based on the old or the new data frame using a direction parameter.
regroup.equivalent(df, gnsp.old, gnsp.new, clmns, agg='mean', direction='old2new')
regroup.equivalent(df, gnsp.old, gnsp.new, clmns, agg='mean', direction='old2new')
df |
a dataframe |
gnsp.old |
old nomenclature |
gnsp.new |
new nomenclature |
clmns |
the columns in the data.frame to re-group |
agg |
the aggregation type |
direction |
the aggregation priority |
a regrouped data frame
primates.tab <- AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn='MonkeyNumberGroves') primates.tab <- add.gnsp.clmn(primates.tab,gn="Genus",sp="Species",rownames=FALSE,gnsp.col='gn_sp') primates.tab <- add.gnsp.clmn(primates.tab,gn="Genus",sp="Species",rownames=FALSE,gnsp.col='gnspg') pri.grpd <- regroup.gnsp(df=primates.tab,clmns=colnames(primates.tab), agg='max') out <- regroup.equivalent(pri.grpd, gnsp.old=gn_sp, gnsp.new='gnspg', clmns='MonkeyNumberGroves', agg='paste', direction='old2new')
primates.tab <- AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn='MonkeyNumberGroves') primates.tab <- add.gnsp.clmn(primates.tab,gn="Genus",sp="Species",rownames=FALSE,gnsp.col='gn_sp') primates.tab <- add.gnsp.clmn(primates.tab,gn="Genus",sp="Species",rownames=FALSE,gnsp.col='gnspg') pri.grpd <- regroup.gnsp(df=primates.tab,clmns=colnames(primates.tab), agg='max') out <- regroup.equivalent(pri.grpd, gnsp.old=gn_sp, gnsp.new='gnspg', clmns='MonkeyNumberGroves', agg='paste', direction='old2new')
Regroup a given data.frame by a column designated as unique genus_species combination. This function is essentially a wrapper for caroline:::groupBy()
regroup.gnsp(df,clmns,agg='mean',by='gn_sp')
regroup.gnsp(df,clmns,agg='mean',by='gn_sp')
df |
a dataframe |
clmns |
columns |
agg |
type of aggregation to be used |
by |
the column name by which the data.frame should be re-grouped |
a data.frame regrouped (aggregated) by the column named in 'by'
primates.tab <- AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn='MonkeyNumberGroves') out <- regroup.gnsp(df=primates.tab,clmns=colnames(primates.tab), agg='paste')
primates.tab <- AWP.read.pkg.tab(tab.nm='dbo_tblGrovesMonkeys', id.clmn='MonkeyNumberGroves') out <- regroup.gnsp(df=primates.tab,clmns=colnames(primates.tab), agg='paste')
Update the values in an old dataframe with the values in a new dataframe. Useful for comparing a pre-existing or self-assembled dataset with AWP.
updatevals(x,y=NULL,v.old,v.new,verbose=TRUE,update=FALSE, na.only=TRUE,all=TRUE,missing.only=TRUE)
updatevals(x,y=NULL,v.old,v.new,verbose=TRUE,update=FALSE, na.only=TRUE,all=TRUE,missing.only=TRUE)
x |
first dataframe |
y |
second dataframe |
v.old |
name of the column holding the old values |
v.new |
name of the column holding the new values |
verbose |
get all the messages |
update |
update all the old with everything new |
na.only |
just update the missing values in the old dataframe |
all |
perform merge on all columns |
missing.only |
update only those that have missing values |
values of one data frame are updated to reflect new data in another
pri.tab <- AWP.read.pkg.tab(tab.nm='Locomotion') #pri.AWP <- AWP.get.SQL.table(tab.nm='Locomotion') dim(pri.tab) #should may be fewer cols or rows locally ... #dim(pri.AWP) # than there are available online. apply(pri.tab, 2, function(x) sum(is.na(x))) # also more missing values #apply(pri.AWP, 2, function(x) sum(is.na(x))) # locally than online # update the "Comment" column locally with the same online vars <- c('LocomotionID','Comment') #tmp <- merge(x=pri.tab[,c(vars)] ,y=pri.AWP[,c(vars)], by='LocomotionID') #out <- updatevals(x=tmp,y=NULL,v.old='Comment.x',v.new='Comment.y')
pri.tab <- AWP.read.pkg.tab(tab.nm='Locomotion') #pri.AWP <- AWP.get.SQL.table(tab.nm='Locomotion') dim(pri.tab) #should may be fewer cols or rows locally ... #dim(pri.AWP) # than there are available online. apply(pri.tab, 2, function(x) sum(is.na(x))) # also more missing values #apply(pri.AWP, 2, function(x) sum(is.na(x))) # locally than online # update the "Comment" column locally with the same online vars <- c('LocomotionID','Comment') #tmp <- merge(x=pri.tab[,c(vars)] ,y=pri.AWP[,c(vars)], by='LocomotionID') #out <- updatevals(x=tmp,y=NULL,v.old='Comment.x',v.new='Comment.y')