Title: | A Collection of ML Tools for Conservation Research |
---|---|
Description: | Functions required to classify subjects within camera trap field data. The package can handle both images and videos. The authors recommend a two-step approach using Microsoft's 'MegaDetector' model and then a second model trained on the classes of interest. |
Authors: | Kyra Swanson [aut, cre] |
Maintainer: | Kyra Swanson <[email protected]> |
License: | MIT + file LICENSE |
Version: | 2.0.0 |
Built: | 2025-02-12 02:52:20 UTC |
Source: | CRAN |
This module provides functions and classes for managing files and directories.
build_file_manifest( image_dir, exif = TRUE, out_file = NULL, offset = 0, recursive = TRUE )
build_file_manifest( image_dir, exif = TRUE, out_file = NULL, offset = 0, recursive = TRUE )
image_dir |
folder to search through and find media files |
exif |
returns date and time information from exif data, defaults to true |
out_file |
directory to save .csv of manifest to |
offset |
add offset in hours for videos when using the File Modified date, defaults to 0 |
recursive |
Should directories be scanned recursively? Default TRUE |
Kyra Swanson 2023 Find Image/Video Files and Gather exif Data
files dataframe with or without file dates
## Not run: files <- build_file_manifest("C:\\Users\\usr\\Pictures\\") ## End(Not run)
## Not run: files <- build_file_manifest("C:\\Users\\usr\\Pictures\\") ## End(Not run)
Check for files existence and prompt user if they want to load
check_file(file)
check_file(file)
file |
the full path of the file to check |
a boolean indicating whether a file was found and the user wants to load or not
## Not run: check_file("path/to/newfile.csv") ## End(Not run)
## Not run: check_file("path/to/newfile.csv") ## End(Not run)
Crops all images from an input file with specific required columns: Frame, bbox1, bbox2, bbox3, and bbox4
crop_images(imagelist, outdir)
crop_images(imagelist, outdir)
imagelist |
The path for the input csv file |
outdir |
The path where generated cropped images should be uploaded |
no return value, outputs the cropped image
## Not run: crop_images("/image/path/file.csv", "/output/path/") ## End(Not run)
## Not run: crop_images("/image/path/file.csv", "/output/path/") ## End(Not run)
Apply MegaDetector to a Given Batch of Images
detect_MD_batch( detector, image_file_names, checkpoint_path = NULL, checkpoint_frequency = -1, confidence_threshold = 0.1, quiet = TRUE, image_size = NULL, file_col = "Frame" )
detect_MD_batch( detector, image_file_names, checkpoint_path = NULL, checkpoint_frequency = -1, confidence_threshold = 0.1, quiet = TRUE, image_size = NULL, file_col = "Frame" )
detector |
preloaded md model |
image_file_names |
list of image filenames, a single image filename, or folder |
checkpoint_path |
path to checkpoint file |
checkpoint_frequency |
write results to checkpoint file every N images |
confidence_threshold |
only detections above this threshold are returned |
quiet |
print debugging statements when false, defaults to true |
image_size |
overrides default image size, 1280 |
file_col |
select which column if image_file_names is a manifest |
list of dictionaries of MegaDetector detections
## Not run: mdres <- detect_MD_batch(md_py, allframes$Frame)
## Not run: mdres <- detect_MD_batch(md_py, allframes$Frame)
Extract Frames for Single Video
extract_frame_single(file_path, out_dir, fps = NULL, frames = NULL)
extract_frame_single(file_path, out_dir, fps = NULL, frames = NULL)
file_path |
filepath to video |
out_dir |
directory to save frames to |
fps |
number of frames per second to save |
frames |
number of frames evenly distributed to save |
dataframe of filepaths, frame paths
## Not run: result <- extract_frame_single(video$FilePath, out_dir, frames=3) ## End(Not run)
## Not run: result <- extract_frame_single(video$FilePath, out_dir, frames=3) ## End(Not run)
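This function can take a dataframe of videos and extract still frames from each video, either at a given number of frames per second or as a fixed number of sampled frames.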
extract_frames( files, out_dir = tempfile(), out_file = NULL, fps = NULL, frames = NULL, file_col = "FilePath", parallel = FALSE, workers = 1, checkpoint = 1000 )
extract_frames( files, out_dir = tempfile(), out_file = NULL, fps = NULL, frames = NULL, file_col = "FilePath", parallel = FALSE, workers = 1, checkpoint = 1000 )
files |
dataframe of videos |
out_dir |
directory to save frames to |
out_file |
file to which results will be saved |
fps |
frames per second, otherwise determine mathematically |
frames |
number of frames to sample |
file_col |
string value indexing which column contains file paths |
parallel |
Toggle for parallel processing, defaults to FALSE |
workers |
number of processors to use if parallel, defaults to 1 |
checkpoint |
if not parallel, checkpoint every n files, defaults to 1000 |
dataframe of still frames for each video
## Not run: frames <- extract_frames(videos, out_dir = "C:\\Users\\usr\\Videos\\", frames = 5) ## End(Not run)
## Not run: frames <- extract_frames(videos, out_dir = "C:\\Users\\usr\\Videos\\", frames = 5) ## End(Not run)
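This function can take a dataframe of videos and extract still frames from each video, either at a given number of frames per second or as a fixed number of sampled frames.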
extract_frames_old( files, out_dir = tempfile(), out_file = NULL, fps = NULL, frames = NULL, file_col = "FilePath", parallel = FALSE, workers = 1, checkpoint = 1000 )
extract_frames_old( files, out_dir = tempfile(), out_file = NULL, fps = NULL, frames = NULL, file_col = "FilePath", parallel = FALSE, workers = 1, checkpoint = 1000 )
files |
dataframe of videos |
out_dir |
directory to save frames to |
out_file |
file to which results will be saved |
fps |
frames per second, otherwise determine mathematically |
frames |
number of frames to sample |
file_col |
string value indexing which column contains file paths |
parallel |
Toggle for parallel processing, defaults to FALSE |
workers |
number of processors to use if parallel, defaults to 1 |
checkpoint |
if not parallel, checkpoint every n files, defaults to 1000 |
dataframe of still frames for each video
## Not run: frames <- extractFrames(videos, out_dir = "C:\\Users\\usr\\Videos\\", frames = 5) ## End(Not run)
## Not run: frames <- extractFrames(videos, out_dir = "C:\\Users\\usr\\Videos\\", frames = 5) ## End(Not run)
Return a dataframe of only MD animals
get_animals(manifest)
get_animals(manifest)
manifest |
all megadetector frames |
animal frames classified by MD
## Not run: animals <- get_animals(imagesall) ## End(Not run)
## Not run: animals <- get_animals(imagesall) ## End(Not run)
Return MD empty, vehicle and human images in a dataframe
get_empty(manifest)
get_empty(manifest)
manifest |
all megadetector frames |
list of empty/human/vehicle allframes with md classification
## Not run: empty <- get_empty(imagesall) ## End(Not run)
## Not run: empty <- get_empty(imagesall) ## End(Not run)
Load .csv or .Rdata file
load_data(file)
load_data(file)
file |
the full path of the file to load |
data extracted from the file
## Not run: load_data("path/to/newfile.csv") ## End(Not run)
## Not run: load_data("path/to/newfile.csv") ## End(Not run)
Load a Classifier Model with animl-py
load_model(model_path, class_file, device = NULL, architecture = "CTL")
load_model(model_path, class_file, device = NULL, architecture = "CTL")
model_path |
path to model |
class_file |
path to class list |
device |
send model to the specified device |
architecture |
model architecture |
list of c(classifier, class_list)
## Not run: andes <- load_model('andes_v1.pt','andes_classes.csv')
## Not run: andes <- load_model('andes_v1.pt','andes_classes.csv')
Load MegaDetector
megadetector(model_path, device = NULL)
megadetector(model_path, device = NULL)
model_path |
path to MegaDetector model (v5) |
device |
load model onto given device description |
megadetector object
## Not run: md_py <- megadetector("/mnt/machinelearning/megaDetector/md_v5a.0.0.pt")
## Not run: md_py <- megadetector("/mnt/machinelearning/megaDetector/md_v5a.0.0.pt")
parse MD results into a simple dataframe
parse_MD( results, manifest = NULL, out_file = NULL, buffer = 0.02, threshold = 0, file_col = "Frame" )
parse_MD( results, manifest = NULL, out_file = NULL, buffer = 0.02, threshold = 0, file_col = "Frame" )
results |
json output from megadetector |
manifest |
dataframe containing all frames |
out_file |
path to save dataframe |
buffer |
percentage buffer to move bbox away from image edge |
threshold |
confidence threshold to include bbox |
file_col |
column in manifest that refers to file paths |
original dataframe including md results
## Not run: mdresults <- parse_MD(mdres) ## End(Not run)
## Not run: mdresults <- parse_MD(mdres) ## End(Not run)
Plot bounding boxes on image from md results
plot_boxes(image, label = FALSE, minconf = 0)
plot_boxes(image, label = FALSE, minconf = 0)
image |
The mdres for the image |
label |
T/F toggle to plot MD category |
minconf |
minimum confidence to plot box |
no return value, produces bounding box in plot panel
## Not run: mdres <- classifyImageMD(mdsession, images$FilePath[30000]) plotBoxes(mdres, minconf = 0.5) ## End(Not run)
## Not run: mdres <- classifyImageMD(mdsession, images$FilePath[30000]) plotBoxes(mdres, minconf = 0.5) ## End(Not run)
Infer Species for Given Detections
predict_species( detections, model, classes, device = NULL, out_file = NULL, raw = FALSE, file_col = "Frame", crop = TRUE, resize_width = 299, resize_height = 299, normalize = TRUE, batch_size = 1, workers = 1 )
predict_species( detections, model, classes, device = NULL, out_file = NULL, raw = FALSE, file_col = "Frame", crop = TRUE, resize_width = 299, resize_height = 299, normalize = TRUE, batch_size = 1, workers = 1 )
detections |
manifest of animal detections |
model |
loaded classifier model |
classes |
data.frame of classes |
device |
send model to the specified device |
out_file |
path to csv to save results to |
raw |
output raw logits in addition to manifest |
file_col |
column in manifest containing file paths |
crop |
use bbox to crop images before feeding into model |
resize_width |
image width input size |
resize_height |
image height input size |
normalize |
normalize the tensor before inference |
batch_size |
batch size for generator |
workers |
number of processes |
detection manifest with added prediction and confidence columns
## Not run: animals <- predict_species(animals, classifier[[1]], classifier[[2]], raw=FALSE)
## Not run: animals <- predict_species(animals, classifier[[1]], classifier[[2]], raw=FALSE)
Remove Sorted Links
remove_link(manifest, link_col = "Link")
remove_link(manifest, link_col = "Link")
manifest |
DataFrame of classified images |
link_col |
column in manifest that contains link paths |
manifest without link column
## Not run: remove_link(manifest) ## End(Not run)
## Not run: remove_link(manifest) ## End(Not run)
Save Data to Given File
save_data(data, out_file, prompt = TRUE)
save_data(data, out_file, prompt = TRUE)
data |
the dataframe to be saved |
out_file |
the full path of the saved file |
prompt |
if true, prompts the user to confirm overwrite |
none
## Not run: save_data(files,"path/to/newfile.csv") ## End(Not run)
## Not run: save_data(files,"path/to/newfile.csv") ## End(Not run)
This function applies image classifications at a sequence level by leveraging information from multiple images. A sequence is defined as all images at the same camera/station where the time between consecutive images is <=maxdiff. This can improve classification accuracy, but assumes that only one species is present in each sequence. If you regularly expect multiple species to occur in an image or sequence don't use this function.
sequence_classification( animals, empty = NULL, predictions, classes, stationcolumn, emptyclass = "", sortcolumns = NULL, recordfield = "FilePath", maxdiff = 60 )
sequence_classification( animals, empty = NULL, predictions, classes, stationcolumn, emptyclass = "", sortcolumns = NULL, recordfield = "FilePath", maxdiff = 60 )
animals |
sub-selection of all images that contain MD animals |
empty |
optional, data frame of non-animal images (empty, human and vehicle) that will be merged back with animal images |
predictions |
data frame of prediction probabilities from the classifySpecies function |
classes |
a vector of species corresponding to the columns of 'predictions' |
stationcolumn |
a column in the animals and empty data frame that indicates the camera or camera station |
emptyclass |
a string indicating the class that should be considered 'Empty' |
sortcolumns |
optional sort order. The default is 'stationcolumn' and DateTime. |
recordfield |
a field indicating a single record. The default is FilePath for single images/videos. |
maxdiff |
maximum difference between images in seconds to be included in a sequence, defaults to 60 |
This function retains "Empty" classification even if other images within the sequence are predicted to contain animals. Classification confidence is weighted by MD confidence.
data frame with predictions and confidence values for animals and empty images
## Not run: predictions <-classifyCropsSpecies(images,modelfile,resize=456) animals <- allframes[allframes$max_detection_category==1,] empty <- setEmpty(allframes) animals <- sequence_classification(animals, empty, predictions, classes, emptyclass = "Empty", stationcolumn="StationID", maxdiff=60) ## End(Not run)
## Not run: predictions <-classifyCropsSpecies(images,modelfile,resize=456) animals <- allframes[allframes$max_detection_category==1,] empty <- setEmpty(allframes) animals <- sequence_classification(animals, empty, predictions, classes, emptyclass = "Empty", stationcolumn="StationID", maxdiff=60) ## End(Not run)
Create SymLink Directories and Sort Classified Images Based on MD Results
sort_MD( manifest, link_dir, file_col = "FilePath", unique_name = "UniqueName", copy = FALSE )
sort_MD( manifest, link_dir, file_col = "FilePath", unique_name = "UniqueName", copy = FALSE )
manifest |
DataFrame of classified images |
link_dir |
Destination directory for symlinks |
file_col |
Column containing file paths |
unique_name |
Unique image name identifier |
copy |
Toggle to determine copy or hard link, defaults to link |
manifest with added link columns
## Not run: sort_MD(manifest, link_dir) ## End(Not run)
## Not run: sort_MD(manifest, link_dir) ## End(Not run)
Create SymLink Directories and Sort Classified Images
sort_species( manifest, link_dir, file_col = "FilePath", unique_name = "UniqueName", copy = FALSE )
sort_species( manifest, link_dir, file_col = "FilePath", unique_name = "UniqueName", copy = FALSE )
manifest |
DataFrame of classified images |
link_dir |
Destination directory for symlinks |
file_col |
Column containing file paths |
unique_name |
Unique image name identifier |
copy |
Toggle to determine copy or hard link, defaults to link |
manifest with added link columns
## Not run: manifest <- sort_species(manifest, link_dir) ## End(Not run)
## Not run: manifest <- sort_species(manifest, link_dir) ## End(Not run)
Update Results from File Browser
update_labels(manifest, link_dir, unique_name = "UniqueName")
update_labels(manifest, link_dir, unique_name = "UniqueName")
manifest |
dataframe containing file data and predictions |
link_dir |
directory to sort files into |
unique_name |
column name indicating a unique file name for each row |
dataframe with new "Species" column that contains the verified species
## Not run: results <- update_labels(resultsfile, linkdir) ## End(Not run)
## Not run: results <- update_labels(resultsfile, linkdir) ## End(Not run)
Set Working Directory and Save File Global Variables
WorkingDirectory(workingdir, pkg.env)
WorkingDirectory(workingdir, pkg.env)
workingdir |
local directory that contains data to process |
pkg.env |
environment to create global variables in |
None
## Not run: WorkingDirectory("/home/kyra/animl/examples") ## End(Not run)
## Not run: WorkingDirectory("/home/kyra/animl/examples") ## End(Not run)