Title: | Scrape and Analyze 'Goodreads' Book Data |
---|---|
Description: | A comprehensive toolkit for scraping and analyzing book data from <https://www.goodreads.com/>. This package provides functions to search for books, scrape book details and reviews, perform sentiment analysis on reviews, and conduct topic modeling. It's designed for researchers, data analysts, and book enthusiasts who want to gain insights from 'Goodreads' data. |
Authors: | Chao Liu [aut, cre, cph] |
Maintainer: | Chao Liu <[email protected]> |
License: | GPL-3 |
Version: | 0.1.2 |
Built: | 2024-10-25 05:25:25 UTC |
Source: | CRAN |
This function takes the output from scrape_reviews and performs sentiment analysis, including basic negation scope detection.
analyze_sentiment(reviews_df, lexicon = "afinn")
analyze_sentiment(reviews_df, lexicon = "afinn")
reviews_df |
A data frame containing the output from scrape_reviews. |
lexicon |
The sentiment lexicon to use. Options are "afinn", "bing", or "nrc". |
A data frame with sentiment scores for each review.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Display the first few rows of the results print(head(sentiment_results)) } else { cat("No reviews found. Cannot perform sentiment analysis.\n") } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Display the first few rows of the results print(head(sentiment_results)) } else { cat("No reviews found. Cannot perform sentiment analysis.\n") } # Clean up: remove the temporary file file.remove(temp_file)
This function calculates the average sentiment score for each book.
average_book_sentiment(sentiment_df)
average_book_sentiment(sentiment_df)
sentiment_df |
A data frame containing the output from analyze_sentiment. |
A data frame with average sentiment scores for each book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Calculate average sentiment score per book avg_senti <- average_book_sentiment(sentiment_results) # Display the results print(avg_senti) } else { cat("No reviews found. Cannot calculate average sentiment.\n") } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Calculate average sentiment score per book avg_senti <- average_book_sentiment(sentiment_results) # Display the results print(avg_senti) } else { cat("No reviews found. Cannot calculate average sentiment.\n") } # Clean up: remove the temporary file file.remove(temp_file)
This function performs LDA topic modeling on the preprocessed reviews.
fit_lda(dtm, k, method = "Gibbs")
fit_lda(dtm, k, method = "Gibbs")
dtm |
A document-term matrix |
k |
The number of topics to extract |
method |
The method to use for fitting the model (default: Gibbs) |
An LDA model
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Fit LDA model lda_model <- fit_lda(preprocessed$dtm, k = 2) # Print model summary print(lda_model) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Fit LDA model lda_model <- fit_lda(preprocessed$dtm, k = 2) # Print model summary print(lda_model) # Clean up: remove the temporary file file.remove(temp_file)
This function creates a word cloud for each topic.
gen_topic_clouds(model_output, n = 50)
gen_topic_clouds(model_output, n = 50)
model_output |
The output from model_topics function |
n |
The number of top terms to include in the word cloud |
A list of ggplot objects, where each element represents a word cloud for a topic.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 30, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 3, num_terms = 50, english_only = TRUE) # Generate word clouds for each topic wordcloud_plots <- gen_topic_clouds(topic_results, n = 20) # Display the word cloud for the first topic if (interactive()) { print(wordcloud_plots[[1]]) } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 30, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 3, num_terms = 50, english_only = TRUE) # Generate word clouds for each topic wordcloud_plots <- gen_topic_clouds(topic_results, n = 20) # Display the word cloud for the first topic if (interactive()) { print(wordcloud_plots[[1]]) } # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the author information for each book.
get_author_info(file_path)
get_author_info(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the author information for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function author_info <- get_author_info(temp_file) print(author_info) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function author_info <- get_author_info(temp_file) print(author_info) # Clean up: remove the temporary file file.remove(temp_file)
This function retrieves the book_id values from the input_data and saves them to a specified text file.
get_book_ids(input_data, file_name)
get_book_ids(input_data, file_name)
input_data |
A data frame containing a column named book_id. |
file_name |
A string specifying the name of the text file to save the book_id values. |
No return value, the function writes the book_id values to a text file.
# Create sample data books <- data.frame(title = c("Hamlet", "The Hunger Games", "Jane Eyre"), book_id = c("1420", "2767052", "10210") ) # Create a temporary file path temp_file <- file.path(tempdir(), "bookids.txt") # Run the function get_book_ids(books, temp_file) # Clean up: remove the temporary file file.remove(temp_file)
# Create sample data books <- data.frame(title = c("Hamlet", "The Hunger Games", "Jane Eyre"), book_id = c("1420", "2767052", "10210") ) # Create a temporary file path temp_file <- file.path(tempdir(), "bookids.txt") # Run the function get_book_ids(books, temp_file) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the summary for each book.
get_book_summary(file_path)
get_book_summary(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the summary for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function book_summary <- get_book_summary(temp_file) print(book_summary) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function book_summary <- get_book_summary(temp_file) print(book_summary) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the format information for each book.
get_format_info(file_path)
get_format_info(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the format information for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function format_info <- get_format_info(temp_file) print(format_info) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function format_info <- get_format_info(temp_file) print(format_info) # Clean up: remove the temporary file file.remove(temp_file)
This function reads book IDs from a file, fetches the corresponding Goodreads pages, and extracts the genres for each book.
get_genres(file_path)
get_genres(file_path)
file_path |
A character string specifying the path to the file containing book IDs. |
A named list where each element corresponds to a book ID and contains a character vector of genres for that book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function genres <- get_genres(temp_file) # Display the results print(genres) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function genres <- get_genres(temp_file) # Display the results print(genres) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the number of pages for each book.
get_num_pages(file_path)
get_num_pages(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the number of pages for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function num_pages <- get_num_pages(temp_file) print(num_pages) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function num_pages <- get_num_pages(temp_file) print(num_pages) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the published time for each book.
get_published_time(file_path)
get_published_time(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the published time for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function published_times <- get_published_time(temp_file) print(published_times) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function published_times <- get_published_time(temp_file) print(published_times) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the rating distribution for each book.
get_rating_distribution(file_path)
get_rating_distribution(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the rating distribution for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function rating_distributions <- get_rating_distribution(temp_file) print(rating_distributions) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function rating_distributions <- get_rating_distribution(temp_file) print(rating_distributions) # Clean up: remove the temporary file file.remove(temp_file)
This function takes the output from scrape_reviews, preprocesses the data, performs topic modeling, and prints the results.
model_topics(reviews, num_topics = 3, num_terms = 10, english_only = TRUE)
model_topics(reviews, num_topics = 3, num_terms = 10, english_only = TRUE)
reviews |
A data frame containing the scraped reviews |
num_topics |
The number of topics to extract |
num_terms |
The number of top terms to display for each topic |
english_only |
A logical value indicating whether to filter out non-English reviews. Default is TRUE. |
A list containing the following elements:
model
: The fitted LDA model object.
filtered_reviews
: The preprocessed and filtered reviews data frame.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Print model summary print(topic_results$model) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Print model summary print(topic_results$model) # Clean up: remove the temporary file file.remove(temp_file)
This function creates a heatmap of the topic distribution across documents.
plot_topic_heatmap(model_output)
plot_topic_heatmap(model_output)
model_output |
The output from model_topics function |
A ggplot object representing the topic distribution heatmap.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize topic distribution plot_topic_heatmap(topic_results) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize topic distribution plot_topic_heatmap(topic_results) # Clean up: remove the temporary file file.remove(temp_file)
This function creates a bar plot of the overall prevalence of each topic.
plot_topic_prevalence(model_output)
plot_topic_prevalence(model_output)
model_output |
The output from model_topics function |
A ggplot object representing the bar plot of topic prevalence.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize topic distribution plot_topic_prevalence(topic_results) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize topic distribution plot_topic_prevalence(topic_results) # Clean up: remove the temporary file file.remove(temp_file)
This function creates a bar plot of the top terms for each topic.
plot_topic_terms(model_output, n = 10)
plot_topic_terms(model_output, n = 10)
model_output |
The output from model_topics function |
n |
The number of top terms to visualize for each topic |
A ggplot object representing the bar plot of top terms for each topic.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize top terms for each topic plot_topic_terms(topic_results, n = 5) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize top terms for each topic plot_topic_terms(topic_results, n = 5) # Clean up: remove the temporary file file.remove(temp_file)
This function preprocesses the review text by optionally filtering non-English reviews, removing punctuation, converting to lowercase, removing stopwords, and stemming.
preprocess_reviews(reviews, english_only = TRUE)
preprocess_reviews(reviews, english_only = TRUE)
reviews |
A data frame containing the scraped reviews |
english_only |
A logical value indicating whether to filter out non-English reviews. Default is TRUE |
A list containing the following elements:
corpus
: The preprocessed corpus object.
dtm
: The document-term matrix.
filtered_reviews
: The filtered reviews data frame.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Print the document-term matrix print(preprocessed$dtm) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Print the document-term matrix print(preprocessed$dtm) # Clean up: remove the temporary file file.remove(temp_file)
Replace special characters and remove non-ASCII characters
replace_special_chars(x)
replace_special_chars(x)
x |
A character vector |
A character vector with special characters replaced and non-ASCII characters removed
This function scrapes details of books using their IDs from Goodreads.
scrape_books(book_ids_path, use_parallel = FALSE, num_cores = 4)
scrape_books(book_ids_path, use_parallel = FALSE, num_cores = 4)
book_ids_path |
Path to a text file containing book IDs. |
use_parallel |
Logical indicating whether to scrape in parallel (default is FALSE). |
num_cores |
Number of CPU cores to use for parallel scraping (default is 4). |
A data frame containing scraped book details.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function (with a small delay to avoid overwhelming the server) result <- scrape_books(temp_file, use_parallel = FALSE) print(head(result)) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function (with a small delay to avoid overwhelming the server) result <- scrape_books(temp_file, use_parallel = FALSE) print(head(result)) # Clean up: remove the temporary file file.remove(temp_file)
This function scrapes book reviews from Goodreads based on provided book IDs.
scrape_reviews( book_ids_path, num_reviews = 30, use_parallel = FALSE, num_cores = 4 )
scrape_reviews( book_ids_path, num_reviews = 30, use_parallel = FALSE, num_cores = 4 )
book_ids_path |
A character string specifying the path to a file containing book IDs. |
num_reviews |
An integer specifying the number of reviews to scrape per book. Default is 30. |
use_parallel |
A logical value indicating whether to use parallel processing. Default is FALSE. |
num_cores |
An integer specifying the number of cores to use for parallel processing. Default is 4. |
A data frame containing scraped review information.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function (with a small number of reviews to keep the example quick) reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) print(head(reviews)) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function (with a small number of reviews to keep the example quick) reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) print(head(reviews)) # Clean up: remove the temporary file file.remove(temp_file)
This function searches books on Goodreads.
search_goodreads( search_term, search_in = c("title", "author"), num_books = 10, sort_by = "ratings" )
search_goodreads( search_term, search_in = c("title", "author"), num_books = 10, sort_by = "ratings" )
search_term |
A search term string. |
search_in |
Where to search (e.g., "title", "author"). |
num_books |
Number of books to return. |
sort_by |
How to sort the results (e.g., "ratings", "published_year"). |
A data frame of search results.
search_goodreads("parenting", search_in = "title", num_books = 2)
search_goodreads("parenting", search_in = "title", num_books = 2)
This function creates a histogram of sentiment scores for all reviews.
sentiment_histogram(sentiment_df)
sentiment_histogram(sentiment_df)
sentiment_df |
A data frame containing the output from analyze_sentiment. |
A ggplot object representing the histogram.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Create histogram of sentiment scores sentiment_hist <- sentiment_histogram(sentiment_results) # Display the plot print(sentiment_hist) # Optionally, save the plot # ggsave("sentiment_hist.png", sentiment_hist, width = 8, height = 6) } else { cat("No reviews found. Cannot create sentiment histogram.\n") } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Create histogram of sentiment scores sentiment_hist <- sentiment_histogram(sentiment_results) # Display the plot print(sentiment_hist) # Optionally, save the plot # ggsave("sentiment_hist.png", sentiment_hist, width = 8, height = 6) } else { cat("No reviews found. Cannot create sentiment histogram.\n") } # Clean up: remove the temporary file file.remove(temp_file)
This function plots the average sentiment score over time.
sentiment_trend(sentiment_df, time_period = "month", show_smooth_trend = FALSE)
sentiment_trend(sentiment_df, time_period = "month", show_smooth_trend = FALSE)
sentiment_df |
A data frame containing the output from analyze_sentiment. |
time_period |
A string specifying the time period for grouping ("day", "week", "month", "year"). |
show_smooth_trend |
A logical value indicating whether to show the overall smooth trend line (default: FALSE). |
A ggplot object representing the sentiment trend.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Create a sentiment trend plot senti_trend <- sentiment_trend(sentiment_results) # Display the plot print(senti_trend) # Optionally, save the plot # ggsave("senti_trend.png", senti_trend, width = 8, height = 6) } else { cat("No reviews found. Cannot create sentiment trend.\n") } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Create a sentiment trend plot senti_trend <- sentiment_trend(sentiment_results) # Display the plot print(senti_trend) # Optionally, save the plot # ggsave("senti_trend.png", senti_trend, width = 8, height = 6) } else { cat("No reviews found. Cannot create sentiment trend.\n") } # Clean up: remove the temporary file file.remove(temp_file)
This function extracts the top terms for each topic in the LDA model and optionally prints them.
top_terms(lda_model, n = 10, verbose = TRUE)
top_terms(lda_model, n = 10, verbose = TRUE)
lda_model |
An LDA model |
n |
The number of top terms to extract for each topic |
verbose |
Logical; if TRUE, print the top terms to the console (default is TRUE) |
A list of character vectors, each containing the top terms for a topic.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Fit LDA model lda_model <- fit_lda(preprocessed$dtm, k = 2) # Print top terms top_terms(lda_model, n = 5) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Fit LDA model lda_model <- fit_lda(preprocessed$dtm, k = 2) # Print top terms top_terms(lda_model, n = 5) # Clean up: remove the temporary file file.remove(temp_file)