This function takes the output from scrape_reviews and performs sentiment analysis, including basic negation scope detection.
analyze_sentiment(reviews_df, lexicon = "afinn")
analyze_sentiment(reviews_df, lexicon = "afinn")
reviews_df |
A data frame containing the output from scrape_reviews. |
lexicon |
The sentiment lexicon to use. Options are "afinn", "bing", or "nrc". |
A data frame with sentiment scores for each review.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Display the first few rows of the results print(head(sentiment_results)) } else { cat("No reviews found. Cannot perform sentiment analysis.\n") } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Display the first few rows of the results print(head(sentiment_results)) } else { cat("No reviews found. Cannot perform sentiment analysis.\n") } # Clean up: remove the temporary file file.remove(temp_file)
This function calculates the average sentiment score for each book.
average_book_sentiment(sentiment_df)
average_book_sentiment(sentiment_df)
sentiment_df |
A data frame containing the output from analyze_sentiment. |
A data frame with average sentiment scores for each book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Calculate average sentiment score per book avg_senti <- average_book_sentiment(sentiment_results) # Display the results print(avg_senti) } else { cat("No reviews found. Cannot calculate average sentiment.\n") } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Calculate average sentiment score per book avg_senti <- average_book_sentiment(sentiment_results) # Display the results print(avg_senti) } else { cat("No reviews found. Cannot calculate average sentiment.\n") } # Clean up: remove the temporary file file.remove(temp_file)
This function performs LDA topic modeling on the preprocessed reviews.
fit_lda(dtm, k, method = "Gibbs")
fit_lda(dtm, k, method = "Gibbs")
dtm |
A document-term matrix |
k |
The number of topics to extract |
method |
The method to use for fitting the model (default: Gibbs) |
An LDA model
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Fit LDA model lda_model <- fit_lda(preprocessed$dtm, k = 2) # Print model summary print(lda_model) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Fit LDA model lda_model <- fit_lda(preprocessed$dtm, k = 2) # Print model summary print(lda_model) # Clean up: remove the temporary file file.remove(temp_file)
This function creates a word cloud for each topic.
gen_topic_clouds(model_output, n = 50)
gen_topic_clouds(model_output, n = 50)
model_output |
The output from model_topics function |
n |
The number of top terms to include in the word cloud |
A list of ggplot objects, where each element represents a word cloud for a topic.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 30, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 3, num_terms = 50, english_only = TRUE) # Generate word clouds for each topic wordcloud_plots <- gen_topic_clouds(topic_results, n = 20) # Display the word cloud for the first topic if (interactive()) { print(wordcloud_plots[[1]]) } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 30, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 3, num_terms = 50, english_only = TRUE) # Generate word clouds for each topic wordcloud_plots <- gen_topic_clouds(topic_results, n = 20) # Display the word cloud for the first topic if (interactive()) { print(wordcloud_plots[[1]]) } # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the author information for each book.
get_author_info(file_path)
get_author_info(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the author information for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function author_info <- get_author_info(temp_file) print(author_info) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function author_info <- get_author_info(temp_file) print(author_info) # Clean up: remove the temporary file file.remove(temp_file)
This function retrieves the book_id values from the input_data and saves them to a specified text file.
get_book_ids(input_data, file_name)
get_book_ids(input_data, file_name)
input_data |
A data frame containing a column named book_id. |
file_name |
A string specifying the name of the text file to save the book_id values. |
No return value, the function writes the book_id values to a text file.
# Create sample data books <- data.frame(title = c("Hamlet", "The Hunger Games", "Jane Eyre"), book_id = c("1420", "2767052", "10210") ) # Create a temporary file path temp_file <- file.path(tempdir(), "bookids.txt") # Run the function get_book_ids(books, temp_file) # Clean up: remove the temporary file file.remove(temp_file)
# Create sample data books <- data.frame(title = c("Hamlet", "The Hunger Games", "Jane Eyre"), book_id = c("1420", "2767052", "10210") ) # Create a temporary file path temp_file <- file.path(tempdir(), "bookids.txt") # Run the function get_book_ids(books, temp_file) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the summary for each book.
get_book_summary(file_path)
get_book_summary(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the summary for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function book_summary <- get_book_summary(temp_file) print(book_summary) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function book_summary <- get_book_summary(temp_file) print(book_summary) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the format information for each book.
get_format_info(file_path)
get_format_info(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the format information for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function format_info <- get_format_info(temp_file) print(format_info) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function format_info <- get_format_info(temp_file) print(format_info) # Clean up: remove the temporary file file.remove(temp_file)
This function reads book IDs from a file, fetches the corresponding Goodreads pages, and extracts the genres for each book.
get_genres(file_path)
get_genres(file_path)
file_path |
A character string specifying the path to the file containing book IDs. |
A named list where each element corresponds to a book ID and contains a character vector of genres for that book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function genres <- get_genres(temp_file) # Display the results print(genres) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function genres <- get_genres(temp_file) # Display the results print(genres) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the number of pages for each book.
get_num_pages(file_path)
get_num_pages(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the number of pages for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function num_pages <- get_num_pages(temp_file) print(num_pages) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function num_pages <- get_num_pages(temp_file) print(num_pages) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the published time for each book.
get_published_time(file_path)
get_published_time(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the published time for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function published_times <- get_published_time(temp_file) print(published_times) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function published_times <- get_published_time(temp_file) print(published_times) # Clean up: remove the temporary file file.remove(temp_file)
This function takes a file path containing Goodreads book IDs and retrieves the rating distribution for each book.
get_rating_distribution(file_path)
get_rating_distribution(file_path)
file_path |
A character string specifying the path to the file containing Goodreads book IDs. |
A named list where each element contains the rating distribution for a book.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function rating_distributions <- get_rating_distribution(temp_file) print(rating_distributions) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function rating_distributions <- get_rating_distribution(temp_file) print(rating_distributions) # Clean up: remove the temporary file file.remove(temp_file)
This function takes the output from scrape_reviews, preprocesses the data, performs topic modeling, and prints the results.
model_topics(reviews, num_topics = 3, num_terms = 10, english_only = TRUE)
model_topics(reviews, num_topics = 3, num_terms = 10, english_only = TRUE)
reviews |
A data frame containing the scraped reviews |
num_topics |
The number of topics to extract |
num_terms |
The number of top terms to display for each topic |
english_only |
A logical value indicating whether to filter out non-English reviews. Default is TRUE. |
A list containing the following elements:
model
: The fitted LDA model object.
filtered_reviews
: The preprocessed and filtered reviews data frame.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Print model summary print(topic_results$model) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Print model summary print(topic_results$model) # Clean up: remove the temporary file file.remove(temp_file)
This function creates a heatmap of the topic distribution across documents.
plot_topic_heatmap(model_output)
plot_topic_heatmap(model_output)
model_output |
The output from model_topics function |
A ggplot object representing the topic distribution heatmap.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize topic distribution plot_topic_heatmap(topic_results) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize topic distribution plot_topic_heatmap(topic_results) # Clean up: remove the temporary file file.remove(temp_file)
This function creates a bar plot of the overall prevalence of each topic.
plot_topic_prevalence(model_output)
plot_topic_prevalence(model_output)
model_output |
The output from model_topics function |
A ggplot object representing the bar plot of topic prevalence.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize topic distribution plot_topic_prevalence(topic_results) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize topic distribution plot_topic_prevalence(topic_results) # Clean up: remove the temporary file file.remove(temp_file)
This function creates a bar plot of the top terms for each topic.
plot_topic_terms(model_output, n = 10)
plot_topic_terms(model_output, n = 10)
model_output |
The output from model_topics function |
n |
The number of top terms to visualize for each topic |
A ggplot object representing the bar plot of top terms for each topic.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize top terms for each topic plot_topic_terms(topic_results, n = 5) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Model topics topic_results <- model_topics(reviews, num_topics = 2, num_terms = 5, english_only = TRUE) # Visualize top terms for each topic plot_topic_terms(topic_results, n = 5) # Clean up: remove the temporary file file.remove(temp_file)
This function preprocesses the review text by optionally filtering non-English reviews, removing punctuation, converting to lowercase, removing stopwords, and stemming.
preprocess_reviews(reviews, english_only = TRUE)
preprocess_reviews(reviews, english_only = TRUE)
reviews |
A data frame containing the scraped reviews |
english_only |
A logical value indicating whether to filter out non-English reviews. Default is TRUE |
A list containing the following elements:
corpus
: The preprocessed corpus object.
dtm
: The document-term matrix.
filtered_reviews
: The filtered reviews data frame.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Print the document-term matrix print(preprocessed$dtm) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Print the document-term matrix print(preprocessed$dtm) # Clean up: remove the temporary file file.remove(temp_file)
Replace special characters and remove non-ASCII characters
replace_special_chars(x)
replace_special_chars(x)
x |
A character vector |
A character vector with special characters replaced and non-ASCII characters removed
This function scrapes details of books using their IDs from Goodreads.
scrape_books(book_ids_path, use_parallel = FALSE, num_cores = 4)
scrape_books(book_ids_path, use_parallel = FALSE, num_cores = 4)
book_ids_path |
Path to a text file containing book IDs. |
use_parallel |
Logical indicating whether to scrape in parallel (default is FALSE). |
num_cores |
Number of CPU cores to use for parallel scraping (default is 4). |
A data frame containing scraped book details.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function (with a small delay to avoid overwhelming the server) result <- scrape_books(temp_file, use_parallel = FALSE) print(head(result)) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function (with a small delay to avoid overwhelming the server) result <- scrape_books(temp_file, use_parallel = FALSE) print(head(result)) # Clean up: remove the temporary file file.remove(temp_file)
This function scrapes book reviews from Goodreads based on provided book IDs.
scrape_reviews( book_ids_path, num_reviews = 30, use_parallel = FALSE, num_cores = 4 )
scrape_reviews( book_ids_path, num_reviews = 30, use_parallel = FALSE, num_cores = 4 )
book_ids_path |
A character string specifying the path to a file containing book IDs. |
num_reviews |
An integer specifying the number of reviews to scrape per book. Default is 30. |
use_parallel |
A logical value indicating whether to use parallel processing. Default is FALSE. |
num_cores |
An integer specifying the number of cores to use for parallel processing. Default is 4. |
A data frame containing scraped review information.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function (with a small number of reviews to keep the example quick) reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) print(head(reviews)) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the function (with a small number of reviews to keep the example quick) reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) print(head(reviews)) # Clean up: remove the temporary file file.remove(temp_file)
This function searches books on Goodreads.
search_goodreads( search_term, search_in = c("title", "author"), num_books = 10, sort_by = "ratings" )
search_goodreads( search_term, search_in = c("title", "author"), num_books = 10, sort_by = "ratings" )
search_term |
A search term string. |
search_in |
Where to search (e.g., "title", "author"). |
num_books |
Number of books to return. |
sort_by |
How to sort the results (e.g., "ratings", "published_year"). |
A data frame of search results.
search_goodreads("parenting", search_in = "title", num_books = 2)
search_goodreads("parenting", search_in = "title", num_books = 2)
This function creates a histogram of sentiment scores for all reviews.
sentiment_histogram(sentiment_df)
sentiment_histogram(sentiment_df)
sentiment_df |
A data frame containing the output from analyze_sentiment. |
A ggplot object representing the histogram.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Create histogram of sentiment scores sentiment_hist <- sentiment_histogram(sentiment_results) # Display the plot print(sentiment_hist) # Optionally, save the plot # ggsave("sentiment_hist.png", sentiment_hist, width = 8, height = 6) } else { cat("No reviews found. Cannot create sentiment histogram.\n") } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Create histogram of sentiment scores sentiment_hist <- sentiment_histogram(sentiment_results) # Display the plot print(sentiment_hist) # Optionally, save the plot # ggsave("sentiment_hist.png", sentiment_hist, width = 8, height = 6) } else { cat("No reviews found. Cannot create sentiment histogram.\n") } # Clean up: remove the temporary file file.remove(temp_file)
This function plots the average sentiment score over time.
sentiment_trend(sentiment_df, time_period = "month", show_smooth_trend = FALSE)
sentiment_trend(sentiment_df, time_period = "month", show_smooth_trend = FALSE)
sentiment_df |
A data frame containing the output from analyze_sentiment. |
time_period |
A string specifying the time period for grouping ("day", "week", "month", "year"). |
show_smooth_trend |
A logical value indicating whether to show the overall smooth trend line (default: FALSE). |
A ggplot object representing the sentiment trend.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Plot the sentiment trend over time senti_trend <- sentiment_trend(sentiment_results) # Display the plot print(senti_trend) # Optionally, save the plot # ggsave("senti_trend.png", senti_trend, width = 8, height = 6) } else { cat("No reviews found. Cannot create sentiment trend\n") } # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Run the scrape_reviews function reviews <- scrape_reviews(temp_file, num_reviews = 10, use_parallel = FALSE) # Check if reviews were successfully scraped if (nrow(reviews) > 0) { # Perform sentiment analysis sentiment_results <- analyze_sentiment(reviews, lexicon = "afinn") # Plot the sentiment trend over time senti_trend <- sentiment_trend(sentiment_results) # Display the plot print(senti_trend) # Optionally, save the plot # ggsave("senti_trend.png", senti_trend, width = 8, height = 6) } else { cat("No reviews found. Cannot create sentiment trend\n") } # Clean up: remove the temporary file file.remove(temp_file)
This function extracts the top terms for each topic in the LDA model and optionally prints them.
top_terms(lda_model, n = 10, verbose = TRUE)
top_terms(lda_model, n = 10, verbose = TRUE)
lda_model |
An LDA model |
n |
The number of top terms to extract for each topic |
verbose |
Logical; if TRUE, print the top terms to the console (default is TRUE) |
A list of character vectors, each containing the top terms for a topic.
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Fit LDA model lda_model <- fit_lda(preprocessed$dtm, k = 2) # Print top terms top_terms(lda_model, n = 5) # Clean up: remove the temporary file file.remove(temp_file)
# Create a temporary file with sample book IDs temp_file <- tempfile(fileext = ".txt") writeLines(c("1420", "2767052", "10210"), temp_file) # Scrape reviews reviews <- scrape_reviews(temp_file, num_reviews = 5, use_parallel = FALSE) # Preprocess the reviews preprocessed <- preprocess_reviews(reviews, english_only = TRUE) # Fit LDA model lda_model <- fit_lda(preprocessed$dtm, k = 2) # Print top terms top_terms(lda_model, n = 5) # Clean up: remove the temporary file file.remove(temp_file)