# Languages
# Frequency of words identified in text (EM13LGG703, EM13LGG704, EM13LGG705)
# Word-frequency bar chart.
#
# Builds a small corpus from the example sentences, normalises it with tm
# (lowercase, no punctuation, no numbers, no stop words), counts how often
# each remaining term occurs across all sentences, and shows the counts as
# an interactive Plotly bar chart stored in `graph`.

# Install the required packages (if they are not already installed).
# NOTE(review): "wordcloud" is installed here but never loaded or used in
# this script - presumably kept for a follow-up activity; confirm.
for (pkg in c("plotly", "tm", "wordcloud")) {
  if (!requireNamespace(pkg, quietly = TRUE)) install.packages(pkg)
}

# Load packages
library(plotly)
library(tm)

# Example text (can be replaced by another text or set of texts)
text <- c(
  "Reading is essential for understanding the world.",
  "Reading broadens horizons and awakens creativity.",
  "Language and creativity connect us and transform knowledge into something accessible."
)

# Language of the stop-word list. It must match the language of `text`:
# the example sentences are in English, so "en" is used; switch to "pt"
# when analysing Portuguese texts.
stopword_language <- "en"

# Create a corpus (one document per element of `text`)
corpus <- Corpus(VectorSource(text))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower)) # Convert to lowercase
corpus <- tm_map(corpus, removePunctuation) # Remove punctuation
corpus <- tm_map(corpus, removeNumbers) # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords(stopword_language)) # Remove stop words

# Create a term-document matrix (rows = terms, columns = documents)
tdm <- TermDocumentMatrix(corpus)
term_matrix <- as.matrix(tdm)

# Sum the word frequencies across all documents, most frequent first
frequencies <- sort(rowSums(term_matrix), decreasing = TRUE)

# `Word` is a factor whose levels follow the sorted order; with a plain
# character column plotly would arrange the bars alphabetically and the
# descending-frequency order would be lost.
frequency_data <- data.frame(
  Word = factor(names(frequencies), levels = names(frequencies)),
  Frequency = frequencies
)

# Create an interactive graph with Plotly
graph <- plot_ly(
  frequency_data,
  x = ~Word,
  y = ~Frequency,
  type = "bar",
  text = ~paste("Frequency:", Frequency),
  hoverinfo = "text"
) %>%
  layout(
    title = "Frequency of Words in Text",
    xaxis = list(title = "Words"),
    yaxis = list(title = "Frequency"),
    showlegend = FALSE
  )

# Display the interactive graph
graph
# Suggestions:
# Try modifying the graph by adding or replacing the commands below in the code snippet:
# Add this argument right after hoverinfo (separated by a comma) to change the bar colors:
# marker = list(color = ~Frequency, colorscale = "Blues") # "Blues" color scale