for loop
add image
execution time
bar chart (y-axis limit, horizontal bars via coord_flip, value labels, frequency analysis)
R Notebook
0. load data
# Restore the 13 saved section files (M1.rda ... M13.rda) into the workspace.
# Presumably each file holds a data frame named after the file (M1 ... M13),
# since later code looks those names up with get() -- confirm against the data.
# A plain for loop is used (not lapply) so that load() restores the objects
# into the current environment rather than a function-local one.
rda_files <- paste0("D:/datasets/citations_data/tourism_satisfaction2021/M", 1:13, ".rda")
for (rda_file in rda_files) {
  load(file = rda_file)
}
1. create m_matrix: first line
# Seed m_matrix with a single placeholder row so that it already has the three
# expected columns (all stored as text): the variable's position, the
# variable's name, and the section it comes from.
m_matrix <- data.frame(
  col_id = "1",
  col_names = "AU",
  from_data = "M1"
)
2. loop for 13 sections
# mlist holds the names of the section data frames (one per row). It stays a
# one-column data frame because it is indexed as mlist[j, 1] and sized with
# nrow(mlist).
mlist<-as.data.frame(c('M1','M2','M3','M4','M5','M6','M7','M8','M9','M10','M11','M12','M13'))

# Build one (col_id, col_names, from_data) table per section, then bind them
# all in a single rbind() call. This replaces growing m_matrix row-block by
# row-block inside a loop (which re-copies the whole table on every pass) and
# needs no placeholder first row: m_matrix is simply overwritten with the
# complete result.
section_tables <- lapply(mlist[, 1], function(section_name) {
  # Look the section's data frame up by name in the workspace.
  temp <- get(section_name)
  data.frame(
    # Variable's position within the section; stored as text, not integer,
    # so that consumers copying these values verbatim keep receiving strings.
    col_id = as.character(seq_len(ncol(temp))),
    # Variable's name.
    col_names = colnames(temp),
    # Section the variable belongs to.
    from_data = rep(section_name, ncol(temp)),
    stringsAsFactors = FALSE
  )
})
m_matrix <- do.call(rbind, section_tables)
3. frequency analysis
# Count how often each variable name (second column of m_matrix) occurs across
# all sections. The result has two columns: Var1 (the name) and Freq (count).
freq_variable <- as.data.frame(table(m_matrix[[2]]))
# Keep only the names that occur fewer than 13 times.
freq_variable2 <- freq_variable[freq_variable$Freq < 13, ]
4. plot of frequency
library(ggplot2)
# Horizontal bar chart of how often each variable name in freq_variable2
# occurs, with the count printed next to each bar.
#
# Upper axis limit: 10% above the largest count so the labels drawn past the
# bar ends stay inside the panel. Falls back to 1 when the table is empty,
# where max() would otherwise return -Inf.
freq_ceiling <- if (nrow(freq_variable2) > 0) {
  max(freq_variable2$Freq) * 1.1
} else {
  1
}
Plot_variables <- ggplot(freq_variable2, aes(x = Var1, y = Freq)) +
  # geom_col() draws the counts as given; it has no binwidth parameter.
  geom_col(fill = "lightblue", colour = "grey60") +
  # No density layer: with a discrete x every group has exactly one point,
  # so a density estimate cannot be computed and nothing would be drawn.
  ylim(0, freq_ceiling) +
  # Negative hjust pushes each label just beyond the end of its bar.
  geom_text(aes(label = Freq), hjust = -0.5) +
  # Flip the axes so variable names are listed down the side.
  coord_flip()
Warning: Ignoring unknown parameters: binwidth
# Render the frequency bar chart.
print(Plot_variables)
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning: Groups with fewer than two data points have been dropped.
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
5. variable_matrix
# Build colid_matrix: the frequency table (one row per distinct variable name)
# extended with one column per section. Each cell holds the variable's
# position (first column of m_matrix) inside that section, or NA when the
# section does not contain the variable.
#
# Every section column is filled with one vectorised lookup instead of
# testing each (m_matrix row, section, variable) combination in turn.
start_time <- Sys.time()
colid_matrix <- freq_variable
# For each m_matrix row: the colid_matrix row carrying the same variable name
# (NA if the name is not present).
target_row <- match(
  as.character(m_matrix[[2]]),
  as.character(colid_matrix[[1]])
)
for (section_name in as.character(mlist[[1]])) {
  # m_matrix rows that belong to this section and have a matching variable.
  hits <- which(
    as.character(m_matrix[[3]]) == section_name & !is.na(target_row)
  )
  # A section with no matching rows gets no column at all.
  if (length(hits) == 0L) {
    next
  }
  # Start from an all-NA column of the same type as the position values.
  section_ids <- rep(m_matrix[[1]][NA_integer_], nrow(colid_matrix))
  # If a name repeats within a section, the later m_matrix row wins.
  section_ids[target_row[hits]] <- m_matrix[[1]][hits]
  colid_matrix[[section_name]] <- section_ids
}
end_time <- Sys.time()
# Report the elapsed time.
end_time - start_time
Time difference of 27.44249 secs