Daten einlesen
# Restore the objects stored in the RData file into the current workspace.
# The code further down reads `data_token_per_group`, which is presumably
# one of the restored objects -- TODO confirm against the file's contents.
load(file = "token_per_group.RData")
Verschiedene Gruppen finden
# Split the "/"-separated `meta` string of every document into its group names.
# `meta_split` holds one character vector per document (no padding).
meta_split <- stringr::str_split(
  string = data_token_per_group$meta,
  pattern = "/",
  simplify = FALSE
)

# All distinct group names, with surrounding whitespace removed. A missing
# `meta` value yields an NA entry here, so such documents form their own group.
unique_groups <- unique(trimws(unlist(meta_split), which = "both"))

# Character matrix with one row per document. Shorter rows are padded with
# empty strings by `simplify = TRUE`, so it is kept only for inspection and
# is NOT used for the membership test below.
groups_per_document <- trimws(
  stringr::str_split(
    string = data_token_per_group$meta,
    pattern = "/",
    simplify = TRUE
  ),
  which = "both"
)

# Trimmed group names per document, used for the membership test. Working on
# the unpadded list avoids matching the "" padding cells of the matrix.
document_groups <- lapply(X = meta_split, FUN = trimws, which = "both")

# get token values per group
# For every group, collect the `meta2` values of all documents that list the
# group. A document that belongs to several groups contributes to each of them.
group_values <- lapply(X = unique_groups, FUN = function(group) {
  relevant_documents <- which(
    vapply(
      X = document_groups,
      FUN = function(x) group %in% x,
      FUN.VALUE = logical(1)
    )
  )
  data_token_per_group[relevant_documents, "meta2"]
})

# Per-group summary: number of values, their mean and standard deviation.
# vapply() keeps the column types stable and returns zero-length vectors
# (instead of NULL, which would drop the columns) when there are no groups.
group_data <- data.frame(
  name = unique_groups,
  size = vapply(X = group_values, FUN = length, FUN.VALUE = integer(1)),
  mean = vapply(X = group_values, FUN = mean, FUN.VALUE = numeric(1)),
  sd = vapply(X = group_values, FUN = sd, FUN.VALUE = numeric(1))
)
group_data
## name size mean sd
## 1 G77 + China 121 4879.198 3242.8325
## 2 SIDS 39 4195.897 3674.1891
## 3 AOSIS 39 4199.641 3671.4444
## 4 CfRN 51 4623.784 2689.8547
## 5 ALBA 11 4618.182 2980.9172
## 6 Cartagena Dialogue 29 3406.621 4049.3198
## 7 African Group 48 5396.771 2790.1263
## 8 Francophonie 47 4380.064 2902.5631
## 9 League of Arab States 17 4611.529 3155.3483
## 10 LDC 44 5460.727 2850.3638
## 11 Umbrella Group 8 1385.750 766.9283
## 12 AILAC 7 4649.714 2006.1734
## 13 BASIC 4 5948.500 3723.5473
## 14 LMDC 23 5615.174 3064.6980
## 15 LLDC 29 4210.655 2879.4418
## 16 NA 9 1447.667 895.1723
## 17 EIG 5 1923.000 848.1489
## 18 OPEC 10 3993.800 3226.1970
## 19 EU 28 941.000 0.0000
## 20 CACAM 6 3523.500 4574.0541
Plot erstellen
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Build a box plot with one box per group. Each trace is labelled with the
# group name followed by the number of values in parentheses.
fig <- plot_ly(type = "box")
# seq_along() yields an empty sequence when there are no groups, whereas
# 1:length(x) would iterate over c(1, 0) and fail on group_values[[1]].
for (i in seq_along(unique_groups)) {
  fig <- add_boxplot(
    p = fig,
    y = group_values[[i]],
    name = paste0(group_data$name[i], " (", group_data$size[i], ")")
  )
}
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.