Daten einlesen

# Restore the objects stored in the .RData file into the current workspace.
# NOTE(review): presumably this provides `data_token_per_group`, which the
# statements further down read -- confirm against the saved file.
load(file = "token_per_group.RData")

Verschiedene Gruppen finden

# Derive per-group statistics from `data_token_per_group`.
#
# Each entry of `data_token_per_group$meta` is treated as a "/"-separated
# list of group names; `data_token_per_group[, "meta2"]` holds the value
# that is summarised per group (size, mean, sd).
# NOTE(review): the comment in the original code calls these "token values";
# confirm that `meta2` really is a token count.

# Split every document's `meta` string once and keep one trimmed character
# vector of group names per document.
split_meta <- stringr::str_split(
  string = data_token_per_group$meta,
  pattern = "/",
  simplify = FALSE
)
document_groups <- lapply(X = split_meta, FUN = trimws, which = "both")

# All distinct group names, in order of first appearance.
unique_groups <- unique(trimws(unlist(split_meta), which = "both"))

# Document-by-position matrix of group names, kept for any later code that
# expects it. Shorter rows are padded with "" by `simplify = TRUE`, so it is
# deliberately NOT used for the membership test below: the padding cells
# would spuriously match an empty group name.
groups_per_document <- trimws(
  stringr::str_split(
    string = data_token_per_group$meta,
    pattern = "/",
    simplify = TRUE
  ),
  which = "both"
)

# get token values per group
# One element per entry of `unique_groups` (same order): the `meta2` values
# of every document that lists that group.
group_values <- lapply(X = unique_groups, FUN = function(group) {
  relevant_documents <- which(
    vapply(
      X = document_groups,
      FUN = function(groups) group %in% groups,
      FUN.VALUE = logical(1)
    )
  )
  data_token_per_group[relevant_documents, "meta2"]
})

# `lengths()` / `vapply()` always return a vector of the right type, even
# when there are no groups; `unlist(lapply(...))` would yield NULL there and
# silently drop the column on assignment.
group_data <- data.frame(
  name = unique_groups,
  size = lengths(group_values),
  mean = vapply(X = group_values, FUN = mean, FUN.VALUE = numeric(1)),
  sd = vapply(X = group_values, FUN = sd, FUN.VALUE = numeric(1))
)

group_data
##                     name size     mean        sd
## 1            G77 + China  121 4879.198 3242.8325
## 2                   SIDS   39 4195.897 3674.1891
## 3                  AOSIS   39 4199.641 3671.4444
## 4                   CfRN   51 4623.784 2689.8547
## 5                   ALBA   11 4618.182 2980.9172
## 6     Cartagena Dialogue   29 3406.621 4049.3198
## 7          African Group   48 5396.771 2790.1263
## 8           Francophonie   47 4380.064 2902.5631
## 9  League of Arab States   17 4611.529 3155.3483
## 10                   LDC   44 5460.727 2850.3638
## 11        Umbrella Group    8 1385.750  766.9283
## 12                 AILAC    7 4649.714 2006.1734
## 13                 BASIC    4 5948.500 3723.5473
## 14                  LMDC   23 5615.174 3064.6980
## 15                  LLDC   29 4210.655 2879.4418
## 16                    NA    9 1447.667  895.1723
## 17                   EIG    5 1923.000  848.1489
## 18                  OPEC   10 3993.800 3226.1970
## 19                    EU   28  941.000    0.0000
## 20                 CACAM    6 3523.500 4574.0541

Plot erstellen

library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Draw one box trace per group. Each trace is labelled "<group name> (<size>)"
# using the matching row of `group_data`; `group_values[[i]]` supplies the
# values for the i-th entry of `unique_groups`.
fig <- plot_ly(type = "box")
# seq_along() yields an empty sequence when there are no groups, whereas
# 1:length(x) would iterate over c(1, 0) and fail on group_values[[1]].
for (i in seq_along(unique_groups)) {
  fig <- add_boxplot(
    p = fig,
    y = group_values[[i]],
    name = paste0(group_data$name[i], " (", group_data$size[i], ")")
  )
}
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.