Für diesen Test verwende ich das Paket ldatuning. Des Weiteren nutze ich die Dokument-Term-Matrizen (DTMs) der vier Szenarien:
library(ldatuning)
library(topicmodels)
# Scenario 1: document-term matrix stored in dtm_EU_1.RData.
# load() restores the saved objects into the workspace and returns their
# names; the call below relies on one of them being called `dtm`.
loaded_objects <- load("~/iLCM/optimal_number_of_topics/dtm_EU_1.RData")
# Stop early if the file did not supply `dtm`: otherwise a `dtm` that is
# already in the workspace would be analysed without any warning.
stopifnot("dtm" %in% loaded_objects)

# Fit Gibbs-sampled models over the candidate topic counts and score them with
# the four requested metrics. The fixed seed keeps reruns comparable.
result1 <- FindTopicsNumber(
  dtm,
  # 8 to 20 in steps of one, then a coarser grid up to 100
  topics = c(8:20, 25, 30, 40, 50, 75, 100),
  metrics = c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014"),
  method = "Gibbs",
  control = list(seed = 77),
  # NOTE(review): hard-coded; presumably the number of parallel workers --
  # confirm that it suits the machine this runs on.
  mc.cores = 12L,
  verbose = TRUE
)
## fit models... done.
## calculate metrics:
## Griffiths2004... done.
## CaoJuan2009... done.
## Arun2010... done.
## Deveaud2014... done.
# Plot the metric curves, then print the underlying table.
FindTopicsNumber_plot(result1)
result1
## topics Griffiths2004 CaoJuan2009 Arun2010 Deveaud2014
## 1 8 -2578448 0.3483345 142.60582 1.6450391
## 2 9 -2567885 0.3491809 138.07401 1.6653001
## 3 10 -2557559 0.3271429 135.32985 1.6861334
## 4 11 -2539703 0.3157360 131.71314 1.7310469
## 5 12 -2536926 0.3212529 129.22437 1.7109810
## 6 13 -2534768 0.3119351 126.04527 1.7110147
## 7 14 -2525872 0.2977491 123.75780 1.7383053
## 8 15 -2504248 0.2709931 118.51581 1.7770629
## 9 16 -2518020 0.2878266 119.55910 1.7201098
## 10 17 -2506852 0.2664217 116.89605 1.7424904
## 11 18 -2491143 0.2513951 114.12443 1.7919701
## 12 19 -2490129 0.2356003 111.61893 1.7845710
## 13 20 -2494463 0.2501677 111.66916 1.7505247
## 14 25 -2491149 0.2083799 105.15811 1.7339959
## 15 30 -2487867 0.1743341 103.32189 1.7135715
## 16 40 -2469930 0.1316377 98.37042 1.6275296
## 17 50 -2477016 0.1120251 101.07534 1.5001233
## 18 75 -2505516 0.1115046 105.14446 1.1567538
## 19 100 -2497529 0.1504475 130.71397 0.9051722
# Scenario 2: document-term matrix stored in dtm_EU_29.RData.
# load() restores the saved objects into the workspace and returns their
# names; the call below relies on one of them being called `dtm`.
loaded_objects <- load("~/iLCM/optimal_number_of_topics/dtm_EU_29.RData")
# Stop early if the file did not supply `dtm`: otherwise a `dtm` left over
# from an earlier load would be analysed without any warning.
stopifnot("dtm" %in% loaded_objects)

# Fit Gibbs-sampled models over the candidate topic counts and score them with
# the four requested metrics. The fixed seed keeps reruns comparable.
result2 <- FindTopicsNumber(
  dtm,
  # 8 to 20 in steps of one, then a coarser grid up to 100
  topics = c(8:20, 25, 30, 40, 50, 75, 100),
  metrics = c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014"),
  method = "Gibbs",
  control = list(seed = 77),
  # NOTE(review): hard-coded; presumably the number of parallel workers --
  # confirm that it suits the machine this runs on.
  mc.cores = 12L,
  verbose = TRUE
)
## fit models... done.
## calculate metrics:
## Griffiths2004... done.
## CaoJuan2009... done.
## Arun2010... done.
## Deveaud2014... done.
# Plot the metric curves, then print the underlying table.
FindTopicsNumber_plot(result2)
result2
## topics Griffiths2004 CaoJuan2009 Arun2010 Deveaud2014
## 1 8 -2669101 0.3510717 150.63222 1.7246833
## 2 9 -2652165 0.3441926 143.86431 1.7168540
## 3 10 -2638264 0.3249980 140.34440 1.7620584
## 4 11 -2633163 0.3292602 137.30970 1.7354314
## 5 12 -2615991 0.3132699 129.85475 1.7723625
## 6 13 -2622020 0.3210556 132.28369 1.7303839
## 7 14 -2608078 0.2990217 128.00876 1.7656318
## 8 15 -2600355 0.2975373 124.57399 1.7660026
## 9 16 -2594511 0.2890138 122.01523 1.7778759
## 10 17 -2580810 0.2698499 118.47510 1.7948530
## 11 18 -2577528 0.2577908 118.65063 1.8027502
## 12 19 -2579169 0.2561881 117.32870 1.7897608
## 13 20 -2569520 0.2397683 117.41895 1.7977829
## 14 25 -2547902 0.1877694 108.96369 1.8187363
## 15 30 -2548852 0.1659741 108.23893 1.7623446
## 16 40 -2536191 0.1211090 97.26519 1.6685254
## 17 50 -2553371 0.1140594 104.53619 1.5177730
## 18 75 -2563561 0.1129723 113.02479 1.1678359
## 19 100 -2572755 0.1513491 122.23374 0.9354812
# Scenario 3: document-term matrix stored in dtm_PP_EU_1.RData.
# load() restores the saved objects into the workspace and returns their
# names; the call below relies on one of them being called `dtm`.
loaded_objects <- load("~/iLCM/optimal_number_of_topics/dtm_PP_EU_1.RData")
# Stop early if the file did not supply `dtm`: otherwise a `dtm` left over
# from an earlier load would be analysed without any warning.
stopifnot("dtm" %in% loaded_objects)

# Fit Gibbs-sampled models over the candidate topic counts and score them with
# the four requested metrics. The fixed seed keeps reruns comparable.
result3 <- FindTopicsNumber(
  dtm,
  # 8 to 20 in steps of one, then a coarser grid up to 100
  topics = c(8:20, 25, 30, 40, 50, 75, 100),
  metrics = c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014"),
  method = "Gibbs",
  control = list(seed = 77),
  # NOTE(review): hard-coded; presumably the number of parallel workers --
  # confirm that it suits the machine this runs on.
  mc.cores = 12L,
  verbose = TRUE
)
## fit models... done.
## calculate metrics:
## Griffiths2004... done.
## CaoJuan2009... done.
## Arun2010... done.
## Deveaud2014... done.
# Plot the metric curves, then print the underlying table.
FindTopicsNumber_plot(result3)
result3
## topics Griffiths2004 CaoJuan2009 Arun2010 Deveaud2014
## 1 8 -2484298 0.15572604 340.6293 2.354542
## 2 9 -2467714 0.16782735 332.3515 2.364057
## 3 10 -2458607 0.16992081 324.6557 2.337016
## 4 11 -2442756 0.16473384 318.2359 2.335298
## 5 12 -2425210 0.15403512 311.5766 2.332853
## 6 13 -2423400 0.15806624 305.4414 2.310756
## 7 14 -2411878 0.14679393 295.0657 2.339800
## 8 15 -2392401 0.13843689 287.8407 2.346362
## 9 16 -2381168 0.14007974 283.6461 2.346401
## 10 17 -2385534 0.14624828 280.4679 2.299738
## 11 18 -2381690 0.14431579 275.9350 2.304281
## 12 19 -2372159 0.14120205 271.2188 2.288635
## 13 20 -2372269 0.14232136 268.6370 2.258188
## 14 25 -2349081 0.12949915 248.1210 2.249515
## 15 30 -2343793 0.13401858 237.4217 2.172150
## 16 40 -2329563 0.11425515 217.8728 2.095942
## 17 50 -2323176 0.09739261 196.9080 2.020757
## 18 75 -2357699 0.07774345 207.6789 1.647673
## 19 100 -2387462 0.08593159 223.9873 1.292861
# Scenario 4: document-term matrix stored in dtm_PP_EU_29.RData.
# load() restores the saved objects into the workspace and returns their
# names; the call below relies on one of them being called `dtm`.
loaded_objects <- load("~/iLCM/optimal_number_of_topics/dtm_PP_EU_29.RData")
# Stop early if the file did not supply `dtm`: otherwise a `dtm` left over
# from an earlier load would be analysed without any warning.
stopifnot("dtm" %in% loaded_objects)

# Fit Gibbs-sampled models over the candidate topic counts and score them with
# the four requested metrics. The fixed seed keeps reruns comparable.
result4 <- FindTopicsNumber(
  dtm,
  # 8 to 20 in steps of one, then a coarser grid up to 100
  topics = c(8:20, 25, 30, 40, 50, 75, 100),
  metrics = c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014"),
  method = "Gibbs",
  control = list(seed = 77),
  # NOTE(review): hard-coded; presumably the number of parallel workers --
  # confirm that it suits the machine this runs on.
  mc.cores = 12L,
  verbose = TRUE
)
## fit models... done.
## calculate metrics:
## Griffiths2004... done.
## CaoJuan2009... done.
## Arun2010... done.
## Deveaud2014... done.
# Plot the metric curves, then print the underlying table.
FindTopicsNumber_plot(result4)
result4
## topics Griffiths2004 CaoJuan2009 Arun2010 Deveaud2014
## 1 8 -2591273 0.20335775 361.7873 2.294121
## 2 9 -2565141 0.16438646 346.3879 2.350113
## 3 10 -2541569 0.16121787 338.6469 2.371165
## 4 11 -2525893 0.13998974 327.1124 2.396232
## 5 12 -2504476 0.14943310 324.7737 2.371800
## 6 13 -2497992 0.14487503 316.3425 2.368529
## 7 14 -2497520 0.15414063 311.5686 2.323471
## 8 15 -2481643 0.15020760 305.4088 2.336152
## 9 16 -2486037 0.14991339 300.6334 2.318803
## 10 17 -2475599 0.15937725 297.8200 2.289480
## 11 18 -2455157 0.13676020 289.3816 2.369950
## 12 19 -2462938 0.14592057 287.0762 2.299454
## 13 20 -2459785 0.15104640 284.7150 2.296351
## 14 25 -2442544 0.13573676 265.4774 2.264474
## 15 30 -2421437 0.12649735 252.9074 2.227994
## 16 40 -2404089 0.10988331 229.5053 2.124785
## 17 50 -2412866 0.10339556 219.8442 2.013355
## 18 75 -2429295 0.07459367 210.0138 1.702282
## 19 100 -2473877 0.08993066 232.7439 1.301588