Get cooccurrence statistics.
Get cooccurrence statistics.
cooccurrences(.Object, ...) ## S4 method for signature 'corpus' cooccurrences( .Object, query, cqp = is.cqp, p_attribute = getOption("polmineR.p_attribute"), boundary = NULL, left = getOption("polmineR.left"), right = getOption("polmineR.right"), stoplist = NULL, positivelist = NULL, regex = FALSE, keep = NULL, cpos = NULL, method = "ll", mc = getOption("polmineR.mc"), verbose = FALSE, progress = FALSE, ... ) ## S4 method for signature 'character' cooccurrences( .Object, query, cqp = is.cqp, p_attribute = getOption("polmineR.p_attribute"), boundary = NULL, left = getOption("polmineR.left"), right = getOption("polmineR.right"), stoplist = NULL, positivelist = NULL, regex = FALSE, keep = NULL, cpos = NULL, method = "ll", mc = getOption("polmineR.mc"), verbose = FALSE, progress = FALSE, ... ) ## S4 method for signature 'slice' cooccurrences( .Object, query, cqp = is.cqp, left = getOption("polmineR.left"), right = getOption("polmineR.right"), p_attribute = getOption("polmineR.p_attribute"), boundary = NULL, stoplist = NULL, positivelist = NULL, keep = NULL, method = "ll", mc = FALSE, progress = TRUE, verbose = FALSE, ... ) ## S4 method for signature 'partition' cooccurrences( .Object, query, cqp = is.cqp, left = getOption("polmineR.left"), right = getOption("polmineR.right"), p_attribute = getOption("polmineR.p_attribute"), boundary = NULL, stoplist = NULL, positivelist = NULL, keep = NULL, method = "ll", mc = FALSE, progress = TRUE, verbose = FALSE, ... ) ## S4 method for signature 'subcorpus' cooccurrences( .Object, query, cqp = is.cqp, left = getOption("polmineR.left"), right = getOption("polmineR.right"), p_attribute = getOption("polmineR.p_attribute"), boundary = NULL, stoplist = NULL, positivelist = NULL, keep = NULL, method = "ll", mc = FALSE, progress = TRUE, verbose = FALSE, ... 
) ## S4 method for signature 'context' cooccurrences(.Object, method = "ll", verbose = FALSE) ## S4 method for signature 'partition_bundle' cooccurrences(.Object, query, mc = getOption("polmineR.mc"), ...) ## S4 method for signature 'Cooccurrences' cooccurrences(.Object, query) ## S4 method for signature 'remote_corpus' cooccurrences(.Object, ...) ## S4 method for signature 'remote_subcorpus' cooccurrences(.Object, ...)
.Object |
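An object for which a `cooccurrences` method is defined: a `corpus`, `subcorpus`, `partition`, `slice`, `context`, `partition_bundle`, `Cooccurrences`, `remote_corpus` or `remote_subcorpus` object, or a `character` vector naming a corpus. |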
... |
Further parameters that will be passed into bigmatrix (applies only if big = TRUE). |
query |
A query, either a character vector to match a token, or a CQP query. |
cqp |
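Defaults to `is.cqp`, which determines whether `query` is a CQP query; alternatively, a `logical` value (`TRUE`/`FALSE`) can be supplied. |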
p_attribute |
The p-attribute of the tokens/the query. |
boundary |
If provided, it will be checked that the corpus positions of windows do not extend beyond the left and right boundaries of the region defined by the s-attribute where the match occurs. |
left |
Number of tokens to the left of the query match. |
right |
Number of tokens to the right of the query match. |
stoplist |
Exclude a query hit from analysis if stopword(s) is/are in
context (relevant only if query is not `NULL`). |
positivelist |
Character vector or numeric vector: include a query hit
only if token in `positivelist` is present in the context. |
regex |
A `logical` value, whether `stoplist` and `positivelist` are interpreted as regular expressions. |
keep |
list with tokens to keep |
cpos |
An integer vector with corpus positions; defaults to `NULL`, in which case the corpus positions for the whole corpus will be used. |
method |
The statistical test(s) to use (defaults to "ll"). |
mc |
whether to use multicore |
verbose |
A `logical` value, whether to output messages. |
progress |
A `logical` value, whether to show a progress bar. |
a cooccurrences-class object
Andreas Blaette
Baker, Paul (2006): Using Corpora in Discourse Analysis. London: Continuum, pp. 95-120 (ch. 5).
Manning, Christopher D.; Schuetze, Hinrich (1999): Foundations of Statistical Natural Language Processing. MIT Press: Cambridge, Mass., pp. 151-189 (ch. 5).
See the documentation for the `ll`-method for an explanation of the computation of the log-likelihood statistic.
# Examples for the cooccurrences() method.
# One statement per line: on a single physical line the `#` comments below
# would comment out all code that follows them.

use("polmineR")

# Cooccurrences of a query within a partition
merkel <- partition(
  "GERMAPARLMINI",
  interjection = "speech", speaker = ".*Merkel",
  regex = TRUE
)
merkel <- enrich(merkel, p_attribute = "word")
cooc <- cooccurrences(merkel, query = "Deutschland")

# use subset-method to filter results
a <- cooccurrences("REUTERS", query = "oil")
b <- subset(a, !is.na(ll))
c <- subset(b, !word %in% tm::stopwords("en"))
d <- subset(c, count_coi >= 5)
# Filter `d` (not `c`) so that the count_coi threshold is kept, matching the
# piped version of the same filter chain below.
e <- subset(d, ll >= 10.83)
format(e)

# using pipe operator may be convenient
if (require(magrittr)) {
  cooccurrences("REUTERS", query = "oil") %>%
    subset(!is.na(ll)) %>%
    subset(!word %in% tm::stopwords("en")) %>%
    subset(count_coi >= 5) %>%
    subset(ll >= 10.83) %>%
    format()
}

# Cooccurrences for every partition in a partition_bundle: keep only those
# partitions in which the query occurs at least 25 times
pb <- partition_bundle("GERMAPARLMINI", s_attribute = "speaker")
pb_min <- pb[[
  count(pb, query = "Deutschland")[Deutschland >= 25][["partition"]]
]]
y <- cooccurrences(pb_min, query = "Deutschland")
if (interactive()) y[[1]]
if (interactive()) y[[2]]

# NOTE(review): `%>%` is used here outside the magrittr guard above --
# confirm the pipe is available when magrittr is not attached.
y2 <- corpus("GERMAPARLMINI") %>%
  subset(speaker %in% c("Hubertus Heil", "Angela Dorothea Merkel")) %>%
  split(s_attribute = "speaker") %>%
  cooccurrences(query = "Deutschland")
Please choose more modern alternatives, such as Google Chrome or Mozilla Firefox.