<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="FeedCreator 1.8" -->
<?xml-stylesheet href="http://wiki.korpus.cz/lib/exe/css.php?s=feed" type="text/css"?>
<rdf:RDF
    xmlns="http://purl.org/rss/1.0/"
    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
    xmlns:dc="http://purl.org/dc/elements/1.1/">
    <channel rdf:about="http://wiki.korpus.cz/feed.php">
        <title>Příručka ČNK - en:cnk</title>
        <description>Báze znalostí z korpusové lingvistiky</description>
        <link>http://wiki.korpus.cz/</link>
        <image rdf:resource="http://wiki.korpus.cz/lib/exe/fetch.php/wiki:dokuwiki.svg" />
        <dc:date>2026-04-30T08:11:47+00:00</dc:date>
        <items>
            <rdf:Seq>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:aibrown?rev=1760357437&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:aikoditex?rev=1773017289&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:anotacni_standard_cnk?rev=1768562529&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:aranea?rev=1775647826&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:baltischebriefe?rev=1731065001&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:bmk?rev=1445626963&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:capek?rev=1576761262&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:cep?rev=1576491883&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:citace?rev=1697210155&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:codit?rev=1617020283&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:czesl-man?rev=1605623347&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:czesl-plain?rev=1533639143&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:czesl-sgt-basic?rev=1572547233&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:czesl-sgt?rev=1605620641&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:dewac?rev=1445680154&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:diakorp?rev=1706800467&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:dialekt?rev=1641394903&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:dotko?rev=1695813968&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:eebo?rev=1748435803&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:etalon?rev=1622654065&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:fictree?rev=1513621472&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:frwac?rev=1445680414&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:fsc2000?rev=1445539282&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:hotko?rev=1614890864&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:intercorp?rev=1727772179&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:itwac?rev=1445680607&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:jazyky-v-migraci?rev=1688996030&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:jerome?rev=1445546471&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:kh-dopisy?rev=1623929100&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:kh-noviny?rev=1623929054&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:klasifikace_textu_syn2015?rev=1768559385&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:klaus?rev=1732194762&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:koditex?rev=1541085330&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:ksk-dopisy?rev=1677071601&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:ksp?rev=1761118776&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:lemtag_mluv?rev=1749209865&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:lestrepublicain?rev=1722756022&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:lindsei_cz?rev=1493299596&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:link?rev=1445678935&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:mda?rev=1598868062&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:net?rev=1613379524&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:nkjp?rev=1542035375&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:obc?rev=1612967951&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:online?rev=1671719107&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:onomos?rev=1752589264&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:oral?rev=1700480137&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:oral2006?rev=1445622087&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:oral2008?rev=1445621759&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:oral2013?rev=1512254138&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:orator?rev=1769165287&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:ortofon?rev=1769165234&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:orwell?rev=1445607601&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:parlcorp?rev=1622898345&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:pmk?rev=1445625683&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:psalm77?rev=1673955732&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:registrova_klasifikace?rev=1598976370&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:schola2010?rev=1588872531&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:skript2012?rev=1474895558&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:speeches?rev=1445618933&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn?rev=1769159264&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2000?rev=1481456308&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2005?rev=1481454710&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2006pub?rev=1615890322&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2009pub?rev=1615890397&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2010?rev=1481470049&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2013pub?rev=1615890599&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2015?rev=1598975168&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2020?rev=1654774618&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:syn2025?rev=1768816864&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:totalita?rev=1677160402&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:ukwac?rev=1445680813&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:uvod?rev=1769159976&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:veda?rev=1708890014&amp;do=diff"/>
                <rdf:li rdf:resource="http://wiki.korpus.cz/doku.php/en:cnk:vespa_cz?rev=1671651721&amp;do=diff"/>
            </rdf:Seq>
        </items>
    </channel>
    <image rdf:about="http://wiki.korpus.cz/lib/exe/fetch.php/wiki:dokuwiki.svg">
        <title>Příručka ČNK</title>
        <link>http://wiki.korpus.cz/</link>
        <url>http://wiki.korpus.cz/lib/exe/fetch.php/wiki:dokuwiki.svg</url>
    </image>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:aibrown?rev=1760357437&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-10-13T12:10:37+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>aibrown</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:aibrown?rev=1760357437&amp;do=diff</link>
        <description>AI-Brown

AI-Brown is a generated, annotated, multi-genre corpus of English texts produced by large language models (LLMs). 
     Positions  Number of positions (tokens)   27 661 454  Number of positions (excl. punctuation)   23 975 982  Number of word forms (excl. punctuation)</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:aikoditex?rev=1773017289&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-03-09T00:48:09+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>aikoditex</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:aikoditex?rev=1773017289&amp;do=diff</link>
        <description>AI-Koditex

AI-Koditex is a generated, annotated, multi-genre corpus of Czech texts produced by large language models (LLMs). 
     Positions  Number of positions (tokens)   24 030 795  Number of positions (excl. punctuation)   20 180 737  Number of word forms (excl. punctuation)</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:anotacni_standard_cnk?rev=1768562529&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-01-16T11:22:09+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>anotacni_standard_cnk</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:anotacni_standard_cnk?rev=1768562529&amp;do=diff</link>
        <description>Unified CNC Annotation Scheme (Tokenization, Lemmatization, Morphology)

The Czech National Corpus uses in its synchronous written corpora (starting with SYN2020 and SYN_v9, followed with e.g. NET and ONLINE corpora) as well as in its spoken corpora (Ortofon_v3) a unified annotation scheme for morphological tagging and lemmatization. The annotation standard includes tokenization (defining tokens in text), lemmatization (basic dictionary forms of tokens), and morphological tagging including specia…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:aranea?rev=1775647826&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-04-08T11:30:26+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>aranea</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:aranea?rev=1775647826&amp;do=diff</link>
        <description>Corpora Aranea

Aranea is a family of comparable web corpora prepared by Vladimír Benko. See more here.

Corpora available by now (December 2014)

	*  Araneum Anglicum Maius &amp; Minus 14.12
	*  Araneum Anglicum Asiaticum Maius &amp; Minus 14.10
	*  Araneum Finnicum Maius &amp; Minus 14.08</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:baltischebriefe?rev=1731065001&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-11-08T11:23:21+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>baltischebriefe</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:baltischebriefe?rev=1731065001&amp;do=diff</link>
        <description>Baltische Briefe

The “Baltische Briefe” (Baltic Letters) is a historical newspaper that served as an important platform for the Baltic German community, especially after their resettlement during and after the Second World War. The newspaper documented the experiences, culture and heritage of the Baltic Germans and provided a link to their ancestral homeland in Estonia, Latvia and Lithuania. It became an important medium for preserving cultural identity and sharing historical narratives within …</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:bmk?rev=1445626963&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-23T19:02:43+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>bmk</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:bmk?rev=1445626963&amp;do=diff</link>
        <description>Brno Spoken Corpus
     Number of positions (tokens)   596 009  Number of positions (tokens) without punctuation and other marks   500 460  Number of word forms (words)    39 615  Number of recordings of dialogues   250  Number of utterances   27 921</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:capek?rev=1576761262&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2019-12-19T13:14:22+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>capek</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:capek?rev=1576761262&amp;do=diff</link>
        <description>Corpora of texts by Karel Čapek

&#039;capek&#039; and &#039;capek_uplny&#039; are author corpora of texts written by Karel Čapek that have been created as the data source of Dictionary of Karel Čapek. &#039;capek&#039; corpus contains all texts that have been undoubtedly written by himself (i.e. with no co-authors and without possible influence of a partner or translation original), while &#039;capek_uplny&#039; corpus is a full collection of all the texts that Karel Čapek participated in (e.g. including texts co-authored by his brothe…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:cep?rev=1576491883&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2019-12-16T10:24:43+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>cep</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:cep?rev=1576491883&amp;do=diff</link>
        <description>Cep corpus

&#039;cep&#039; is an author corpus of prose texts written by Jan Čep that has been created by Richard Změlík as the data source for his monograph Kvantitativně-korpusová analýza a literární věda. Please note that there is also a separate website where some quantitative data have been published.

The corpus is lemmatized and morphologically tagged, the annotation corresponds to that of the monograph. For a detailed description and organization of the &#039;cep&#039; corpus, please refer to pages 80…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:citace?rev=1697210155&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-10-13T15:15:55+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>citace</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:citace?rev=1697210155&amp;do=diff</link>
        <description>How to cite corpora accessed through the CNC

Ways of citing the corpora

There are essentially two ways in which corpora can be cited in an academic text:

	*  listing the corpus as a reference or source of data;
	*  listing a reference to the specific article which describes the creation and the structure of the corpus.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:codit?rev=1617020283&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-03-29T12:18:03+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>codit</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:codit?rev=1617020283&amp;do=diff</link>
        <description>CODIT corpus

Corpus diacronico dell’italiano -- ‘Diachronic corpus of Italian’



The CODIT corpus is a balanced diachronic corpus of written Italian of around 33 million tokens. The corpus has been compiled by Maria Silvia Micheli and it covers a period ranging from the earliest attestations of Italian language (i.e. the 13th century) to 1947. Its structure recalls that shown by the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:czesl-man?rev=1605623347&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2020-11-17T14:29:07+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>czesl-man</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:czesl-man?rev=1605623347&amp;do=diff</link>
        <description>CzeSL-man – a corpus of non-native Czech with manual error annotation in a simplified tiered scheme

CzeSL-man is the name used in the search interface KonText for CzeSL-man v1 searchable, a corpus including annotated texts of non-native speakers of Czech. It is part of the texts from the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:czesl-plain?rev=1533639143&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2018-08-07T10:52:23+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>czesl-plain</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:czesl-plain?rev=1533639143&amp;do=diff</link>
        <description>The CzeSL-plain corpus

The learner corpus CzeSL-plain (Czech as a Second Language, plain = without annotation) is one of the results of the project Innovation of Education in the Field of Czech as a Second Language, a part of the operational program</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:czesl-sgt-basic?rev=1572547233&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2019-10-31T18:40:33+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>czesl-sgt-basic</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:czesl-sgt-basic?rev=1572547233&amp;do=diff</link>
        <description>CzeSL-SGT-basic – a corpus of non-native Czech with simplified search options

The CzeSL-SGT-basic corpus is based on the CzeSL-SGT corpus (Czech as a Second Language with Spelling, Grammar and Tags), which includes transcriptions of essays written by non-native speakers of Czech, extending the “foreign” (ciz) part of the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:czesl-sgt?rev=1605620641&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2020-11-17T13:44:01+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>czesl-sgt</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:czesl-sgt?rev=1605620641&amp;do=diff</link>
        <description>CzeSL-SGT – a corpus of non-native speakers’ Czech with automatic annotation

The CzeSL-SGT corpus (Czech as a Second Language with Spelling, Grammar and Tags) includes transcriptions of essays written by non-native speakers of Czech, extending the “foreign” (ciz) part of the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:dewac?rev=1445680154&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-24T09:49:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>dewac</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:dewac?rev=1445680154&amp;do=diff</link>
        <description>Corpus deWaC

deWaC is a 1.7 billion word corpus constructed from the Web limiting the crawl to the .de domain and using medium-frequency words from the SudDeutsche Zeitung corpus and basic German vocabulary lists as seeds. The corpus was POS-tagged and lemmatized with the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:diakorp?rev=1706800467&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-02-01T15:14:27+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>diakorp</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:diakorp?rev=1706800467&amp;do=diff</link>
        <description>Diakorp

Diakorp represents the diachronic section of the Czech National Corpus and aims to cover the texts of a total of seven centuries of the Czech language development. The first completed version (approximately 700 000 word forms) of the corpus was made accessible to the public in September 2005. Making the data public after the processing phase continues at a pace of about 250 000 word forms yearly.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:dialekt?rev=1641394903&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-01-05T15:01:43+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>dialekt</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:dialekt?rev=1641394903&amp;do=diff</link>
        <description>Dialekt_dial•v2 Dialekt_ort•v2 Number of positions (tokens)   310 200  298 539 Number of positions (tokens) without punctuation and other symbols   223 281  223 327 Number of  word forms (words)   33 715  25 360 Number of recordings   972 Number of utterances   43 628 Number of speakers   291 Length of recordings (hh:mm:ss.ms)</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:dotko?rev=1695813968&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-09-27T11:26:08+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>dotko</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:dotko?rev=1695813968&amp;do=diff</link>
        <description>Corpus of Lower Sorbian

DOTKO v2 (DOlnoserbski Tekstowy KOrpus) is an extended version of the diachronic corpus of Lower Sorbian prepared by the Cottbus-Chóśebuz branch of the Sorbian Institute. It includes the largest part of historical Lower Sorbian prints from the beginning of the 18th century until the complete ban on the public use of Sorbian in 1937. The oldest text in the corpus at this time dates from 1706, the most recent from 1936. A substantial part of it consists of the texts of Bra…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:eebo?rev=1748435803&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-05-28T12:36:43+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>eebo</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:eebo?rev=1748435803&amp;do=diff</link>
        <description>EEBO (Early English Books Online)

The EEBO version 1 corpus contains more than 25 000 English texts from the period 1475--1700, which were digitalized by the Text Creation Partnership organization during Phase 1 of the Early English Books Online project; a detailed description of the digitalization process is available</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:etalon?rev=1622654065&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-06-02T17:14:25+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>etalon</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:etalon?rev=1622654065&amp;do=diff</link>
        <description>Corpus Etalon: manually annotated corpus of Czech texts

The Etalon corpus is a synchronic morphologically annotated corpus of written Czech. The morphological tagging was performed manually, following the same principles as in SYN2020.
     Position  Number of tokens</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:fictree?rev=1513621472&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2017-12-18T18:24:32+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>fictree</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:fictree?rev=1513621472&amp;do=diff</link>
        <description>FicTree: a manually annotated treebank of Czech fiction

The FicTree treebank is a syntactically annotated corpus of Czech fiction. It consists of 135,000 words (166,000 tokens).  The lemmatization, the morphological and syntactic annotation were performed manually.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:frwac?rev=1445680414&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-24T09:53:34+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>frwac</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:frwac?rev=1445680414&amp;do=diff</link>
        <description>Corpus frWaC

frWaC is a 1.6 billion word corpus constructed from the Web limiting the crawl to the .fr domain and using medium-frequency words from the Le Monde Diplomatique corpus and basic French vocabulary lists as seeds. The corpus was POS-tagged and lemmatized with the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:fsc2000?rev=1445539282&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-22T18:41:22+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>fsc2000</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:fsc2000?rev=1445539282&amp;do=diff</link>
        <description>Corpus FSC2000

The FSC2000 Corpus is a reference source and a complement to the Frequency Dictionary of Czech (FSČ), which was published at the end of 2004 by NLN. The FSC2000 Corpus is based on the SYN2000 corpus and its development is described in Czech</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:hotko?rev=1614890864&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-03-04T20:47:44+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>hotko</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:hotko?rev=1614890864&amp;do=diff</link>
        <description>Corpus of Upper Sorbian

HOTKO (HOrnjoserbski Tekstowy KOrpus) is a corpus of Upper Sorbian, being built at the Sorbian Institute in Bautzen (Budyšin). It consists of journalistic, fiction, religious and scientific texts from the mid-19th century until today. Most of the corpus consists of journalistic texts (57 %) and fiction (23 %); a number of dictionaries are included as well (12 %). Regarding the time periods covered, more than half of the texts come from the recent period after th…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:intercorp?rev=1727772179&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-10-01T08:42:59+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>intercorp</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:intercorp?rev=1727772179&amp;do=diff</link>
        <description>InterCorp

InterCorp is a large parallel synchronic corpus covering a number of languages. The corpus is compiled mostly by teachers and students of the Faculty of Arts, Charles University in Prague, and by other collaborators of the ICNC. It serves as a source of data for theoretical studies, lexicography, student research, (foreign) language learning, computer applications, translators and also for the general public.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:itwac?rev=1445680607&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-24T09:56:47+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>itwac</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:itwac?rev=1445680607&amp;do=diff</link>
        <description>Corpus itWaC

itWaC is a 2 billion word corpus constructed from the Web limiting the crawl to the .it domain and using medium-frequency words from the Repubblica corpus and basic Italian vocabulary lists as seeds. The corpus was POS-tagged with the TreeTagger using this</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:jazyky-v-migraci?rev=1688996030&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-07-10T13:33:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>jazyky-v-migraci</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:jazyky-v-migraci?rev=1688996030&amp;do=diff</link>
        <description>Languages in Migration

Corpus description

The Languages in Migration corpus is a record of the spontaneous language production of speakers using informal spoken Czech and German. The speakers interviewed in 2018, 2019 and 2020 look back on their language biographies in Czechoslovakia, particularly in its Czech-speaking part, and in the Federal Republic of Germany in their narratives. The part of the interview that relates to Czechoslovakia is conducted in German in order to elicit morphosy…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:jerome?rev=1445546471&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-22T20:41:11+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>jerome</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:jerome?rev=1445546471&amp;do=diff</link>
        <description>Corpus Jerome

Corpus JEROME is a monolingual comparable corpus specifically designed for analyzing translated Czech. It comprises more than 85 million tokens (including punctuation) and includes both fiction and professional literature. As a comparable corpus, it contains, in equal amounts, both translated and non-translated Czech (however, not original in the sense of source texts!). The non-translated part serves as a reference corpus.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:kh-dopisy?rev=1623929100&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-06-17T11:25:00+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>kh-dopisy</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:kh-dopisy?rev=1623929100&amp;do=diff</link>
        <description>Karel Havlíček’s Correspondence Corpus

Karel Havlíček’s Correspondence Corpus (KH-DOPISY) contains all letters related to Czech writer and journalist Karel Havlíček (1821—1856), i.e. either written by or addressed to him, including those sent to the newspaper offices led by him. The letters are written in various languages (mostly Czech, often German, seldom Polish, French, Russian, etc.).</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:kh-noviny?rev=1623929054&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-06-17T11:24:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>kh-noviny</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:kh-noviny?rev=1623929054&amp;do=diff</link>
        <description>Karel Havlíček’s Journalism Corpus

Karel Havlíček’s Journalism Corpus (KH-NOVINY) contains all journalistic text written by Karel Havlíček (1821—1856) and published in his periodicals Pražské noviny (Prague Newspaper, 1846—1848), including its supplement Česká včela (The Czech Bee), and Národní noviny (National Newspaper, 1848—1850). The activities of Karel Havlíček, the founder of modern Czech journalism, document the history of substantial political and social changes in the era when the pres…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:klasifikace_textu_syn2015?rev=1768559385&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-01-16T10:29:45+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>klasifikace_textu_syn2015</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:klasifikace_textu_syn2015?rev=1768559385&amp;do=diff</link>
        <description>Overview of text classification in SYN2015

Texts in the SYN2015 corpus are divided into three main groups (txtype_group):

	*  FIC: fiction
	*  NFC: non-fiction
	*  NMG: newspapers and magazines

Each of these groups makes up one third of all texts in the corpus.

1. Fiction</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:klaus?rev=1732194762&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-11-21T13:12:42+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>klaus</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:klaus?rev=1732194762&amp;do=diff</link>
        <description>Václav Klaus Corpus

Václav Klaus Corpus (&#039;VK&#039;) is an author corpus of texts by Václav Klaus which was created as a data basis for the thesis Václav Klaus’ Idiolect: A Corpus-based Analysis. The data used for the creation of the corpus were sourced from his official website, which contains texts intended primarily for this website, as well as texts originally published elsewhere (e.g. newspaper articles or magazine interviews) or created for specific events (e.g., presidential speeches or lectur…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:koditex?rev=1541085330&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2018-11-01T15:15:30+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>koditex</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:koditex?rev=1541085330&amp;do=diff</link>
        <description>The Koditex Corpus

Koditex is a synchronic, representative and reference 9-million-word corpus (excl. punctuation) compiled for the purpose of conducting a multidimensional analysis (MDA) of Czech.
     Positions  Number of positions (tokens)   10,880,550</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:ksk-dopisy?rev=1677071601&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-02-22T13:13:21+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ksk-dopisy</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:ksk-dopisy?rev=1677071601&amp;do=diff</link>
        <description>Private Correspondence Corpus
 Name  KSK-dopisy  Number of letters    2000  Number of positions (tokens)    942 573  Number of positions (tokens) without punctuation and other marks   764 918  Number of word forms (words)   76 587  Letters from    1990</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:ksp?rev=1761118776&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-10-22T07:39:36+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ksp</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:ksp?rev=1761118776&amp;do=diff</link>
        <description>Corpus of Contemporary Czech Poetry (C3P)

C3P is a joint project of the Institute of Czech Literature of CAS and the Institute of the Czech National Corpus, dating back to 2015. As the name suggests, it is a corpus of contemporary Czech poetry texts (delimited by the years 1990 and 2020), i.e. a representative sample of Czech poetry over the last three decades. Significantly, this sample includes not only texts officially published in poetry books, and thus having gone through the standard edit…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:lemtag_mluv?rev=1749209865&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-06-06T11:37:45+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>lemtag_mluv</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:lemtag_mluv?rev=1749209865&amp;do=diff</link>
        <description>Lemmatization and tagging in spoken corpora

Lemmatizing and tagging a transcription of spoken language is much more demanding than for written language. There is a larger amount of unknown forms (reduced pronunciation, dialectal forms, neologisms), which can be homonymous with forms contained in the morphological dictionary for written language (e.g.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:lestrepublicain?rev=1722756022&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-08-04T07:20:22+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>lestrepublicain</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:lestrepublicain?rev=1722756022&amp;do=diff</link>
        <description>Corpus lEstRepublicain

Corpus consists of 3 volumes (1999, 2002, 2003; not all of them complete) of French regional newspaper L&#039;Est Républicain. After the deduplication it contains almost 73 million words in version 2 (v1 had almost 120 million words) and it was built from</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:lindsei_cz?rev=1493299596&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2017-04-27T13:26:36+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>lindsei_cz</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:lindsei_cz?rev=1493299596&amp;do=diff</link>
        <description>LINDSEI_CZ

Learner corpus of spontaneous spoken English by advanced speakers, whose L1 is Czech.

History and present situation

The learner corpus LINDSEI_CZ was created as part of the international LINDSEI project, organized by the Centre for English Corpus Linguistics at Université catholique de Louvain. LINDSEI supplements the written learner corpus, the International Corpus of Learner English (</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:link?rev=1445678935&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-24T09:28:55+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>link</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:link?rev=1445678935&amp;do=diff</link>
        <description>Corpus LINK

LINK (originally LIngvistův Narozeninový Korpus, i.e. Linguist’s Birthday Corpus, created on the occasion of Professor František Čermák’s birthday) is a corpus comprising exclusively linguistic texts. It is thus designed especially for the research of academic language specifics (study of terminology, the language of linguistics etc.).</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:mda?rev=1598868062&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2020-08-31T10:01:02+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>mda</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:mda?rev=1598868062&amp;do=diff</link>
        <description>Multidimensional analysis of Czech

Multidimensional analysis (MDA) is a method developed by Douglas Biber 
for the empirical research of textual variability. MDA is based on the assumption that textual variability is manifested in the utilization of linguistic features from different levels (from phonology and morphology through lexicon to syntax and pragmatics). When designing a text, the use of one set of features is often conditioned or supported by the use of another, which leads to the ass…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:net?rev=1613379524&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-02-15T08:58:44+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>net</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:net?rev=1613379524&amp;do=diff</link>
        <description>NET Corpus
       Positions  Number of positions (tokens)   51 733 873   176 365 880  Number of word forms   1 245 717   2 637 707  Number of lemmas   750 650   1 744 001  Structures  Number of documents &lt;doc&gt;   1 279   12 738  Number of texts &lt;text&gt;   267 026   1 817 088  Number of paragraphs &lt;p&gt;   267 026   1 817 088</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:nkjp?rev=1542035375&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2018-11-12T15:09:35+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>nkjp</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:nkjp?rev=1542035375&amp;do=diff</link>
        <description>The NKJP_1M corpus

The NKJP_1M corpus is a manually annotated one million word subcorpus of the  National Corpus of Polish (NKJP – Narodowy Korpus Języka Polskiego), composed of various text samples (see below). It is a corpus of contemporary Polish with texts published after the year 1945; it contains written, spoken and web communication. The corpus features lemmatisation, morphological annotation, and representative coverage of text categories.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:obc?rev=1612967951&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-02-10T14:39:11+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>obc</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:obc?rev=1612967951&amp;do=diff</link>
        <description>OBC: The Old Bailey Corpus 2.0

The Old Bailey Corpus is a sociolinguistically, pragmatically and textually annotated corpus based on a selection of the Proceedings of Old Bailey. It consists of 637 texts recording trial proceedings which took place between 1720 and 1913 at Old Bailey, London. There are more than 24 million words in the corpus - its overall size is over 35 million tokens (including words, punctuation, etc.). More detailed information about the corpus is available</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:online?rev=1671719107&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-12-22T14:25:07+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>online</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:online?rev=1671719107&amp;do=diff</link>
        <description>ONLINE corpora

ONLINE corpora together create a monitor corpus of the dynamic content of the Czech web, i.e. predominantly internet journalism, to some extent also discussions, forums and social networks. The corpus spans the period from 2017 to the present.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:onomos?rev=1752589264&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2025-07-15T14:21:04+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>onomos</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:onomos?rev=1752589264&amp;do=diff</link>
        <description>The OnomOs v1 and OnomOs v2 corpora

The OnomOs v1 and OnomOs v2 corpora are linguistically processed databases of texts from the periodicals Rudé právo (published 1920–1995) and Právo (1995–present). The OnomOs v1 corpus includes one randomly selected issue from each decade in which Rudé právo was published. The composition of the corpus is detailed in Figure 1; the corpus contains a total of 255,149 tokens.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:oral?rev=1700480137&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-11-20T11:35:37+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>oral</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:oral?rev=1700480137&amp;do=diff</link>
        <description>ORAL Corpus

The ORAL corpus is a corpus containing the transcribed recordings of predominantly informal conversations taking place between native speakers of Czech from all regions of the Czech Republic. The speakers knew each other very well (they were either friends or family members) and they were recorded in their natural environment. The recordings were made over the course of ten years, between 2002 and 2011. The corpus is not balanced, with the majority of the data originating from the B…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:oral2006?rev=1445622087&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-23T17:41:27+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>oral2006</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:oral2006?rev=1445622087&amp;do=diff</link>
        <description>Czech Spoken Corpus ORAL2006
     Number of positions (tokens)   1 312 282  Number of positions (tokens) without punctuation and other marks   1 000 798  Number of word forms (words)   64 495  Number of recordings of dialogues   221  Number of utterances</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:oral2008?rev=1445621759&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-23T17:35:59+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>oral2008</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:oral2008?rev=1445621759&amp;do=diff</link>
        <description>Corpus of Spoken Czech ORAL2008
     Number of positions (tokens)   1 349 536  Number of positions (tokens) without punctuation and other marks   1 000 097  Number of word forms (words)   65 778  Number of recordings of dialogues   297  Number of utterances</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:oral2013?rev=1512254138&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2017-12-02T22:35:38+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>oral2013</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:oral2013?rev=1512254138&amp;do=diff</link>
        <description>Corpus of informal spoken Czech ORAL2013
     Number of positions (tokens)   3 285 508  Number of positions (tokens) without punctuation and other marks   2 785 189  Number of word forms (words)   131 246  Number of recordings of dialogues   835  Number of utterances</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:orator?rev=1769165287&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-01-23T10:48:07+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>orator</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:orator?rev=1769165287&amp;do=diff</link>
        <description>Corpus of monologues: ORATOR

The ORATOR corpus contains monologues by native Czech speakers. The typical situations include a lecture, instruction, guided tour, welcome address, sermon etc. The speech is usually prepared and the speaker has to fit within the given time frame. To our knowledge, there is no corpus with this kind of data available for Czech.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:ortofon?rev=1769165234&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-01-23T10:47:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ortofon</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:ortofon?rev=1769165234&amp;do=diff</link>
        <description>Corpus of informal spoken Czech with multi-tier transcription: ORTOFON

The ORTOFON corpus captures spontaneous spoken language used in informal situations between speakers who know each other. It follows the ORAL series of informal spoken Czech corpora in its data collection design. The recordings are transcribed in two tiers - orthographic and phonetic. Together with the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:orwell?rev=1445607601&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-23T13:40:01+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>orwell</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:orwell?rev=1445607601&amp;do=diff</link>
        <description>Corpus ORWELL

This corpus was created as part of the EU Multext-East project and it is formed by the text of George Orwell&#039;s novel 1984 (from the English original translated by Eva Šimečková; Prague: Naše vojsko, 1991). The corpus contains c. 80 thousand words and 20 thousand punctuation marks, that is approximately 100 thousand of corpus positions and it is morphologically tagged. The relatively small size of this corpus allowed the hand-correction of mistakes, which were created during the au…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:parlcorp?rev=1622898345&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-06-05T13:05:45+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>parlcorp</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:parlcorp?rev=1622898345&amp;do=diff</link>
        <description>ParlCorp: Corpus of Czech Parliamentary Speeches

The ParlCorp  is a corpus of speeches delivered in the Lower Chamber of the Czech Parliament (Poslanecká Sněmovna). The core of the corpus is constituted by electronic transcripts of parliamentary debates, publicly available at</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:pmk?rev=1445625683&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-23T18:41:23+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>pmk</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:pmk?rev=1445625683&amp;do=diff</link>
        <description>Prague Spoken Corpus
     Number of positions (tokens)   819 267  Number of positions (tokens) without punctuation and other marks   674 992  Number of word forms (words)    49 089  Number of recordings of dialogues   304  Number of utterances   15 710</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:psalm77?rev=1673955732&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-01-17T11:42:12+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>psalm77</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:psalm77?rev=1673955732&amp;do=diff</link>
        <description>Psalm 77 Corpus

Psalm 77 translation and parallel corpus of Old Romanian

About the Project

The Psalm 77 corpus is the result of a pilot project carried out in autumn 2022 which aligns all sixteenth-century Romanian versions of psalm 77 as well as the Slavonic and the Greek texts of the same psalm. The corpus was compiled thanks to the technical support of the Institute of the Czech National Corpus (</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:registrova_klasifikace?rev=1598976370&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2020-09-01T16:06:10+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>registrova_klasifikace</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:registrova_klasifikace?rev=1598976370&amp;do=diff</link>
        <description>Classification of registers

The classification of registers in the CNC corpora serves to provide an insight into textual variability that is complementary to the txtype categories and genres. While these attributes capture a text-external perspective (how the text is conventionally perceived based on formal characteristics, e.g. novel, letter or scientific article), the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:schola2010?rev=1588872531&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2020-05-07T17:28:51+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>schola2010</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:schola2010?rev=1588872531&amp;do=diff</link>
        <description>School Lessons Corpus SCHOLA2010
     Number of positions (tokens)  1 046 600  Number of positions (tokens) without punctuation and other marks  828 038 or 792 764     Number of word forms (words)  64 329  Number of recordings of dialogues  204  Number of speaker turns</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:skript2012?rev=1474895558&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2016-09-26T13:12:38+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>skript2012</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:skript2012?rev=1474895558&amp;do=diff</link>
        <description>Corpus SKRIPT2012

Corpus SKRIPT2012 is a learner corpus aiming at representing the written language of Czech pupils and students at elementary and secondary schools. It consists of transcripts of students&#039; written assignments which were produced during their language classes. It contains 708,668 positions.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:speeches?rev=1445618933&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-23T16:48:53+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>speeches</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:speeches?rev=1445618933&amp;do=diff</link>
        <description>Corpus SPEECHES
     Number of positions (tokens)   248 839  Number of positions (tokens) without punctuation and other marks   217 314  Number of word forms (words)   30 909  Number of lemmata   12 522  Number of speeches   151  Number of sentences</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn?rev=1769159264&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-01-23T09:07:44+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn?rev=1769159264&amp;do=diff</link>
        <description>SYN corpus

SYN is a non-reference corpus consisting of texts from all reference  synchronic written corpora of the SYN series published up until the given version of the SYN corpus (for example SYN version 3 from the year 2014 includes the corpora SYN2000, SYN2005, SYN2006PUB, SYN2009PUB, SYN2010 and SYN2013PUB, as can be seen in the following table) and which has been processed by the newest versions of the (</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2000?rev=1481456308&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2016-12-11T11:38:28+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2000</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2000?rev=1481456308&amp;do=diff</link>
        <description>Corpus SYN2000
     Positions  Number of positions (tokens)  120 908 724  Number of positions (tokens) without punctuation  100 061 381  Number of word forms (words)  1 763 813  Number of lemmas  891 713  Structural attributes  Number of documents (not opera)</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2005?rev=1481454710&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2016-12-11T11:11:50+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2005</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2005?rev=1481454710&amp;do=diff</link>
        <description>Corpus SYN2005

The SYN2005 corpus is a synchronic representative corpus of contemporary written Czech, containing 100 million words (tokens). This basic characteristic is identical with its predecessor, the SYN2000 corpus. There are, however, also many differences between these two corpora, which must be taken into consideration when comparing any data in the two corpora (see below), because the mere mechanical comparison of frequencies can lead to misleading conclusions when these circumstance…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2006pub?rev=1615890322&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-03-16T10:25:22+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2006pub</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2006pub?rev=1615890322&amp;do=diff</link>
        <description>Corpus SYN2006PUB

The SYN2006PUB is a synchronic corpus of written journalism of 300 million words (tokens). It contains exclusively journalistic texts from November 1989 to the end of 2004, that is the time period covered by corpora SYN2000 and SYN2005. All three corpora are disjunctive as to the texts used, that is no text, which is part of one corpus, is included in the other two. Corpora</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2009pub?rev=1615890397&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-03-16T10:26:37+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2009pub</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2009pub?rev=1615890397&amp;do=diff</link>
        <description>Corpus SYN2009PUB

The SYN2009PUB is a synchronic corpus of written journalism, a sequel to SYN2006PUB. It contains exclusively journalistic texts from 1995 to 2007, the total size of the corpus is 700 million words (tokens). All the SYN-series corpora are disjunctive as to the texts used, that is no text, which is part of one corpus, is included in the other two. Corpora</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2010?rev=1481470049&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2016-12-11T15:27:29+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2010</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2010?rev=1481470049&amp;do=diff</link>
        <description>Corpus SYN2010

SYN2010 is a synchronic representative corpus of written Czech comprising 100 million tokens. It is a sequel to the corpora SYN2000 and SYN2005 and together with them forms a series of synchronic representative corpora that cover three successive periods.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2013pub?rev=1615890599&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2021-03-16T10:29:59+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2013pub</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2013pub?rev=1615890599&amp;do=diff</link>
        <description>Corpus SYN2013PUB

The SYN2013PUB is a synchronic corpus of written journalism, a sequel to SYN2006PUB and SYN2009PUB. It contains exclusively journalistic texts from 2005 to 2009, 44 different titles, the total size of the corpus is 935 million words (tokens). All the</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2015?rev=1598975168&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2020-09-01T15:46:08+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2015</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2015?rev=1598975168&amp;do=diff</link>
        <description>Corpus SYN2015

SYN2015 is a representative corpus of contemporary written Czech published in December 2015. SYN2015 is a sequel of the representative corpora of the SYN series (SYN2000, SYN2005, SYN2010), but at the same time, it reflects necessary methodological and technological enhancements outlined below.</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2020?rev=1654774618&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-06-09T11:36:58+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2020</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2020?rev=1654774618&amp;do=diff</link>
        <description>SYN2020 Corpus

The SYN2020 corpus is a synchronic representative and reference corpus of contemporary written Czech, containing 100 million text words, including punctuation (tokens). It is a sequel of the representative corpora of the SYN series (</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:syn2025?rev=1768816864&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-01-19T10:01:04+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>syn2025</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:syn2025?rev=1768816864&amp;do=diff</link>
        <description>SYN2025 Corpus

The SYN2025 corpus is a synchronic representative and reference corpus of contemporary written Czech, containing 100 million text words, including punctuation (tokens). It is a sequel to the representative corpora of the SYN series (</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:totalita?rev=1677160402&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2023-02-23T13:53:22+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>totalita</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:totalita?rev=1677160402&amp;do=diff</link>
        <description>Totalita: Corpus of totalitarian language

The Totalita corpus is a diachronic corpus of written Czech covering the period of the communist regime (1948–1989), which served as the material base for the Slovník komunistické totality (Dictionary of communist totalitarianism).</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:ukwac?rev=1445680813&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2015-10-24T10:00:13+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>ukwac</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:ukwac?rev=1445680813&amp;do=diff</link>
        <description>Corpus ukWaC

ukWaC is a 2 billion word corpus constructed from the Web limiting the crawl to the .uk domain and using medium-frequency words from the BNC as seeds. The corpus was POS-tagged and lemmatized with the TreeTagger. The tagset is available here, more information can be found in this</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:uvod?rev=1769159976&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2026-01-23T09:19:36+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>uvod</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:uvod?rev=1769159976&amp;do=diff</link>
        <description>Corpora of the Czech National Corpus project
   corpus  size (word count)   lemmas   morphological tags   released   characteristic features  General corpora  SYN (version 14)   5.5G   ✓    ✓    2010–2025   versioned corpus, unification of all the SYN-series synchronic written corpora</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:veda?rev=1708890014&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2024-02-25T19:40:14+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>veda</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:veda?rev=1708890014&amp;do=diff</link>
        <description>Corpus of Academic Czech

The Corpus of Academic Czech is a complement to the Phrase Bank of Academic Czech and includes only Czech-language untranslated texts published after 2010 in scientific journals indexed in the Web of Science or Scopus, or, in some cases, EBSCO databases. Another criterion is the genre of the text: only studies and review articles are included in the corpus, not, for example, reviews or conference reports. In most cases, the texts are in the final editing stage, i.e. they h…</description>
    </item>
    <item rdf:about="http://wiki.korpus.cz/doku.php/en:cnk:vespa_cz?rev=1671651721&amp;do=diff">
        <dc:format>text/html</dc:format>
        <dc:date>2022-12-21T19:42:01+00:00</dc:date>
        <dc:creator>Anonymous (anonymous@undisclosed.example.com)</dc:creator>
        <title>vespa_cz</title>
        <link>http://wiki.korpus.cz/doku.php/en:cnk:vespa_cz?rev=1671651721&amp;do=diff</link>
        <description>VESPA_CZ

Learner corpus of written academic English by advanced L2 English university students, whose L1 is Czech.

The learner corpus VESPA_CZ was created as part of the international project VESPA (The Varieties of English for Specific Purposes dAtabase), organized by the Centre for English Corpus Linguistics, Université catholique de Louvain. The aim of the project, which was initiated in 2008, is to build a database of English academic writing by L2 English university students from various…</description>
    </item>
</rdf:RDF>
