Standard

Harvard

APA

Vancouver

Author

BibTeX

@comment{Paper from the Dialogue 2019 conference, published in a serial with an ISSN; Scopus record 85071483218.
  The citation key is kept unchanged so existing cite commands keep resolving.
  Initials are braced so styles treat each transliterated initial (e.g. Ya., Yu.) as one unit.}
@article{7c0aa47b05274d5e8e522a9ca80ba418,
  title     = {АННОТИРОВАНИЕ ПРАГМАТИЧЕСКИХ МАРКЕРОВ В РУССКОМ РЕЧЕВОМ КОРПУСЕ: ПРОБЛЕМЫ, ПОИСКИ, РЕШЕНИЯ И РЕЗУЛЬТАТЫ},
  abstract  = {The article describes the experience of pragmatic markers (PM) annotation in two Russian speech corpora: “One Speaker{\textquoteright}s Day” (ORD; dialogues) and “Balanced Annotated Textotec” (SAT; monologues). To prepare an optimal PM annotation scheme, 4 pilot annotations were conducted on samples from ORD and SAT. It made it possible to form the final list of PM: 450 units, representing variants of 53 basic structural types. Processing the results of the pilot annotation allowed to obtain preliminary data on frequency of individual pragmatic markers and their types, as well as on the dependence of PM usage on sex and the level of speech competence of the speaker. As a result of statistical data processing, frequency lists of both PMs and their functions were obtained. The most commonly used in the dialogue are the PM вот, which is usually used as a «boundary marker» (G), and the PM там, which is usually used as a hesitative and/or rhythm-forming marker. In the monologue, the upper zone of the frequency list of the PMs is also full of boundary markers (G), marking the beginning/end of the monologue or serving as navigators in the text (вот/ну вот, значит, так). The most frequent types of PMs in dialogue are: X (hesitative markers), M (meta-communicative marker), GX (boundary/hesitative marker), K (xeno-indicator marker that introduces someone{\textquoteright}s speech), RX (rhythm-forming/hesitative marker). In the list of the most frequent types of PMs in monologue speech, the markers of the type GX (boundary/hesitative marker) and X (hesitative marker) are in the lead. The analysis of the frequency lists of PMs showed that we can talk about statistically significant differences in the use of PMs in dialogue and monologue.},
  keywords  = {Corpus annotation, Dialogue, Monologue, Pragmatic marker, Russian everyday speech, Speech corpus},
  author    = {Bogdanova-Beglarian, {N. V.} and Blinova, {O. V.} and Martynenko, {G. Ya.} and Sherstinova, {T. Yu.} and Zaides, {K. D.} and Popova, {T. I.}},
  year      = {2019},
  month     = jan,
  day       = {1},
  language  = {русский},
  volume    = {2019-May},
  pages     = {72--85},
  journal   = {Компьютерная лингвистика и интеллектуальные технологии},
  issn      = {2221-7932},
  publisher = {Российский государственный гуманитарный университет},
  number    = {18},
  url       = {http://www.scopus.com/inward/record.url?scp=85071483218&partnerID=8YFLogxK},
  note      = {2019 Annual International Conference on Computational Linguistics and Intellectual Technologies, Dialogue 2019 ; Conference date: 29-05-2019 Through 01-06-2019},
}

RIS

TY - JOUR

T1 - АННОТИРОВАНИЕ ПРАГМАТИЧЕСКИХ МАРКЕРОВ В РУССКОМ РЕЧЕВОМ КОРПУСЕ: ПРОБЛЕМЫ, ПОИСКИ, РЕШЕНИЯ И РЕЗУЛЬТАТЫ

T2 - 2019 Annual International Conference on Computational Linguistics and Intellectual Technologies, Dialogue 2019

AU - Bogdanova-Beglarian, N. V.

AU - Blinova, O. V.

AU - Martynenko, G. Ya.

AU - Sherstinova, T. Yu.

AU - Zaides, K. D.

AU - Popova, T. I.

PY - 2019/1/1

Y1 - 2019/1/1

N2 - The article describes the experience of pragmatic markers (PM) annotation in two Russian speech corpora: “One Speaker’s Day” (ORD; dialogues) and “Balanced Annotated Textotec” (SAT; monologues). To prepare an optimal PM annotation scheme, 4 pilot annotations were conducted on samples from ORD and SAT. It made it possible to form the final list of PM: 450 units, representing variants of 53 basic structural types. Processing the results of the pilot annotation allowed to obtain preliminary data on frequency of individual pragmatic markers and their types, as well as on the dependence of PM usage on sex and the level of speech competence of the speaker. As a result of statistical data processing, frequency lists of both PMs and their functions were obtained. The most commonly used in the dialogue are the PM вот, which is usually used as a «boundary marker» (G), and the PM там, which is usually used as a hesitative and/or rhythm-forming marker. In the monologue, the upper zone of the frequency list of the PMs is also full of boundary markers (G), marking the beginning/end of the monologue or serving as navigators in the text (вот/ну вот, значит, так). The most frequent types of PMs in dialogue are: X (hesitative markers), M (meta-communicative marker), GX (boundary/hesitative marker), K (xeno-indicator marker that introduces someone’s speech), RX (rhythm-forming/hesitative marker). In the list of the most frequent types of PMs in monologue speech, the markers of the type GX (boundary/hesitative marker) and X (hesitative marker) are in the lead. The analysis of the frequency lists of PMs showed that we can talk about statistically significant differences in the use of PMs in dialogue and monologue.

AB - The article describes the experience of pragmatic markers (PM) annotation in two Russian speech corpora: “One Speaker’s Day” (ORD; dialogues) and “Balanced Annotated Textotec” (SAT; monologues). To prepare an optimal PM annotation scheme, 4 pilot annotations were conducted on samples from ORD and SAT. It made it possible to form the final list of PM: 450 units, representing variants of 53 basic structural types. Processing the results of the pilot annotation allowed to obtain preliminary data on frequency of individual pragmatic markers and their types, as well as on the dependence of PM usage on sex and the level of speech competence of the speaker. As a result of statistical data processing, frequency lists of both PMs and their functions were obtained. The most commonly used in the dialogue are the PM вот, which is usually used as a «boundary marker» (G), and the PM там, which is usually used as a hesitative and/or rhythm-forming marker. In the monologue, the upper zone of the frequency list of the PMs is also full of boundary markers (G), marking the beginning/end of the monologue or serving as navigators in the text (вот/ну вот, значит, так). The most frequent types of PMs in dialogue are: X (hesitative markers), M (meta-communicative marker), GX (boundary/hesitative marker), K (xeno-indicator marker that introduces someone’s speech), RX (rhythm-forming/hesitative marker). In the list of the most frequent types of PMs in monologue speech, the markers of the type GX (boundary/hesitative marker) and X (hesitative marker) are in the lead. The analysis of the frequency lists of PMs showed that we can talk about statistically significant differences in the use of PMs in dialogue and monologue.

KW - Corpus annotation

KW - Dialogue

KW - Monologue

KW - Pragmatic marker

KW - Russian everyday speech

KW - Speech corpus

UR - http://www.scopus.com/inward/record.url?scp=85071483218&partnerID=8YFLogxK

M3 - статья в журнале по материалам конференции

AN - SCOPUS:85071483218

VL - 2019-May

SP - 72

EP - 85

JO - Компьютерная лингвистика и интеллектуальные технологии

JF - Компьютерная лингвистика и интеллектуальные технологии

SN - 2221-7932

IS - 18

Y2 - 29 May 2019 through 1 June 2019

ER -

ID: 61379861