Title |
Towards Semi-Automated Annotation for Prepositional Phrase Attachment |
Authors |
Sara Rosenthal, William Lipovsky, Kathleen McKeown, Kapil Thadani and Jacob Andreas |
Abstract |
This paper investigates whether high-quality annotations for tasks involving semantic disambiguation can be obtained without a major investment in time or expense. We examine the use of untrained human volunteers from Amazon's Mechanical Turk in disambiguating prepositional phrase (PP) attachment over sentences drawn from the Wall Street Journal corpus. Our goal is to compare the performance of these crowdsourced judgments to the annotations supplied by trained linguists for the Penn Treebank project in order to indicate the viability of this approach for annotation projects that involve contextual disambiguation. The results of our experiments on a sample of the Wall Street Journal corpus show that invoking majority agreement between multiple human workers can yield PP attachments with fairly high precision. This confirms that a crowdsourcing approach to syntactic annotation holds promise for the generation of training corpora in new domains and genres where high-quality annotations are not available and difficult to obtain. |
Language |
Evaluation methodologies |
Topics |
Corpus (creation, annotation, etc.), Grammar and Syntax, Evaluation methodologies |
Full paper  |
Towards Semi-Automated Annotation for Prepositional Phrase Attachment |
Bibtex |
@comment{
  LREC 2010 conference paper.
  Name lists use " and " as the separator and "Last, First" order so that
  BibTeX parses each person correctly. The role label "(Conference Chair)"
  that followed Nicoletta Calzolari in the original editor list is omitted
  because it is not part of a name. The conference days (19-21 May 2010)
  are recorded in eventdate; classic BibTeX styles ignore that field.
  NOTE(review): address holds the conference venue, not the publisher's
  city; kept unchanged so existing rendered output does not shift.
}
@inproceedings{ROSENTHAL10.828,
  author    = {Rosenthal, Sara and Lipovsky, William and McKeown, Kathleen and Thadani, Kapil and Andreas, Jacob},
  title     = {Towards Semi-Automated Annotation for Prepositional Phrase Attachment},
  booktitle = {Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Valletta, Malta},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  isbn      = {2-9517408-6-7},
  language  = {english},
} |