Title |
Corpus Aligner (CorAl) Evaluation on English-Croatian Parallel Corpora |
Authors |
Sanja Seljan, Marko Tadić, Željko Agić, Jan Šnajder, Bojana Dalbelo Bašić and Vjekoslav Osmann |
Abstract |
An increasing demand for new language resources of recent EU members and accessing countries has in turn initiated the development of different language tools and resources, such as alignment tools and corresponding translation memories for new language pairs. The primary goal of this paper is to provide a description of a free sentence alignment tool CorAl (Corpus Aligner), developed at the Faculty of Electrical Engineering and Computing, University of Zagreb. The tool performs paragraph alignment at the first step of the alignment process, which is followed by sentence alignment. Description of the tool is followed by its evaluation. The paper describes an experiment with applying the CorAl aligner to an English-Croatian parallel corpus of legislative domain using metrics of precision, recall and F1-measure. Results are discussed and the concluding sections discuss future directions of CorAl development. |
Language |
Tools, systems, applications |
Topics |
Evaluation methodologies, Corpus (creation, annotation, etc.), Tools, systems, applications |
Full paper  |
Corpus Aligner (CorAl) Evaluation on English-Croatian Parallel Corpora |
Bibtex |
% LREC 2010 conference paper. People in author/editor are separated by " and "
% and written as "Last, First" so that each name (including the compound
% surname "Dalbelo Bašić") is parsed as one person.
% The source listed Nicoletta Calzolari as Conference Chair; that role note is
% kept here because a name field cannot carry it.
@InProceedings{SELJAN10.599,
  author    = {Seljan, Sanja and Tadić, Marko and Agić, Željko and Šnajder, Jan and Dalbelo Bašić, Bojana and Osmann, Vjekoslav},
  title     = {Corpus Aligner ({CorAl}) Evaluation on {English-Croatian} Parallel Corpora},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
} |