% Chapter in an edited volume: the chapter authors differ from the book
% editors, so the entry type is @incollection (title = chapter title,
% booktitle = book title). `volume` holds the series volume number.
@incollection{326065ac221c4cbdba476fa28ce7b785,
  author        = {Hastie, Helen and Cuay{\'a}huitl, Heriberto and Dethlefs, Nina and Keizer, Simon and Liu, Xingkun},
  title         = {Extrinsic versus intrinsic evaluation of natural language generation for spoken dialogue systems and social robotics},
  editor        = {Jokinen, Kristiina and Wilcock, Graham},
  booktitle     = {Dialogues with Social Robots},
  series        = {Lecture Notes in Electrical Engineering},
  volume        = {427},
  publisher     = {Springer},
  year          = {2016},
  month         = dec,
  day           = {25},
  pages         = {303--311},
  doi           = {10.1007/978-981-10-2585-3_24},
  isbn          = {9789811025846},
  language      = {English},
  keywords      = {Evaluation, Natural language generation, Spoken dialogue systems},
  abstract      = {In the past 10 years, very few published studies include some kind of extrinsic evaluation of an NLG component in an end-to-end-system, be it for phone or mobile-based dialogues or social robotic interaction. This may be attributed to the fact that these types of evaluations are very costly to set-up and run for a single component. The question therefore arises whether there is anything to be gained over and above intrinsic quality measures obtained in off-line experiments? In this article, we describe a case study of evaluating two variants of an NLG surface realiser and show that there are significant differences in both extrinsic measures and intrinsic measures. These differences can be used to inform further iterations of component and system development.},
  note          = {7th International Workshop on Spoken Dialogue Systems 2016, IWSDS 2016 ; Conference date: 13-01-2016 Through 16-01-2016},
  internal-note = {Source export had volume = "Part V" (the book part containing this chapter, not a series volume number); replaced by the series volume. TODO confirm volume 427 against the publisher page.},
}