% Conference paper (EUSIPCO 2015): dynamic speech emotion recognition with state-space models.
@inproceedings{193be701ddfe4f96b5642f1015a84c35,
  author    = {Konstantin Markov and Tomoko Matsui and Francois Septier and Gareth Peters},
  title     = {Dynamic speech emotion recognition with state-space models},
  booktitle = {2015 23rd European Signal Processing Conference (EUSIPCO)},
  series    = {European Signal Processing Conference},
  publisher = {IEEE},
  address   = {United States},
  pages     = {2077--2081},
  year      = {2015},
  month     = dec,
  day       = {28},
  doi       = {10.1109/EUSIPCO.2015.7362750},
  language  = {English},
  keywords  = {Affect recognition, Emotion recognition, Gaussian Process state-space model, Kalman filter},
  note      = {23rd European Signal Processing Conference 2015, EUSIPCO 2015 ; Conference date: 31-08-2015 Through 04-09-2015},
  abstract  = {Automatic emotion recognition from speech has been focused mainly on identifying categorical or static affect states, but the spectrum of human emotion is continuous and time-varying. In this paper, we present a recognition system for dynamic speech emotion based on state-space models (SSMs). The prediction of the unknown emotion trajectory in the affect space spanned by Arousal, Valence, and Dominance (A-V-D) descriptors is cast as a time series filtering task. The state space models we investigated include a standard linear model (Kalman filter) as well as novel non-linear, non-parametric Gaussian Processes (GP) based SSM. We use the AVEC 2014 database for evaluation, which provides ground truth A-V-D labels which allows state and measurement functions to be learned separately simplifying the model training. For the filtering with GP SSM, we used two approximation methods: a recently proposed analytic method and Particle filter. All models were evaluated in terms of average Pearson correlation R and root mean square error (RMSE). The results show that using the same feature vectors, the GP SSMs achieve twice higher correlation and twice smaller RMSE than a Kalman filter.},
}