% PVAE-TTS conference paper (ICASSP 2022). Funding, copyright and conference-date
% details are kept in the note field; LaTeX specials there are escaped so the
% note can be typeset by styles that print it.
@inproceedings{fbc44297614640f4947a2f5c2ac9769b,
  author    = {Lee, {Ji Hyun} and Lee, {Sang Hoon} and Kim, {Ji Hoon} and Lee, {Seong Whan}},
  title     = {{PVAE-TTS}: Adaptive Text-to-Speech via Progressive Style Adaptation},
  booktitle = {2022 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2022 - Proceedings},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2022},
  pages     = {6312--6316},
  doi       = {10.1109/ICASSP43922.2022.9747388},
  language  = {English},
  keywords  = {adaptive TTS, speaker adaptation, speech synthesis, text-to-speech},
  abstract  = {Adaptive text-to-speech (TTS) has attracted increasing interests for the purpose of training TTS systems without tons of high quality data. Nevertheless, existing adaptive TTS systems still show low adaptation quality for novel speakers, since it is hard to learn an extensive speaking style with limited data. To address this issue, we propose progressive variational autoencoder (PVAE) which generates data with adapting to style gradually. PVAE learns a progressively style-normalized representation, which is a key component of progressive style adaptation. We extend PVAE to PVAE-TTS, a multi-speaker adaptive TTS model which generates natural speech with high adaptation quality for novel speakers. To further improve the adaptation quality, we also propose dynamic style layer normalization (DSLN) which utilizes a convolution operation. The experimental results demonstrate the superiority of PVAE-TTS in terms of both subjective and objective evaluations.},
  note      = {Funding Information: This work was supported by Institute of Information \& communications Technology Planning \& Evaluation (IITP) grant funded by the Korea government (MSIT) (No. 2019-0-00079, Artificial Intelligence Graduate School Program (Korea University)) and Netmarble AI Center. Publisher Copyright: {\textcopyright} 2022 IEEE; 47th IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2022; Conference date: 23-05-2022 Through 27-05-2022},
}