@phdthesis{Huestegge2019, author = {Huestegge, Sujata Maya}, title = {Cognitive mechanisms of voice processing}, doi = {10.25972/OPUS-18608}, url = {http://nbn-resolving.de/urn:nbn:de:bvb:20-opus-186086}, school = {Universit{\"a}t W{\"u}rzburg}, year = {2019}, abstract = {The present thesis addresses cognitive processing of voice information. Based on general theoretical concepts regarding mental processes it will differentiate between modular, abstract information processing approaches to cognition and interactive, embodied ideas of mental processing. These general concepts will then be transferred to the context of processing voice-related information in the context of parallel face-related processing streams. One central issue here is whether and to what extent cognitive voice processing can occur independently, that is, encapsulated from the simultaneous processing of visual person-related information (and vice versa). In Study 1 (Huestegge \& Raettig, in press), participants are presented with audio-visual stimuli displaying faces uttering digits. Audiovisual gender congruency was manipulated: There were male and female faces, each uttering digits with either a male or female voice (all stimuli were AV- synchronized). Participants were asked to categorize the gender of either the face or the voice by pressing one of two keys in each trial. A central result was that audio-visual gender congruency affected performance: Incongruent stimuli were categorized slower and more error-prone, suggesting a strong cross-modal interaction of the underlying visual and auditory processing routes. Additionally, the effect of incongruent visual information on auditory classification was stronger than the effect of incongruent auditory information on visual categorization, suggesting visual dominance over auditory processing in the context of gender classification. A gender congruency effect was also present under high cognitive load. Study 2 (Huestegge, Raettig, \& Huestegge, in press) utilized the same (gender-congruent and -incongruent) stimuli, but different tasks for the participants, namely categorizing the spoken digits (into odd/even or smaller/larger than 5). This should effectively direct attention away from gender information, which was no longer task-relevant. Nevertheless, congruency effects were still observed in this study. This suggests a relatively automatic processing of cross-modal gender information, which eventually affects basic speech-based information processing. Study 3 (Huestegge, subm.) focused on the ability of participants to match unfamiliar voices to (either static or dynamic) faces. One result was that participants were indeed able to match voices to faces. Moreover, there was no evidence for any performance increase when dynamic (vs. mere static) faces had to be matched to concurrent voices. The results support the idea that common person-related source information affects both vocal and facial features, and implicit corresponding knowledge appears to be used by participants to successfully complete face-voice matching. Taken together, the three studies (Huestegge, subm.; Huestegge \& Raettig, in press; Huestegge et al., in press) provided information to further develop current theories of voice processing (in the context of face processing). On a general level, the results of all three studies are not in line with an abstract, modular view of cognition, but rather lend further support to interactive, embodied accounts of mental processing.}, subject = {Stimme}, language = {en} }