{"$schema":"https://json-schema.org/draft/2020-12/schema","additionalProperties":false,"properties":{"emotion":{"description":"Emotion control for synthesized speech","enum":["happy","sad","angry","fearful","disgusted","surprised","calm","fluent"],"type":"string"},"format":{"default":"mp3","description":"Output audio format","enum":["mp3","flac","wav"],"type":"string"},"pitch":{"default":0,"description":"Pitch adjustment (-12 to 12)","maximum":12,"minimum":-12,"type":"integer"},"sample_rate":{"anyOf":[{"const":8000,"type":"number"},{"const":16000,"type":"number"},{"const":22050,"type":"number"},{"const":24000,"type":"number"},{"const":32000,"type":"number"},{"const":44100,"type":"number"}],"description":"Audio sample rate"},"speed":{"default":1,"description":"Speech speed (0.5 to 2)","maximum":2,"minimum":0.5,"type":"number"},"text":{"description":"The text to convert to speech. Maximum 10,000 characters.","maxLength":10000,"type":"string"},"voice_id":{"default":"English_expressive_narrator","description":"The voice ID to use for synthesis","type":"string"},"volume":{"default":1,"description":"Speech volume (0 to 10)","maximum":10,"minimum":0,"type":"number"}},"required":["text","voice_id","speed","volume","pitch","format"],"type":"object"}