tts.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. package yask
  2. import (
  3. "fmt"
  4. "io"
  5. "net/http"
  6. "net/url"
  7. "strconv"
  8. "strings"
  9. )
// TTSConfig is the config for the text-to-speech method (TextToSpeech).
type TTSConfig struct {
	// Text is the plain text to synthesize; sent as the "text" form field
	// when SSML is empty.
	Text string

	// SSML is the SSML markup to synthesize; when non-empty it is sent as the
	// "ssml" form field instead of Text.
	SSML string

	// Lang is sent as the "lang" form field.
	Lang string

	// Voice is sent as the "voice" form field.
	Voice string

	// Emotion is sent as the "emotion" form field, but only for the voices
	// TextToSpeech allows it for; otherwise it is dropped from the request.
	Emotion string

	// Speed is sent as the "speed" form field, formatted with one decimal place.
	Speed float32

	// Format is sent as the "format" form field.
	Format string

	// Rate is sent as the "sampleRateHertz" form field.
	Rate int

	// YaFolderID is sent as the "folderId" form field.
	YaFolderID string

	// YaAPIKey is sent in the Authorization header as "Api-Key <key>".
	YaAPIKey string
}
  23. // ssml content checker
  24. func (s *TTSConfig) isSSML() bool {
  25. return len(s.SSML) > 0
  26. }
  27. // returns TTSConfig with default parameters
  28. func defaultTTSConfig(yaFolderID, yaAPIKey string) *TTSConfig {
  29. return &TTSConfig{
  30. Lang: LangRU,
  31. Voice: VoiceOksana,
  32. Emotion: EmotionNeutral,
  33. Speed: SpeedStandard,
  34. Format: FormatLPCM,
  35. Rate: Rate8k,
  36. YaFolderID: yaFolderID,
  37. YaAPIKey: yaAPIKey,
  38. }
  39. }
  40. // TTSDefaultConfigText returns config with default parameters for raw text recognition and use in TextToSpeech method
  41. func TTSDefaultConfigText(yaFolderID, yaAPIKey, text string) *TTSConfig {
  42. conf := defaultTTSConfig(yaFolderID, yaAPIKey)
  43. conf.Text = text
  44. return conf
  45. }
  46. // TTSDefaultConfigSSML returns config with default parameters for raw text recognition and use in TextToSpeech method
  47. // more details of SSML language in https://cloud.yandex.ru/docs/speechkit/tts/ssml
  48. func TTSDefaultConfigSSML(yaFolderID, yaAPIKey, SSML string) *TTSConfig {
  49. conf := defaultTTSConfig(yaFolderID, yaAPIKey)
  50. conf.SSML = SSML
  51. return conf
  52. }
  53. // TextToSpeech returns PCM or OGG sound stream using the service Yandex Speech Kit.
  54. // Result PCM stream can be converted to Wav stream using EncodePCMToWav
  55. func TextToSpeech(config *TTSConfig) (io.ReadCloser, error) {
  56. httpForm := url.Values{
  57. "lang": []string{config.Lang},
  58. "voice": []string{config.Voice},
  59. "emotion": []string{config.Emotion},
  60. "speed": []string{strconv.FormatFloat(float64(config.Speed), 'f', 1, 32)},
  61. "format": []string{config.Format},
  62. "sampleRateHertz": []string{strconv.FormatInt(int64(config.Rate), 10)},
  63. "folderId": []string{config.YaFolderID},
  64. }
  65. // emotion deprecated - only for omazh and jane
  66. if config.Voice != "jane" {
  67. delete(httpForm, "emotion")
  68. }
  69. // ssml
  70. if config.isSSML() {
  71. httpForm.Set("ssml", config.SSML)
  72. } else {
  73. httpForm.Set("text", config.Text)
  74. }
  75. request, err := http.NewRequest("POST", YaTTSUrl, strings.NewReader(httpForm.Encode()))
  76. if err != nil {
  77. return nil, err
  78. }
  79. request.Header.Add("Content-Type", "application/x-www-form-urlencoded")
  80. request.Header.Set("Authorization", fmt.Sprintf("Api-Key %v", config.YaAPIKey))
  81. client := new(http.Client)
  82. response, err := client.Do(request)
  83. if err != nil {
  84. return nil, err
  85. }
  86. if response.StatusCode != http.StatusOK {
  87. err = unmarshallYaError(response.Body)
  88. response.Body.Close()
  89. return nil, err
  90. }
  91. return response.Body, nil
  92. }