Vladimir V Maksimov 3 years ago
commit
4c9185e60f
13 changed files with 734 additions and 0 deletions
  1. 13 0
      .travis.yml
  2. 21 0
      LICENSE
  3. 112 0
      README.md
  4. 102 0
      README.ru.md
  5. 123 0
      const.go
  6. 9 0
      go.mod
  7. 9 0
      go.sum
  8. 90 0
      stt.go
  9. BIN
      test_data/test_sound.wav
  10. BIN
      test_data/tts.wav
  11. 105 0
      tts.go
  12. 74 0
      yask.go
  13. 76 0
      z_test.go

+ 13 - 0
.travis.yml

@@ -0,0 +1,13 @@
+--- 
+dist: xenial
+env: 
+  - GO111MODULE=on GOPROXY=https://proxy.golang.org
+go: 
+  - 1.10.x
+  - 1.11.x
+  - 1.12.x
+  - 1.13.x
+language: go
+os: linux
+script: 
+  - "go test -cpu=1,2 -v -tags integration ./..."

+ 21 - 0
LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 fcg-xvii
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 112 - 0
README.md

@@ -0,0 +1,112 @@
+<p align="center">
+  <span>English</span> |
+  <a href="README.ru.md">Русский</a>
+</p>
+
+[![Build Status](https://travis-ci.org/fcg-xvii/yask.svg?branch=master)](https://travis-ci.org/fcg-xvii/yask)
+ [![Go Report Card](https://goreportcard.com/badge/github.com/fcg-xvii/yask)](https://goreportcard.com/report/github.com/fcg-xvii/yask)
+
+# yask
+> Tools for working with the speech synthesis and recognition service <b>Yandex Speech Kit</b> (more at <a href="https://cloud.yandex.ru/docs/speechkit/" target="_blank">https://cloud.yandex.ru/docs/speechkit/</a>) for the <b>golang</b> programming language. Used to synthesize speech from text and to recognize text from a sound stream.
+
+Before you start, you must register at <a href="https://cloud.yandex.ru/" target="_blank">https://cloud.yandex.ru/</a> to get an API key and a directory identifier (more at <a href="https://cloud.yandex.ru/docs" target="_blank">https://cloud.yandex.ru/docs</a>).
+
+### Audio stream formats
+<ul>
+    <li><b>OGG</b> <a href="https://en.wikipedia.org/wiki/Ogg" target="_blank">https://en.wikipedia.org/wiki/Ogg</a></li>
+    <li><b>PCM</b> <a href="https://en.wikipedia.org/wiki/Pulse-code_modulation" target="_blank">https://en.wikipedia.org/wiki/Pulse-code_modulation</a> (when recognizing text with the lpcm format parameter, a wav format stream can be used)</li>
+</ul>
+
+### Speech synthesis from text
+> Running the example produces a file in wav format, ready for playback in any player program. The default sample rate is 8000 Hz.
+```golang
+import (
+	"log"
+	"os"
+
+	"github.com/fcg-xvii/yask"
+)
+
+func main() {
+	yaFolderID := "b1g..."    // yandex folder id
+	yaAPIKey := "AQVNy..."    // yandex api key
+	text := "Hi It's test of speech synthesis" // text for synthesis
+
+	// init config for synthesis (the lpcm format is set by default)
+	config := yask.TTSDefaultConfigText(yaFolderID, yaAPIKey, text)
+
+    // By default the config language is Russian. For English, set
+    // the English language and an English voice
+    config.Lang = yask.LangEN
+    config.Voice = yask.VoiceNick
+
+
+	// speech synthesis
+	r, err := yask.TextToSpeech(config)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+
+    // open file for save result
+	f, err := os.OpenFile("tts.wav", os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0655)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+	defer f.Close()
+
+    // lpcm encoding to wav format
+	if err := yask.EncodePCMToWav(r, f, config.Rate, 16, 1); err != nil {
+		log.Println(err)
+		return
+	}
+}
+```
+
+### Speech recognition to text
+> Example of recognition of short audio. The example uses a wav file that can be used with a configuration format value of <b>lpcm</b>
+
+```golang
+package main
+
+import (
+	"log"
+	"os"
+
+	"github.com/fcg-xvii/yask"
+)
+
+func main() {
+	yaFolderID := "b1g4..." // yandex folder id
+	yaAPIKey := "AQVNyr..." // yandex api key
+	dataFileName := "data.wav" // audio file in wav format for recognition to text
+
+    // open audio file
+	f, err := os.Open(dataFileName)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+	defer f.Close()
+
+    // init config for recognition
+	config := yask.STTConfigDefault(yaFolderID, yaAPIKey, f)
+
+    // set the English language
+    config.Lang = yask.LangEN
+
+    // recognition of speech to text
+	text, err := yask.SpeechToTextShort(config)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+
+	log.Println(text)
+}
+```
+
+## License
+
+The MIT License (MIT), see [LICENSE](LICENSE).

+ 102 - 0
README.ru.md

@@ -0,0 +1,102 @@
+<p align="center">
+  <span>Русский</span> |
+  <a href="README.md">English</a>
+</p>
+
+[![Build Status](https://travis-ci.org/fcg-xvii/yask.svg?branch=master)](https://travis-ci.org/fcg-xvii/yask)
+ [![Go Report Card](https://goreportcard.com/badge/github.com/fcg-xvii/yask)](https://goreportcard.com/report/github.com/fcg-xvii/yask)
+
+# yask
+> Инструмент для работы с сервисом синтеза и распознавания речи <b>Yandex Speech Kit</b> (подробнее о сервисе <a href="https://cloud.yandex.ru/docs/speechkit/" target="_blank">https://cloud.yandex.ru/docs/speechkit/</a>) для языка программирования <b>golang</b>. Инструмент позволяет синтезировать речь из текста, а также распознавать текст из звукового потока.
+
+Перед началом работы необходимо зарегистрироваться на <a href="https://cloud.yandex.ru/" target="_blank">https://cloud.yandex.ru/</a> для получения API-ключа и идентификатора директории (подробнее <a href="https://cloud.yandex.ru/docs" target="_blank">https://cloud.yandex.ru/docs</a>).
+
+### Форматы аудиопотока
+<ul>
+    <li><b>OGG</b> <a href="https://ru.wikipedia.org/wiki/Ogg" target="_blank">https://ru.wikipedia.org/wiki/Ogg</a></li>
+    <li><b>PCM</b> <a href="https://ru.wikipedia.org/wiki/Импульсно-кодовая_модуляция" target="_blank">https://ru.wikipedia.org/wiki/Импульсно-кодовая_модуляция</a> (при распознавании текста с параметром формата lpcm может быть использован поток формата wav)</li>
+</ul>
+
+### Синтез речи из текста
+> В результате примера получим файл в формате wav, готовый для воспроизведения в любой программе-плеере. Частота дискретизации по умолчанию 8000 Гц.
+```golang
+import (
+	"log"
+	"os"
+
+	"github.com/fcg-xvii/yask"
+)
+
+func main() {
+	yaFolderID := "b1g..."    // идентификатор директории в yandex
+	yaAPIKey := "AQVNy..."    // ключ api yandex
+	text := "Привет, это тест синтеза речи с помощью сервиса Яндекса" // текст для синтеза
+
+	// инициализация конфигурации для синтеза (по умолчанию установлен формат lpcm)
+	config := yask.TTSDefaultConfigText(yaFolderID, yaAPIKey, text)
+
+	// синтез речи
+	r, err := yask.TextToSpeech(config)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+
+    // файл для сохранения результата
+	f, err := os.OpenFile("tts.wav", os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0655)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+	defer f.Close()
+
+    // кодировка lpcm в wav формат
+	if err := yask.EncodePCMToWav(r, f, config.Rate, 16, 1); err != nil {
+		log.Println(err)
+		return
+	}
+}
+```
+
+### Распознавание речи в текст
+> Пример распознавания коротких аудио. В примере используется файл в формате wav, который допускается в использовании со значением формата конфигурации <b>lpcm</b>
+```golang
+package main
+
+import (
+	"log"
+	"os"
+
+	"github.com/fcg-xvii/yask"
+)
+
+func main() {
+	yaFolderID := "b1g4..." // идентификатор директории в yandex
+	yaAPIKey := "AQVNyr..." // ключ api yandex
+	dataFileName := "data.wav" // файл в формате wav для распознавания
+
+    // открытие аудиофайла
+	f, err := os.Open(dataFileName)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+	defer f.Close()
+
+    // создание конфигурации распознавания
+	config := yask.STTConfigDefault(yaFolderID, yaAPIKey, f)
+
+    // Распознавание звука в текст
+	text, err := yask.SpeechToTextShort(config)
+	if err != nil {
+		log.Println(err)
+		return
+	}
+
+	log.Println(text)
+}
+```
+
+## Лицензия
+
+The MIT License (MIT), подробнее [LICENSE](LICENSE).

+ 123 - 0
const.go

@@ -0,0 +1,123 @@
+package yask
+
+// Voice describes one entry of the package voice list; it is the element type
+// returned by Voices and is serializable to JSON through the field tags
+type Voice struct {
+	NameEn  string `json:"name_en"`    // voice name in english
+	MameRu  string `json:"name_ru"`    // voice name in russian; NOTE(review): identifier looks like a typo of NameRu, kept because it is exported
+	Voice   string `json:"voice"`      // voice identifier (one of the Voice* constants)
+	Lang    string `json:"lang"`       // voice language (one of the Lang* constants)
+	Male    bool   `json:"is_male"`    // true for a male voice, false for a female one
+	Premium bool   `json:"is_premium"` // true for a premium voice, false for a standard one
+}
+
+const (
+	// YaSTTUrl is url for send speech to text requests
+	YaSTTUrl = "https://stt.api.cloud.yandex.net/speech/v1/stt:recognize"
+
+	// YaTTSUrl is url for send text to speech requests
+	YaTTSUrl = "https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize"
+
+	// Formats of audio
+
+	// FormatLPCM is PCM audio format (wav) without wav header (more details in https://en.wikipedia.org/wiki/Pulse-code_modulation)
+	FormatLPCM = "lpcm"
+	// FormatOgg is audio ogg format
+	FormatOgg = "oggopus"
+
+	// Sample rates
+
+	// Rate8k is rate of 8kHz
+	Rate8k int = 8000
+	// Rate16k is rate of 16kHz
+	Rate16k int = 16000
+	// Rate48k is rate of 48kHz
+	Rate48k int = 48000
+
+	// Languages
+
+	// LangRU is russian language.
+	// The value is spelled "ru-RU" so that it matches the default language of
+	// STTConfigDefault and the value documented for the Voices filter
+	LangRU = "ru-RU"
+	// LangEN is english language
+	LangEN = "en-US"
+	// LangTR is turkish language
+	LangTR = "tr-TR"
+
+	// Speed constants
+
+	// SpeedStandard is standard speed of voice (1.0)
+	SpeedStandard float32 = 1.0
+	// SpeedMostFastest is maximum speed voice (3.0)
+	SpeedMostFastest float32 = 3.0
+	// SpeedSlowest is minimum speed of voice (0.1)
+	SpeedSlowest float32 = 0.1
+
+	// Voice speeches
+
+	// VoiceOksana is Oksana voice (russian, female, standard)
+	VoiceOksana = "oksana"
+	// VoiceJane is Jane voice (russian, female, standard)
+	VoiceJane = "jane"
+	// VoiceOmazh is Omazh voice (russian, female, standard)
+	VoiceOmazh = "omazh"
+	// VoiceZahar is Zahar voice (russian, male, standard)
+	VoiceZahar = "zahar"
+	// VoiceErmil is Ermil voice (russian, male, standard)
+	VoiceErmil = "ermil"
+	// VoiceSilaerkan is Silaerkan voice (turkish, female, standard)
+	VoiceSilaerkan = "silaerkan"
+	// VoiceErkanyavas is Erkanyavas voice (turkish, male, standard)
+	VoiceErkanyavas = "erkanyavas"
+	// VoiceAlyss is Alyss voice (english, female, standard)
+	VoiceAlyss = "alyss"
+	// VoiceNick is Nick voice (english, male, standard)
+	VoiceNick = "nick"
+	// VoiceAlena is Alena voice (russian, female, premium)
+	VoiceAlena = "alena"
+	// VoiceFilipp is Filipp voice (russian, male, premium)
+	VoiceFilipp = "filipp"
+
+	// Voice emotions
+
+	// EmotionGood is good voice emotion
+	EmotionGood = "good"
+	// EmotionEvil is evil voice emotion
+	EmotionEvil = "evil"
+	// EmotionNeutral is neutral voice emotion
+	EmotionNeutral = "neutral"
+
+	// Models for speech recognition
+
+	// TopicGeneral is current version of voice model (available in all languages)
+	TopicGeneral = "general"
+	// TopicGeneralRC is experimental version of voice model (russian language)
+	TopicGeneralRC = "general:rc"
+	// TopicGeneralDeprecated is deprecated version of voice model (russian language)
+	TopicGeneralDeprecated = "general:deprecated"
+	// TopicMaps is model for addresses and company names
+	TopicMaps = "maps"
+
+	// These constants are for use in the voice selection filter
+
+	// SexAll is male and female
+	SexAll = 0
+	// SexMale is male
+	SexMale = 1
+	// SexFemale is female
+	SexFemale = 2
+)
+
+var (
+	// voices is the list of known voices used by the Voices filter.
+	// Columns: english name, russian name, voice identifier, language, male, premium.
+	// Every row mirrors the description given in the comment of its Voice* constant
+	voices = []Voice{
+		{"Oksana", "Оксана", VoiceOksana, LangRU, false, false},
+		{"Jane", "Джейн", VoiceJane, LangRU, false, false},
+		{"Omazh", "Омаж", VoiceOmazh, LangRU, false, false},
+		{"Zahar", "Захар", VoiceZahar, LangRU, true, false},
+		{"Ermil", "Эрмил", VoiceErmil, LangRU, true, false},
+		{"Sila Erkan", "Сыла Эркан", VoiceSilaerkan, LangTR, false, false},
+		{"Erkan Yavas", "Эркан Явас", VoiceErkanyavas, LangTR, true, false},
+		{"Alyss", "Элис", VoiceAlyss, LangEN, false, false},
+		{"Nick", "Ник", VoiceNick, LangEN, true, false},
+		{"Alena", "Алёна", VoiceAlena, LangRU, false, true},
+		{"Filipp", "Филипп", VoiceFilipp, LangRU, true, true},
+	}
+)

+ 9 - 0
go.mod

@@ -0,0 +1,9 @@
+module github.com/fcg-xvii/yask
+
+go 1.12
+
+require (
+	github.com/fcg-xvii/go-tools v0.0.0-20200508201218-1e919c727b5d
+	github.com/go-audio/audio v1.0.0
+	github.com/go-audio/wav v1.0.0
+)

+ 9 - 0
go.sum

@@ -0,0 +1,9 @@
+github.com/fcg-xvii/go-tools v0.0.0-20200508201218-1e919c727b5d h1:8mxF+S89GOZkn1F4AO5cWujo4ryj7asKKuBec2Tt2T8=
+github.com/fcg-xvii/go-tools v0.0.0-20200508201218-1e919c727b5d/go.mod h1:99INCxALoGw9mQWOmpCdBDvjm6Wk1HbDV4afw5UrORw=
+github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
+github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
+github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
+github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
+github.com/go-audio/wav v1.0.0 h1:WdSGLhtyud6bof6XHL28xKeCQRzCV06pOFo3LZsFdyE=
+github.com/go-audio/wav v1.0.0/go.mod h1:3yoReyQOsiARkvPl3ERCi8JFjihzG6WhjYpZCf5zAWE=
+github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=

+ 90 - 0
stt.go

@@ -0,0 +1,90 @@
+package yask
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"net/url"
+	"strconv"
+)
+
+// STTConfigDefault builds a recognition config for the given credentials and audio stream.
+// Presets: russian language, general topic, profanity filter off, lpcm format, 8kHz rate
+func STTConfigDefault(yaFolderID, yaAPIKey string, data io.Reader) *STTConfig {
+	conf := new(STTConfig)
+
+	// values supplied by the caller
+	conf.YaFolderID, conf.YaAPIKey = yaFolderID, yaAPIKey
+	conf.Data = data
+
+	// defaults; ProfanityFilter keeps its zero value (false)
+	conf.Lang = "ru-RU"
+	conf.Topic = "general"
+	conf.Format = FormatLPCM
+	conf.Rate = Rate8k
+
+	return conf
+}
+
+// STTConfig is config for speech to text methods.
+// All fields except YaAPIKey and Data are sent as query parameters of the recognition request
+type STTConfig struct {
+	Lang            string    // sent as the "lang" parameter
+	Topic           string    // sent as the "topic" parameter (see the Topic* constants)
+	ProfanityFilter bool      // sent as the "profanityFilter" parameter
+	Format          string    // sent as the "format" parameter (see the Format* constants)
+	Rate            int       // sent as the "sampleRateHertz" parameter (see the Rate* constants)
+	YaFolderID      string    // sent as the "folderId" parameter
+	YaAPIKey        string    // sent in the Authorization header as "Api-Key <value>"
+	Data            io.Reader // audio stream used as the request body
+}
+
+// uri builds the recognition endpoint address with the config values
+// encoded as query string parameters
+func (s *STTConfig) uri() string {
+	query := make(url.Values, 6)
+	query.Set("lang", s.Lang)
+	query.Set("topic", s.Topic)
+	query.Set("profanityFilter", strconv.FormatBool(s.ProfanityFilter))
+	query.Set("format", s.Format)
+	query.Set("sampleRateHertz", strconv.Itoa(s.Rate))
+	query.Set("folderId", s.YaFolderID)
+
+	return YaSTTUrl + "?" + query.Encode()
+}
+
+// SpeechToTextShort returns text from a PCM or OGG sound stream using the service Yandex Speech Kit.
+//
+// The audio is read from conf.Data and posted to the address built by conf.uri().
+// On a non-200 response the error described by the service payload is returned.
+// On success the "result" field of the JSON response is returned as a string
+func SpeechToTextShort(conf *STTConfig) (string, error) {
+	req, err := http.NewRequest(
+		"POST",
+		conf.uri(),
+		conf.Data,
+	)
+	if err != nil {
+		return "", err
+	}
+	req.Header.Set("Transfer-encoding", "chunked")
+	req.Header.Set("Authorization", fmt.Sprintf("Api-Key %v", conf.YaAPIKey))
+
+	cl := new(http.Client)
+
+	resp, err := cl.Do(req)
+	if err != nil {
+		return "", err
+	}
+	// the body must be closed on every path, otherwise the connection leaks
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return "", unmarshallYaError(resp.Body)
+	}
+
+	rSource, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return "", err
+	}
+
+	m := make(map[string]interface{})
+	if err = json.Unmarshal(rSource, &m); err != nil {
+		return "", err
+	}
+
+	result := fmt.Sprint(m["result"])
+	return result, nil
+}

BIN
test_data/test_sound.wav


BIN
test_data/tts.wav


+ 105 - 0
tts.go

@@ -0,0 +1,105 @@
+package yask
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+)
+
+// TTSConfig is config for text to speech method.
+// All fields except YaAPIKey are sent as form values of the synthesis request
+type TTSConfig struct {
+	Text       string  // plain text to synthesize; sent as "text" when SSML is empty
+	SSML       string  // SSML markup to synthesize; when not empty it is sent as "ssml" instead of Text
+	Lang       string  // sent as the "lang" value (see the Lang* constants)
+	Voice      string  // sent as the "voice" value (see the Voice* constants)
+	Emotion    string  // sent as the "emotion" value, only for the omazh and jane voices
+	Speed      float32 // sent as the "speed" value, formatted with one decimal digit
+	Format     string  // sent as the "format" value (see the Format* constants)
+	Rate       int     // sent as the "sampleRateHertz" value (see the Rate* constants)
+	YaFolderID string  // sent as the "folderId" value
+	YaAPIKey   string  // sent in the Authorization header as "Api-Key <value>"
+}
+
+// isSSML reports whether the config carries SSML markup (a non-empty SSML field)
+func (s *TTSConfig) isSSML() bool {
+	return s.SSML != ""
+}
+
+// defaultTTSConfig builds a synthesis config for the given credentials.
+// Presets: russian language, Oksana voice, neutral emotion, standard speed,
+// lpcm format, 8kHz rate. Text and SSML are left empty
+func defaultTTSConfig(yaFolderID, yaAPIKey string) *TTSConfig {
+	conf := new(TTSConfig)
+
+	// values supplied by the caller
+	conf.YaFolderID, conf.YaAPIKey = yaFolderID, yaAPIKey
+
+	// defaults
+	conf.Lang = LangRU
+	conf.Voice = VoiceOksana
+	conf.Emotion = EmotionNeutral
+	conf.Speed = SpeedStandard
+	conf.Format = FormatLPCM
+	conf.Rate = Rate8k
+
+	return conf
+}
+
+// TTSDefaultConfigText returns config with default parameters for synthesis
+// of plain text, ready for use in TextToSpeech method
+func TTSDefaultConfigText(yaFolderID, yaAPIKey, text string) *TTSConfig {
+	result := defaultTTSConfig(yaFolderID, yaAPIKey)
+	result.Text = text
+
+	return result
+}
+
+// TTSDefaultConfigSSML returns config with default parameters for synthesis
+// of SSML markup, ready for use in TextToSpeech method
+// more details of SSML language in https://cloud.yandex.ru/docs/speechkit/tts/ssml
+func TTSDefaultConfigSSML(yaFolderID, yaAPIKey, SSML string) *TTSConfig {
+	result := defaultTTSConfig(yaFolderID, yaAPIKey)
+	result.SSML = SSML
+
+	return result
+}
+
+// TextToSpeech sends the config to the service Yandex Speech Kit and returns
+// the synthesized PCM or OGG sound stream. The caller must close the returned stream.
+// Result PCM stream can be converted to Wav stream using EncodePCMToWav
+func TextToSpeech(config *TTSConfig) (io.ReadCloser, error) {
+	form := url.Values{}
+	form.Set("lang", config.Lang)
+	form.Set("voice", config.Voice)
+	form.Set("speed", strconv.FormatFloat(float64(config.Speed), 'f', 1, 32))
+	form.Set("format", config.Format)
+	form.Set("sampleRateHertz", strconv.Itoa(config.Rate))
+	form.Set("folderId", config.YaFolderID)
+
+	// emotion is deprecated and is sent only for omazh and jane
+	switch config.Voice {
+	case "omazh", "jane":
+		form.Set("emotion", config.Emotion)
+	}
+
+	// the content is either ssml markup or plain text, never both
+	if config.isSSML() {
+		form.Set("ssml", config.SSML)
+	} else {
+		form.Set("text", config.Text)
+	}
+
+	payload := strings.NewReader(form.Encode())
+	req, err := http.NewRequest("POST", YaTTSUrl, payload)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	req.Header.Set("Authorization", fmt.Sprintf("Api-Key %v", config.YaAPIKey))
+
+	cl := &http.Client{}
+	resp, err := cl.Do(req)
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode == http.StatusOK {
+		// success: the body is handed over to the caller unread
+		return resp.Body, nil
+	}
+
+	// failure: the body holds the error description and is closed here
+	yaErr := unmarshallYaError(resp.Body)
+	resp.Body.Close()
+	return nil, yaErr
+}

+ 74 - 0
yask.go

@@ -0,0 +1,74 @@
+package yask
+
+import (
+	"encoding/binary"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+
+	"github.com/go-audio/audio"
+	"github.com/go-audio/wav"
+)
+
+// EncodePCMToWav reads the whole pcm stream from in as 16-bit little endian samples,
+// encodes it to wav and writes the result to out.
+// sampleRate, bitDepth and numChans are passed to the wav encoder as is
+func EncodePCMToWav(in io.Reader, out io.WriteSeeker, sampleRate, bitDepth, numChans int) error {
+	// the last argument is the wav audio format code (presumably 1 means PCM - confirm against go-audio/wav)
+	enc := wav.NewEncoder(out, sampleRate, bitDepth, numChans, 1)
+
+	pcm := &audio.IntBuffer{
+		Format: &audio.Format{NumChannels: numChans, SampleRate: sampleRate},
+	}
+
+	// collect samples until the input is exhausted
+	for {
+		var sample int16
+		err := binary.Read(in, binary.LittleEndian, &sample)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+		pcm.Data = append(pcm.Data, int(sample))
+	}
+
+	if err := enc.Write(pcm); err != nil {
+		return err
+	}
+	return enc.Close()
+}
+
+func unmarshallYaError(r io.Reader) (err error) {
+	var data []byte
+	if data, err = ioutil.ReadAll(r); err != nil {
+		return
+	}
+	mErr := make(map[string]interface{})
+	if err = json.Unmarshal(data, &mErr); err == nil {
+		err = fmt.Errorf("Yandex request error: %v", mErr["error_message"])
+	}
+	return
+}
+
+// Voices returns the voices of the package voice list that pass every given filter
+// lang: empty (all languages) or a language value to match exactly
+// sex: 0 - all, 1 - male, 2 - female
+// premium: 0 - all, 1 - standard only, 2 - premium only
+func Voices(lang string, sex, premium int) (res []Voice) {
+	for _, v := range voices {
+		// language filter
+		if lang != "" && v.Lang != lang {
+			continue
+		}
+
+		// sex filter; any other value does not filter
+		switch sex {
+		case SexMale:
+			if !v.Male {
+				continue
+			}
+		case SexFemale:
+			if v.Male {
+				continue
+			}
+		}
+
+		// premium filter: a non-zero value must equal the class of the voice
+		if premium != 0 {
+			class := 1
+			if v.Premium {
+				class = 2
+			}
+			if premium != class {
+				continue
+			}
+		}
+
+		res = append(res, v)
+	}
+	return res
+}

+ 76 - 0
z_test.go

@@ -0,0 +1,76 @@
+package yask
+
+import (
+	"os"
+	"testing"
+
+	"github.com/fcg-xvii/go-tools/text/config"
+)
+
+// credentials for the integration tests, filled by init from test_data/ya.config
+var (
+	yaFolderID, yaAPIKey string
+)
+
+// init reads the credentials from test_data/ya.config when the file can be opened,
+// otherwise both values stay empty
+func init() {
+	f, err := os.Open("test_data/ya.config")
+	if err != nil {
+		return
+	}
+	defer f.Close()
+
+	config.SplitToVals(f, "::", &yaFolderID, &yaAPIKey)
+}
+
+// TestTextToSpeech synthesizes a russian phrase with the default config and
+// stores the result as test_data/tts.wav. It is skipped when no credentials are configured
+func TestTextToSpeech(t *testing.T) {
+	if len(yaFolderID) == 0 {
+		// report the test as skipped instead of silently passing
+		t.Skip("ya config 'test_data/ya.config' not parsed. format 'ya_folder_id::ya_api_key")
+	}
+
+	// init request config (named conf to avoid shadowing the imported config package)
+	conf := TTSDefaultConfigText(yaFolderID, yaAPIKey, "Привет, это тест синтеза речи с помощью сервиса Яндекса")
+
+	r, err := TextToSpeech(conf)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// deferred so the stream is released on the failure paths too
+	defer r.Close()
+
+	f, err := os.OpenFile("test_data/tts.wav", os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer f.Close()
+
+	if err := EncodePCMToWav(r, f, conf.Rate, 16, 1); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestSpeechToTextShort recognizes test_data/test_sound.wav with the default config
+// and logs the text. It is skipped when no credentials are configured
+func TestSpeechToTextShort(t *testing.T) {
+	if len(yaFolderID) == 0 {
+		// report the test as skipped instead of silently passing
+		t.Skip("ya config 'test_data/ya.config' not parsed. format 'ya_folder_id::ya_api_key")
+	}
+
+	f, err := os.Open("test_data/test_sound.wav")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// the audio file was never closed before
+	defer f.Close()
+
+	conf := STTConfigDefault(yaFolderID, yaAPIKey, f)
+
+	text, err := SpeechToTextShort(conf)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Log(text)
+}
+
+// TestVoiseFilter checks that Voices without filters returns the whole voice list
+// and that a combined filter returns only matching voices
+func TestVoiseFilter(t *testing.T) {
+	// Get all voices
+	items := Voices("", SexAll, 0)
+	t.Log(len(voices), len(items), items)
+	if len(items) != len(voices) {
+		t.Fatalf("unfiltered call returned %d voices, want %d", len(items), len(voices))
+	}
+
+	// Get only russian standard females
+	items = Voices(LangRU, SexFemale, 1)
+	t.Log(len(voices), len(items), items)
+	if len(items) == 0 {
+		t.Fatal("expected at least one russian standard female voice")
+	}
+	for _, v := range items {
+		if v.Lang != LangRU || v.Male || v.Premium {
+			t.Errorf("voice %q does not match the filter: %+v", v.Voice, v)
+		}
+	}
+}