speechtotext.go 1.84 KiB
Newer Older
package googleapi

import (
	"encoding/base64"
	"net/http"

	"corelab.mkcl.org/MKCLOS/coredevelopmentplatform/corepkgv2/loggermdl"
	speech "google.golang.org/api/speech/v1beta1"

	"io/ioutil"
)

// AudioConfig holds the settings SpeechToText uses to build a Google
// Speech-to-Text recognition request. Zero-valued LanguageCode,
// AudioEncoding and AudioSampleRate fields are replaced with defaults by
// SpeechToText.
type AudioConfig struct {
	FilePath        string // path of the audio file that is read and sent for recognition
	LanguageCode    string // language code sent in the request; "en-US" when empty
	AudioEncoding   string // audio encoding name sent in the request; "FLAC" when empty
	AudioSampleRate int64  // sample rate sent in the request; 16000 when zero
}

// SpeechToText sends the audio file at c.FilePath to the Google Speech API
// and returns the recognition response serialized as a JSON string.
//
// The file is read in full, base64-encoded and submitted in a single
// synchronous recognize request made with a speech service created from
// client. Unset fields of c fall back to defaults: AudioEncoding "FLAC",
// AudioSampleRate 16000 and LanguageCode "en-US". c is passed by value, so
// the caller's copy is not modified.
//
// If creating the speech service, reading the file, performing the request
// or marshalling the response fails, the error is logged and returned
// unchanged together with an empty string.
func SpeechToText(client *http.Client, c AudioConfig) (string, error) {
	loggermdl.LogInfo("IN SpeechToText")
	// Deferred so that every return path emits the exit log exactly once.
	defer loggermdl.LogInfo("OUT SpeechToText")

	// Apply defaults for any setting the caller left at its zero value.
	if c.AudioEncoding == "" {
		c.AudioEncoding = "FLAC"
	}
	if c.AudioSampleRate == 0 {
		c.AudioSampleRate = 16000
	}
	if c.LanguageCode == "" {
		c.LanguageCode = "en-US"
	}

	speechService, err := speech.New(client)
	if err != nil {
		loggermdl.LogError("speechService Error: ", err)
		return "", err
	}

	audioData, err := ioutil.ReadFile(c.FilePath)
	if err != nil {
		loggermdl.LogError("ReadFile Error: ", err)
		return "", err
	}

	// The audio bytes are carried in the request body as a base64 string.
	speechRequest := &speech.SyncRecognizeRequest{
		Audio: &speech.RecognitionAudio{
			Content: base64.StdEncoding.EncodeToString(audioData),
		},
		Config: &speech.RecognitionConfig{
			SampleRate:   c.AudioSampleRate,
			Encoding:     c.AudioEncoding,
			LanguageCode: c.LanguageCode,
		},
	}

	syncRecResponse, err := speechService.Speech.Syncrecognize(speechRequest).Do()
	if err != nil {
		loggermdl.LogError("syncRecCall Error: ", err)
		return "", err
	}

	resp, err := syncRecResponse.MarshalJSON()
	if err != nil {
		loggermdl.LogError("syncRecResponse Error: ", err)
		return "", err
	}
	return string(resp), nil
}