Source code for langchain_community.tools.azure_ai_services.text_to_speech

from __future__ import annotations

import logging
import tempfile
from typing import Any, Dict, Optional

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import root_validator
from langchain_core.tools import BaseTool
from langchain_core.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


class AzureAiServicesTextToSpeechTool(BaseTool):
    """Tool that queries the Azure AI Services Text to Speech API.

    In order to set this up, follow instructions at:
    https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-text-to-speech?pivots=programming-language-python
    """

    name: str = "azure_ai_services_text_to_speech"
    description: str = (
        "A wrapper around Azure AI Services Text to Speech API. "
        "Useful for when you need to convert text to speech. "
    )
    return_direct: bool = True

    azure_ai_services_key: str = ""  #: :meta private:
    azure_ai_services_region: str = ""  #: :meta private:
    speech_language: str = "en-US"  #: :meta private:
    speech_config: Any  #: :meta private:

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key and region exist, then build the speech config.

        The key and region are looked up in ``values`` first and fall back to
        the ``AZURE_AI_SERVICES_KEY`` / ``AZURE_AI_SERVICES_REGION``
        environment variables.

        Args:
            values: Raw field values supplied to the tool constructor.

        Returns:
            ``values`` with ``speech_config`` set to a ``SpeechConfig`` built
            from the resolved key and region.

        Raises:
            ImportError: If ``azure-cognitiveservices-speech`` is not installed.
        """
        azure_ai_services_key = get_from_dict_or_env(
            values, "azure_ai_services_key", "AZURE_AI_SERVICES_KEY"
        )
        azure_ai_services_region = get_from_dict_or_env(
            values, "azure_ai_services_region", "AZURE_AI_SERVICES_REGION"
        )

        # Keep the try body to the import alone so that an error raised while
        # constructing the config is not misreported as a missing package.
        try:
            import azure.cognitiveservices.speech as speechsdk
        except ImportError as e:
            raise ImportError(
                "azure-cognitiveservices-speech is not installed. "
                "Run `pip install azure-cognitiveservices-speech` to install."
            ) from e

        values["speech_config"] = speechsdk.SpeechConfig(
            subscription=azure_ai_services_key, region=azure_ai_services_region
        )
        return values

    def _text_to_speech(self, text: str, speech_language: str) -> str:
        """Synthesize ``text`` and save the audio to a temporary WAV file.

        Args:
            text: The text to convert to speech.
            speech_language: Value assigned to
                ``speech_config.speech_synthesis_language`` before synthesis.

        Returns:
            The path of the generated ``.wav`` file when synthesis completes.
            Otherwise a human-readable status message: ``"Speech synthesis
            canceled."`` for a non-error cancellation, or ``"Speech synthesis
            failed: ..."`` for any other result reason.

        Raises:
            ImportError: If ``azure-cognitiveservices-speech`` is not installed.
            RuntimeError: If synthesis was canceled because of an error.
        """
        # Surface a missing SDK explicitly instead of swallowing the error and
        # hitting a NameError on the first use of `speechsdk` below.
        try:
            import azure.cognitiveservices.speech as speechsdk
        except ImportError as e:
            raise ImportError(
                "azure-cognitiveservices-speech is not installed. "
                "Run `pip install azure-cognitiveservices-speech` to install."
            ) from e

        self.speech_config.speech_synthesis_language = speech_language
        # audio_config=None is forwarded unchanged; the synthesized audio is
        # read back from `result` below rather than from an output device.
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=self.speech_config, audio_config=None
        )
        result = speech_synthesizer.speak_text(text)

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            stream = speechsdk.AudioDataStream(result)
            # Only reserve a unique path here and close our handle before the
            # SDK writes to it, so two writers are never open on the same file.
            # delete=False keeps the file on disk for the caller.
            with tempfile.NamedTemporaryFile(
                mode="wb", suffix=".wav", delete=False
            ) as f:
                wav_path = f.name
            stream.save_to_wav_file(wav_path)
            return wav_path

        if result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            logger.debug(
                "Speech synthesis canceled: %s", cancellation_details.reason
            )
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                raise RuntimeError(
                    f"Speech synthesis error: {cancellation_details.error_details}"
                )
            return "Speech synthesis canceled."

        return f"Speech synthesis failed: {result.reason}"

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Args:
            query: The text to convert to speech.
            run_manager: Optional callback manager; not used by this method.

        Returns:
            Whatever ``_text_to_speech`` returns for ``query`` and
            ``self.speech_language`` (a WAV file path or a status message).

        Raises:
            RuntimeError: If any exception is raised during synthesis; the
                original exception is chained as the cause.
        """
        try:
            return self._text_to_speech(query, self.speech_language)
        except Exception as e:
            raise RuntimeError(
                f"Error while running AzureAiServicesTextToSpeechTool: {e}"
            ) from e