Azure 语音

基类：`BaseToolSpec`

Azure 语音工具规范。

源代码位于 llama-index-integrations/tools/llama-index-tools-azure-speech/llama_index/tools/azure_speech/base.py

AzureSpeechToolSpec #

class AzureSpeechToolSpec(BaseToolSpec):
    """Azure Speech tool spec.

    Exposes two tools, ``speech_to_text`` and ``text_to_speech``, which both
    use the ``SpeechConfig`` object built in ``__init__``.
    """

    spec_functions = ["speech_to_text", "text_to_speech"]

    def __init__(
        self, region: str, speech_key: str, language: Optional[str] = "en-US"
    ) -> None:
        """Initialize with parameters.

        Args:
            region (str): Region passed to ``speechsdk.SpeechConfig``.
            speech_key (str): Subscription key passed to ``speechsdk.SpeechConfig``.
            language (Optional[str]): Value assigned to
                ``speech_recognition_language`` on the config. Defaults to "en-US".

        """
        # Function-scope import: the SDK is loaded when the spec is constructed,
        # not when this module is imported.
        import azure.cognitiveservices.speech as speechsdk

        self.config = speechsdk.SpeechConfig(subscription=speech_key, region=region)
        self.config.speech_recognition_language = language

    def text_to_speech(self, text: str) -> Optional[str]:
        """
        This tool accepts a natural language string and will use Azure speech services to create an
        audio version of the text, and play it on the user's computer.

        Args:
            text (str): The text to play

        Returns:
            Optional[str]: "Audio playback complete." when synthesis completes;
            None otherwise (cancellation details are printed to stdout).

        """
        import azure.cognitiveservices.speech as speechsdk

        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.config)
        result = speech_synthesizer.speak_text(text)

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            # NOTE(review): the stream object is constructed and immediately
            # discarded; kept as-is to avoid changing SDK interaction — confirm
            # whether it is needed at all.
            speechsdk.AudioDataStream(result)
            return "Audio playback complete."

        if result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print(f"Speech synthesis canceled: {cancellation_details.reason}")
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print(f"Error details: {cancellation_details.error_details}")

        # Every non-success outcome reports "no result" to the caller.
        return None

    def _transcribe(self, speech_recognizer) -> List[str]:
        """Run continuous recognition to completion and collect the recognized text.

        Args:
            speech_recognizer: Recognizer object exposing the ``recognized``,
                ``session_stopped`` and ``canceled`` signals plus the
                ``start/stop_continuous_recognition_async`` methods.

        Returns:
            List[str]: The ``evt.result.text`` of each ``recognized`` event, in
            the order the events were delivered.

        """
        import threading

        # An Event lets this thread block until a callback signals completion,
        # instead of polling a flag on a fixed sleep interval.
        finished = threading.Event()
        results: List[str] = []

        def stop_cb(evt) -> None:
            """Callback that stops continuous recognition."""
            speech_recognizer.stop_continuous_recognition_async()
            finished.set()

        speech_recognizer.recognized.connect(
            lambda evt: results.append(evt.result.text)
        )
        # Either signal ends the wait below; set() is idempotent if both fire.
        speech_recognizer.session_stopped.connect(stop_cb)
        speech_recognizer.canceled.connect(stop_cb)

        # Start continuous speech recognition
        speech_recognizer.start_continuous_recognition_async()
        finished.wait()

        return results

    def speech_to_text(self, filename: str) -> List[str]:
        """
        This tool accepts a filename for a speech audio file and uses Azure to transcribe it into text.

        Args:
            filename (str): The name of the file to transcribe

        Returns:
            List[str]: The recognized text segments collected by ``_transcribe``.

        """
        import azure.cognitiveservices.speech as speechsdk

        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=self.config,
            audio_config=speechsdk.audio.AudioConfig(filename=filename),
        )
        return self._transcribe(speech_recognizer)

此工具接受自然语言字符串，并将使用 Azure 语音服务创建文本的音频版本，并在用户的计算机上播放。

text_to_speech(text: str) -> None

参数

名称	类型	描述	默认值
`text`	`str`	要播放的文本	必需

text_to_speech #

def text_to_speech(self, text: str) -> Optional[str]:
    """
    This tool accepts a natural language string and will use Azure speech services to create an
    audio version of the text, and play it on the user's computer.

    Args:
        text (str): The text to play

    Returns:
        Optional[str]: "Audio playback complete." when synthesis completes;
        None otherwise (cancellation details are printed to stdout).

    """
    import azure.cognitiveservices.speech as speechsdk

    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.config)
    result = speech_synthesizer.speak_text(text)

    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        # NOTE(review): the stream object is constructed and immediately
        # discarded; kept as-is to avoid changing SDK interaction — confirm
        # whether it is needed at all.
        speechsdk.AudioDataStream(result)
        return "Audio playback complete."

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print(f"Speech synthesis canceled: {cancellation_details.reason}")
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {cancellation_details.error_details}")

    # Every non-success outcome reports "no result" to the caller.
    return None

此工具接受语音音频文件的文件名，并使用 Azure 将其转录为文本。

speech_to_text(filename: str) -> List[str]

参数

名称	类型	描述	默认值
`filename`	`str`	要转录的文件名	必需

speech_to_text #

def speech_to_text(self, filename: str) -> List[str]:
    """
    This tool accepts a filename for a speech audio file and uses Azure to transcribe it into text.

    Args:
        filename (str): The name of the file to transcribe

    Returns:
        List[str]: Whatever ``self._transcribe`` returns for the recognizer
        built from ``filename``.

    """
    import azure.cognitiveservices.speech as speechsdk

    # Describe the audio input first, then bind it to the shared speech config.
    audio_source = speechsdk.audio.AudioConfig(filename=filename)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=self.config, audio_config=audio_source
    )
    return self._transcribe(recognizer)

Azure 语音

基类：BaseToolSpec

此工具接受自然语言字符串，并将使用 Azure 语音服务创建文本的音频版本，并在用户的计算机上播放。

此工具接受语音音频文件的文件名，并使用 Azure 将其转录为文本。

基类：`BaseToolSpec`