Elevenlabs

ElevenLabsToolSpec #

基类: BaseToolSpec

ElevenLabs 工具规范，用于文本转语音合成。

源代码位于 llama-index-integrations/tools/llama-index-tools-elevenlabs/llama_index/tools/elevenlabs/base.py

class ElevenLabsToolSpec(BaseToolSpec):
    """ElevenLabs tool spec for text-to-speech synthesis."""

    # Names of the methods this spec exposes as tools
    spec_functions = ["get_voices", "text_to_speech"]

    def __init__(
        self, api_key: str, base_url: Optional[str] = "https://api.elevenlabs.io"
    ) -> None:
        """
        Initialize with parameters.

        Args:
            api_key (str): Your ElevenLabs API key
            base_url (Optional[str]): The base url of elevenlabs

        """
        self.api_key = api_key
        self.base_url = base_url

    def get_voices(self) -> List[dict]:
        """
        Get list of available voices from ElevenLabs.

        Returns:
            List[dict]: List of available voices with their details

        """
        # Imported lazily so the dependency is only needed when the tool runs
        from elevenlabs import ElevenLabs

        # Create the client
        client = ElevenLabs(base_url=self.base_url, api_key=self.api_key)

        # Get the voices
        response = client.voices.get_all()

        # Return the dumped voice models as dict
        return [voice.model_dump() for voice in response.voices]

    def text_to_speech(
        self,
        text: str,
        output_path: str,
        voice_id: Optional[str] = None,
        voice_stability: Optional[float] = None,
        voice_similarity_boost: Optional[float] = None,
        voice_style: Optional[float] = None,
        voice_use_speaker_boost: Optional[bool] = None,
        model_id: Optional[str] = "eleven_monolingual_v1",
    ) -> str:
        """
        Convert text to speech using ElevenLabs API and save it to a file.

        Args:
            text (str): The text to convert to speech
            output_path (str): Path of the file the audio is written to (required)
            voice_id (Optional[str]): Override the default voice ID
            voice_stability (Optional[float]): The stability setting of the voice
            voice_similarity_boost (Optional[float]): The similarity boost setting of the voice
            voice_style (Optional[float]): The style setting of the voice
            voice_use_speaker_boost (Optional[bool]): Whether to use speaker boost or not
            model_id (Optional[str]): ID of the model to use, "eleven_monolingual_v1" unless overridden

        Any voice setting left as None falls back to the matching value in
        ``DEFAULT_VOICE.settings``.

        Returns:
            str: Path to the generated audio file (the same value as output_path)

        """
        from elevenlabs import ElevenLabs, VoiceSettings
        from elevenlabs.client import DEFAULT_VOICE

        # Create client
        client = ElevenLabs(base_url=self.base_url, api_key=self.api_key)

        # Default the settings if not supplied
        if voice_stability is None:
            voice_stability = DEFAULT_VOICE.settings.stability

        if voice_similarity_boost is None:
            voice_similarity_boost = DEFAULT_VOICE.settings.similarity_boost

        if voice_style is None:
            voice_style = DEFAULT_VOICE.settings.style

        if voice_use_speaker_boost is None:
            voice_use_speaker_boost = DEFAULT_VOICE.settings.use_speaker_boost

        # Create the VoiceSettings
        voice_settings = VoiceSettings(
            stability=voice_stability,
            similarity_boost=voice_similarity_boost,
            style=voice_style,
            use_speaker_boost=voice_use_speaker_boost,
        )

        # Generate audio
        audio = client.generate(
            text=text, voice=voice_id, voice_settings=voice_settings, model=model_id
        )

        # Save the audio: write each chunk directly rather than first joining
        # them all into one in-memory bytes object
        with open(output_path, "wb") as fp:
            fp.writelines(audio)

        # Return the save location
        return output_path

get_voices #

get_voices() -> List[dict]

从 ElevenLabs 获取可用声音列表。

返回

类型	描述
`List[dict]`	可用声音及其详细信息的列表

源代码位于 llama-index-integrations/tools/llama-index-tools-elevenlabs/llama_index/tools/elevenlabs/base.py

def get_voices(self) -> List[dict]:
    """
    Fetch the voices ElevenLabs offers and return them as plain dicts.

    Returns:
        List[dict]: One dict per voice, produced by ``model_dump()`` on each
            voice model in the response

    """
    from elevenlabs import ElevenLabs

    # Build a client from the endpoint and key stored on this instance
    elevenlabs_client = ElevenLabs(api_key=self.api_key, base_url=self.base_url)

    # Ask the API for every voice
    available_voices = elevenlabs_client.voices.get_all().voices

    # Serialise each voice model into a dict
    return [voice_model.model_dump() for voice_model in available_voices]

text_to_speech #

text_to_speech(text: str, output_path: str, voice_id: Optional[str] = None, voice_stability: Optional[float] = None, voice_similarity_boost: Optional[float] = None, voice_style: Optional[float] = None, voice_use_speaker_boost: Optional[bool] = None, model_id: Optional[str] = 'eleven_monolingual_v1') -> str

使用 ElevenLabs API 将文本转换为语音。

参数

名称	类型	描述	默认值
`text`	`str`	要转换为语音的文本	必需
`output_path`	`str`	音频文件的保存路径	必需
`voice_id`	`Optional[str]`	覆盖默认声音 ID	`None`
`voice_stability`	`Optional[float]`	声音的稳定性设置	`None`
`voice_similarity_boost`	`Optional[float]`	声音的相似度增强设置	`None`
`voice_style`	`Optional[float]`	声音的风格设置	`None`
`voice_use_speaker_boost`	`Optional[bool]`	是否使用发音人增强	`None`
`model_id`	`Optional[str]`	覆盖默认模型 ID	`'eleven_monolingual_v1'`

返回

名称	类型	描述
`str`	`str`	生成音频文件的路径

源代码位于 llama-index-integrations/tools/llama-index-tools-elevenlabs/llama_index/tools/elevenlabs/base.py

def text_to_speech(
    self,
    text: str,
    output_path: str,
    voice_id: Optional[str] = None,
    voice_stability: Optional[float] = None,
    voice_similarity_boost: Optional[float] = None,
    voice_style: Optional[float] = None,
    voice_use_speaker_boost: Optional[bool] = None,
    model_id: Optional[str] = "eleven_monolingual_v1",
) -> str:
    """
    Convert text to speech using ElevenLabs API and save it to a file.

    Args:
        text (str): The text to convert to speech
        output_path (str): Path of the file the audio is written to (required)
        voice_id (Optional[str]): Override the default voice ID
        voice_stability (Optional[float]): The stability setting of the voice
        voice_similarity_boost (Optional[float]): The similarity boost setting of the voice
        voice_style (Optional[float]): The style setting of the voice
        voice_use_speaker_boost (Optional[bool]): Whether to use speaker boost or not
        model_id (Optional[str]): ID of the model to use, "eleven_monolingual_v1" unless overridden

    Any voice setting left as None falls back to the matching value in
    ``DEFAULT_VOICE.settings``.

    Returns:
        str: Path to the generated audio file (the same value as output_path)

    """
    from elevenlabs import ElevenLabs, VoiceSettings
    from elevenlabs.client import DEFAULT_VOICE

    # Create client
    client = ElevenLabs(base_url=self.base_url, api_key=self.api_key)

    # Default the settings if not supplied
    if voice_stability is None:
        voice_stability = DEFAULT_VOICE.settings.stability

    if voice_similarity_boost is None:
        voice_similarity_boost = DEFAULT_VOICE.settings.similarity_boost

    if voice_style is None:
        voice_style = DEFAULT_VOICE.settings.style

    if voice_use_speaker_boost is None:
        voice_use_speaker_boost = DEFAULT_VOICE.settings.use_speaker_boost

    # Create the VoiceSettings
    voice_settings = VoiceSettings(
        stability=voice_stability,
        similarity_boost=voice_similarity_boost,
        style=voice_style,
        use_speaker_boost=voice_use_speaker_boost,
    )

    # Generate audio
    audio = client.generate(
        text=text, voice=voice_id, voice_settings=voice_settings, model=model_id
    )

    # Save the audio: write each chunk directly rather than first joining
    # them all into one in-memory bytes object
    with open(output_path, "wb") as fp:
        fp.writelines(audio)

    # Return the save location
    return output_path