跳到内容

Elevenlabs

ElevenLabsToolSpec #

基类: BaseToolSpec

ElevenLabs 工具规范,用于文本转语音合成。

源代码位于 llama-index-integrations/tools/llama-index-tools-elevenlabs/llama_index/tools/elevenlabs/base.py
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
class ElevenLabsToolSpec(BaseToolSpec):
    """ElevenLabs tool spec for text-to-speech synthesis."""

    # Names of the methods listed as this spec's functions.
    spec_functions = ["get_voices", "text_to_speech"]

    def __init__(
        self, api_key: str, base_url: Optional[str] = "https://api.elevenlabs.io"
    ) -> None:
        """
        Initialize with parameters.

        Args:
            api_key (str): Your ElevenLabs API key
            base_url (Optional[str]): The base url of elevenlabs

        """
        self.api_key = api_key
        self.base_url = base_url

    def get_voices(self) -> List[dict]:
        """
        Get list of available voices from ElevenLabs.

        Returns:
            List[dict]: List of available voices with their details

        """
        from elevenlabs import ElevenLabs

        # Create the client
        client = ElevenLabs(base_url=self.base_url, api_key=self.api_key)

        # Get the voices
        response = client.voices.get_all()

        # Return the dumped voice models as dict
        return [voice.model_dump() for voice in response.voices]

    def text_to_speech(
        self,
        text: str,
        output_path: str,
        voice_id: Optional[str] = None,
        voice_stability: Optional[float] = None,
        voice_similarity_boost: Optional[float] = None,
        voice_style: Optional[float] = None,
        voice_use_speaker_boost: Optional[bool] = None,
        model_id: Optional[str] = "eleven_monolingual_v1",
    ) -> str:
        """
        Convert text to speech using ElevenLabs API.

        Args:
            text (str): The text to convert to speech
            output_path (str): Path of the file the audio is written to. The
                file is opened in binary write mode, so an existing file at
                this path is overwritten
            voice_id (Optional[str]): Override the default voice ID. When None,
                the voice ID of the SDK's ``DEFAULT_VOICE`` is used
            voice_stability (Optional[float]): The stability setting of the voice
            voice_similarity_boost (Optional[float]): The similarity boost setting of the voice
            voice_style (Optional[float]): The style setting of the voice
            voice_use_speaker_boost (Optional[bool]): Whether to use speaker boost or not
            model_id (Optional[str]): Override the default model ID

        Returns:
            str: Path to the generated audio file (``output_path`` unchanged)

        """
        from elevenlabs import ElevenLabs, VoiceSettings
        from elevenlabs.client import DEFAULT_VOICE

        # Create client
        client = ElevenLabs(base_url=self.base_url, api_key=self.api_key)

        # Default the settings if not supplied
        if voice_stability is None:
            voice_stability = DEFAULT_VOICE.settings.stability

        if voice_similarity_boost is None:
            voice_similarity_boost = DEFAULT_VOICE.settings.similarity_boost

        if voice_style is None:
            voice_style = DEFAULT_VOICE.settings.style

        if voice_use_speaker_boost is None:
            voice_use_speaker_boost = DEFAULT_VOICE.settings.use_speaker_boost

        # Fall back to the default voice explicitly instead of forwarding None
        # to the SDK (NOTE(review): SDK handling of voice=None not confirmed).
        if voice_id is None:
            voice_id = DEFAULT_VOICE.voice_id

        # Create the VoiceSettings
        voice_settings = VoiceSettings(
            stability=voice_stability,
            similarity_boost=voice_similarity_boost,
            style=voice_style,
            use_speaker_boost=voice_use_speaker_boost,
        )

        # Generate audio
        audio = client.generate(
            text=text, voice=voice_id, voice_settings=voice_settings, model=model_id
        )

        # Collect every chunk before touching the filesystem: if reading the
        # chunks fails, the output file is neither created nor truncated.
        audio_bytes = b"".join(audio)

        # Save the audio
        with open(output_path, "wb") as fp:
            fp.write(audio_bytes)

        # Return the save location
        return output_path

get_voices #

get_voices() -> List[dict]

从 ElevenLabs 获取可用声音列表。

返回

类型 描述
List[dict]

List[dict]:可用声音及其详细信息的列表

源代码位于 llama-index-integrations/tools/llama-index-tools-elevenlabs/llama_index/tools/elevenlabs/base.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def get_voices(self) -> List[dict]:
    """
    Fetch the voices that ElevenLabs makes available.

    A client is built from ``self.base_url`` and ``self.api_key``, all voices
    are requested, and each returned voice model is converted with
    ``model_dump()``.

    Returns:
        List[dict]: One dictionary per available voice

    """
    from elevenlabs import ElevenLabs

    elevenlabs_client = ElevenLabs(base_url=self.base_url, api_key=self.api_key)
    all_voices = elevenlabs_client.voices.get_all().voices

    dumped_voices = []
    for voice_model in all_voices:
        dumped_voices.append(voice_model.model_dump())
    return dumped_voices

text_to_speech #

text_to_speech(text: str, output_path: str, voice_id: Optional[str] = None, voice_stability: Optional[float] = None, voice_similarity_boost: Optional[float] = None, voice_style: Optional[float] = None, voice_use_speaker_boost: Optional[bool] = None, model_id: Optional[str] = 'eleven_monolingual_v1') -> str

使用 ElevenLabs API 将文本转换为语音。

参数

名称 类型 描述 默认值
text str

要转换为语音的文本

必需
output_path str

输出文件保存路径

必需
output_path str

音频文件保存路径。如果为 None,则自动生成路径。

必需
voice_id Optional[str]

覆盖默认声音 ID

voice_stability Optional[float]

声音的稳定性设置

voice_similarity_boost Optional[float]

声音的相似度增强设置

voice_style Optional[float]

声音的风格设置

voice_use_speaker_boost Optional[bool]

是否使用发音人增强

model_id Optional[str]

覆盖默认模型 ID

'eleven_monolingual_v1'

返回

名称 类型 描述
str str

生成音频文件的路径

源代码位于 llama-index-integrations/tools/llama-index-tools-elevenlabs/llama_index/tools/elevenlabs/base.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def text_to_speech(
    self,
    text: str,
    output_path: str,
    voice_id: Optional[str] = None,
    voice_stability: Optional[float] = None,
    voice_similarity_boost: Optional[float] = None,
    voice_style: Optional[float] = None,
    voice_use_speaker_boost: Optional[bool] = None,
    model_id: Optional[str] = "eleven_monolingual_v1",
) -> str:
    """
    Convert text to speech using ElevenLabs API.

    Args:
        text (str): The text to convert to speech
        output_path (str): Path of the file the audio is written to. The
            file is opened in binary write mode, so an existing file at
            this path is overwritten
        voice_id (Optional[str]): Override the default voice ID. When None,
            the voice ID of the SDK's ``DEFAULT_VOICE`` is used
        voice_stability (Optional[float]): The stability setting of the voice
        voice_similarity_boost (Optional[float]): The similarity boost setting of the voice
        voice_style (Optional[float]): The style setting of the voice
        voice_use_speaker_boost (Optional[bool]): Whether to use speaker boost or not
        model_id (Optional[str]): Override the default model ID

    Returns:
        str: Path to the generated audio file (``output_path`` unchanged)

    """
    from elevenlabs import ElevenLabs, VoiceSettings
    from elevenlabs.client import DEFAULT_VOICE

    # Create client
    client = ElevenLabs(base_url=self.base_url, api_key=self.api_key)

    # Default the settings if not supplied
    if voice_stability is None:
        voice_stability = DEFAULT_VOICE.settings.stability

    if voice_similarity_boost is None:
        voice_similarity_boost = DEFAULT_VOICE.settings.similarity_boost

    if voice_style is None:
        voice_style = DEFAULT_VOICE.settings.style

    if voice_use_speaker_boost is None:
        voice_use_speaker_boost = DEFAULT_VOICE.settings.use_speaker_boost

    # Fall back to the default voice explicitly instead of forwarding None
    # to the SDK (NOTE(review): SDK handling of voice=None not confirmed).
    if voice_id is None:
        voice_id = DEFAULT_VOICE.voice_id

    # Create the VoiceSettings
    voice_settings = VoiceSettings(
        stability=voice_stability,
        similarity_boost=voice_similarity_boost,
        style=voice_style,
        use_speaker_boost=voice_use_speaker_boost,
    )

    # Generate audio
    audio = client.generate(
        text=text, voice=voice_id, voice_settings=voice_settings, model=model_id
    )

    # Collect every chunk before touching the filesystem: if reading the
    # chunks fails, the output file is neither created nor truncated.
    audio_bytes = b"".join(audio)

    # Save the audio
    with open(output_path, "wb") as fp:
        fp.write(audio_bytes)

    # Return the save location
    return output_path