Openai

OpenAIImageGenerationToolSpec #

基类：BaseToolSpec

OpenAI 图像生成工具 Spec。

源代码位置：llama-index-integrations/tools/llama-index-tools-openai/llama_index/tools/openai/image_generation/base.py

class OpenAIImageGenerationToolSpec(BaseToolSpec):
    """OpenAI Image Generation tool spec.

    Exposes a single tool function, ``image_generation``, which asks the
    OpenAI images endpoint for an image and returns either its URL or the
    path of a copy saved in the cache directory.
    """

    spec_functions = ["image_generation"]

    def __init__(
        self, api_key: Optional[str] = None, cache_dir: Optional[str] = None
    ) -> None:
        """Initialize with parameters.

        Args:
            api_key: API key handed to the OpenAI client.
            cache_dir: Directory used for downloaded images. Falls back to
                ``DEFAULT_CACHE_DIR`` when omitted or empty.

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        # Imported here so that a missing package produces an install hint.
        try:
            from openai import OpenAI
        except ImportError as err:
            raise ImportError(
                "Please install openai with `pip install openai` to use this tool"
            ) from err

        self.client = OpenAI(api_key=api_key)
        self.cache_dir = cache_dir or DEFAULT_CACHE_DIR

    def get_cache_dir(self):
        """Return the directory in which downloaded images are stored."""
        return self.cache_dir

    def save_base64_image(self, base64_str, image_name):
        """Decode a base64-encoded image and save it in the cache directory.

        Args:
            base64_str: Base64-encoded image data.
            image_name: File name (including extension) to save the image as.

        Returns:
            The path of the saved image: the cache directory joined with
            ``image_name``.

        Raises:
            ImportError: If Pillow is not installed.
        """
        from io import BytesIO

        try:
            from PIL import Image
        except ImportError as err:
            raise ImportError(
                "Please install Pillow with `pip install Pillow` to use this tool"
            ) from err
        cache_dir = self.cache_dir

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test when the directory appears concurrently.
        os.makedirs(cache_dir, exist_ok=True)

        # Decode the base64 string
        image_data = base64.b64decode(base64_str)

        # Create an image from the decoded bytes and save it
        image_path = os.path.join(cache_dir, image_name)
        with Image.open(BytesIO(image_data)) as img:
            img.save(image_path)

        return image_path

    def image_generation(
        self,
        text: str,
        model: Optional[str] = "dall-e-3",
        quality: Optional[str] = "standard",
        num_images: Optional[int] = 1,
        size: Optional[str] = DEFAULT_SIZE,
        style: Optional[str] = "vivid",
        timeout: Optional[int] = None,
        download: Optional[bool] = False,
    ) -> str:
        """
        This tool accepts a natural language string and will use OpenAI's DALL-E model to generate an image.

        Args:
            text: The text to generate an image from.

            model: The model to use for image generation. Defaults to `dall-e-3`.
                Must be one of `dall-e-2` or `dall-e-3`.

            num_images: The number of images to generate. Defaults to 1.
                Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
                Only the first generated image is returned by this tool.

            quality: The quality of the image that will be generated. Defaults to `standard`.
                Must be one of `standard` or `hd`. `hd` creates images with finer
                details and greater consistency across the image. This param is only supported
                for `dall-e-3`.

            size: The size of the generated images. Defaults to `1024x1024`.
                Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`.
                Must be one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models.

            style: The style of the generated images. Defaults to `vivid`.
                Must be one of `vivid` or `natural`.
                Vivid causes the model to lean towards generating hyper-real and dramatic images.
                Natural causes the model to produce more natural, less hyper-real looking images.
                This param is only supported for `dall-e-3`.

            timeout: Override the client-level default timeout for this request, in seconds. Defaults to `None`.

            download: If `True`, the image will be downloaded to the cache directory. Defaults to `False`.

        Returns:
            The path of the saved image when `download` is true, otherwise the
            URL of the generated image.

        Raises:
            Exception: If `size` is not listed in `valid_sizes` for `model`.

        """
        if size not in valid_sizes[model]:
            raise Exception(f"Invalid size for {model}: {size}")

        response = self.client.images.generate(
            prompt=text,
            n=num_images,
            model=model,
            quality=quality,
            size=size,
            # Base64 payload is requested only when the image must be saved locally.
            response_format="b64_json" if download else "url",
            style=style,
            timeout=timeout,
        )
        if download:
            image_bytes = response.data[0].b64_json
            ext = get_extension(image_bytes)
            filename = f"{time.time()}.{ext}"

            # Return the path itself, matching the `-> str` annotation and the
            # URL branch (previously a one-element tuple was returned).
            return self.save_base64_image(image_bytes, filename)

        return response.data[0].url

image_generation #

image_generation(text: str, model: Optional[str] = 'dall-e-3', quality: Optional[str] = 'standard', num_images: Optional[int] = 1, size: Optional[str] = DEFAULT_SIZE, style: Optional[str] = 'vivid', timeout: Optional[int] = None, download: Optional[bool] = False) -> str

此工具接受自然语言字符串，并将使用 OpenAI 的 DALL-E 模型生成图像。

参数

名称	类型	描述	默认值
`text`	`str`	用于生成图像的文本。	必需
`model`	`Optional[str]`	用于图像生成的模型。默认为 `dall-e-3`。必须是 `dall-e-2` 或 `dall-e-3` 之一。	`'dall-e-3'`
`num_images`	`Optional[int]`	要生成的图像数量。默认为 1。必须介于 1 和 10 之间。对于 `dall-e-3`，仅支持 `n=1`。	`1`
`quality`	`Optional[str]`	将生成的图像质量。默认为 `standard`。必须是 `standard` 或 `hd` 之一。`hd` 会生成具有更精细细节和更高图像一致性的图像。此参数仅支持 `dall-e-3`。	`'standard'`
`size`	`Optional[str]`	生成图像的尺寸。默认为 `1024x1024`。对于 `dall-e-2` 模型，必须是 `256x256`、`512x512` 或 `1024x1024` 之一。对于 `dall-e-3` 模型，必须是 `1024x1024`、`1792x1024` 或 `1024x1792` 之一。	`DEFAULT_SIZE`
`style`	`Optional[str]`	生成图像的风格。默认为 `vivid`。必须是 `vivid` 或 `natural` 之一。`vivid` 风格使模型倾向于生成超现实和戏剧性的图像。`natural` 风格使模型生成更自然、看起来不那么超现实的图像。此参数仅支持 `dall-e-3` 模型。	`'vivid'`
`timeout`	`Optional[int]`	覆盖此请求的客户端级别默认超时时间，以秒为单位。默认为 `None`。	`None`
`download`	`Optional[bool]`	如果为 `True`，图像将被下载到缓存目录。默认为 `False`。	`False`

源代码位置：llama-index-integrations/tools/llama-index-tools-openai/llama_index/tools/openai/image_generation/base.py

def image_generation(
    self,
    text: str,
    model: Optional[str] = "dall-e-3",
    quality: Optional[str] = "standard",
    num_images: Optional[int] = 1,
    size: Optional[str] = DEFAULT_SIZE,
    style: Optional[str] = "vivid",
    timeout: Optional[int] = None,
    download: Optional[bool] = False,
) -> str:
    """
    This tool accepts a natural language string and will use OpenAI's DALL-E model to generate an image.

    Args:
        text: The text to generate an image from.

        model: The model to use for image generation. Defaults to `dall-e-3`.
            Must be one of `dall-e-2` or `dall-e-3`.

        num_images: The number of images to generate. Defaults to 1.
            Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
            Only the first generated image is returned by this tool.

        quality: The quality of the image that will be generated. Defaults to `standard`.
            Must be one of `standard` or `hd`. `hd` creates images with finer
            details and greater consistency across the image. This param is only supported
            for `dall-e-3`.

        size: The size of the generated images. Defaults to `1024x1024`.
            Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`.
            Must be one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models.

        style: The style of the generated images. Defaults to `vivid`.
            Must be one of `vivid` or `natural`.
            Vivid causes the model to lean towards generating hyper-real and dramatic images.
            Natural causes the model to produce more natural, less hyper-real looking images.
            This param is only supported for `dall-e-3`.

        timeout: Override the client-level default timeout for this request, in seconds. Defaults to `None`.

        download: If `True`, the image will be downloaded to the cache directory. Defaults to `False`.

    Returns:
        The path of the saved image when `download` is true, otherwise the
        URL of the generated image.

    Raises:
        Exception: If `size` is not listed in `valid_sizes` for `model`.

    """
    if size not in valid_sizes[model]:
        raise Exception(f"Invalid size for {model}: {size}")

    response = self.client.images.generate(
        prompt=text,
        n=num_images,
        model=model,
        quality=quality,
        size=size,
        # Base64 payload is requested only when the image must be saved locally.
        response_format="b64_json" if download else "url",
        style=style,
        timeout=timeout,
    )
    if download:
        image_bytes = response.data[0].b64_json
        ext = get_extension(image_bytes)
        filename = f"{time.time()}.{ext}"

        # Return the path itself, matching the `-> str` annotation and the
        # URL branch (previously a one-element tuple was returned).
        return self.save_base64_image(image_bytes, filename)

    return response.data[0].url