跳到内容

Google

GoogleIndex #

基类: BaseManagedIndex

Google 的生成式 AI 语义向量存储,支持 AQA。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
class GoogleIndex(BaseManagedIndex):
    """Google's Generative AI Semantic vector store with AQA."""

    _store: GoogleVectorStore
    _index: VectorStoreIndex

    def __init__(
        self,
        vector_store: GoogleVectorStore,
        embed_model: Optional[BaseEmbedding] = None,
        # deprecated
        **kwargs: Any,
    ) -> None:
        """
        Creates an instance of GoogleIndex.

        Prefer to use the factories `from_corpus` or `create_corpus` instead.
        """
        embed_model = embed_model or MockEmbedding(embed_dim=3)

        self._store = vector_store
        self._index = VectorStoreIndex.from_vector_store(
            vector_store, embed_model=embed_model, **kwargs
        )

        super().__init__(
            index_struct=self._index.index_struct,
            **kwargs,
        )

    @classmethod
    def from_corpus(
        cls: Type[IndexType], *, corpus_id: str, **kwargs: Any
    ) -> IndexType:
        """
        Creates a GoogleIndex from an existing corpus.

        Args:
            corpus_id: ID of an existing corpus on Google's server.

        Returns:
            An instance of GoogleIndex pointing to the specified corpus.

        """
        _logger.debug(f"\n\nGoogleIndex.from_corpus(corpus_id={corpus_id})")
        return cls(
            vector_store=GoogleVectorStore.from_corpus(corpus_id=corpus_id), **kwargs
        )

    @classmethod
    def create_corpus(
        cls: Type[IndexType],
        *,
        corpus_id: Optional[str] = None,
        display_name: Optional[str] = None,
        **kwargs: Any,
    ) -> IndexType:
        """
        Creates a GoogleIndex from a new corpus.

        Args:
            corpus_id: ID of the new corpus to be created. If not provided,
                Google server will provide one.
            display_name: Title of the new corpus. If not provided, Google
                server will provide one.

        Returns:
            An instance of GoogleIndex pointing to the specified corpus.

        """
        _logger.debug(
            f"\n\nGoogleIndex.from_new_corpus(new_corpus_id={corpus_id}, new_display_name={display_name})"
        )
        return cls(
            vector_store=GoogleVectorStore.create_corpus(
                corpus_id=corpus_id, display_name=display_name
            ),
            **kwargs,
        )

    @classmethod
    def from_documents(
        cls: Type[IndexType],
        documents: Sequence[Document],
        storage_context: Optional[StorageContext] = None,
        show_progress: bool = False,
        callback_manager: Optional[CallbackManager] = None,
        transformations: Optional[List[TransformComponent]] = None,
        # deprecated
        embed_model: Optional[BaseEmbedding] = None,
        **kwargs: Any,
    ) -> IndexType:
        """Build an index from a sequence of documents."""
        _logger.debug("\n\nGoogleIndex.from_documents(...)")

        new_display_name = f"Corpus created on {datetime.datetime.now()}"
        instance = cls(
            vector_store=GoogleVectorStore.create_corpus(display_name=new_display_name),
            embed_model=embed_model,
            storage_context=storage_context,
            show_progress=show_progress,
            callback_manager=callback_manager,
            transformations=transformations,
            **kwargs,
        )

        index = cast(GoogleIndex, instance)
        index.insert_documents(
            documents=documents,
        )

        return instance

    @property
    def corpus_id(self) -> str:
        """Returns the corpus ID being used by this GoogleIndex."""
        return self._store.corpus_id

    def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
        """Inserts a set of nodes."""
        self._index.insert_nodes(nodes=nodes, **insert_kwargs)

    def insert_documents(self, documents: Sequence[Document], **kwargs: Any) -> None:
        """Inserts a set of documents."""
        for document in documents:
            self.insert(document=document, **kwargs)

    def delete_ref_doc(
        self, ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any
    ) -> None:
        """Deletes a document and its nodes by using ref_doc_id."""
        self._index.delete_ref_doc(ref_doc_id=ref_doc_id, **delete_kwargs)

    def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None:
        """Updates a document and its corresponding nodes."""
        self._index.update(document=document, **update_kwargs)

    def as_retriever(self, **kwargs: Any) -> BaseRetriever:
        """Returns a Retriever for this managed index."""
        return self._index.as_retriever(**kwargs)

    def as_query_engine(
        self,
        llm: Optional[LLMType] = None,
        temperature: float = 0.7,
        answer_style: Any = 1,
        safety_setting: List[Any] = [],
        **kwargs: Any,
    ) -> BaseQueryEngine:
        """
        Returns the AQA engine for this index.

        Example:
          query_engine = index.as_query_engine(
              temperature=0.7,
              answer_style=AnswerStyle.ABSTRACTIVE,
              safety_setting=[
                  SafetySetting(
                      category=HARM_CATEGORY_SEXUALLY_EXPLICIT,
                      threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
                  ),
              ]
          )

        Args:
            temperature: 0.0 to 1.0.
            answer_style: See `google.ai.generativelanguage.GenerateAnswerRequest.AnswerStyle`
            safety_setting: See `google.ai.generativelanguage.SafetySetting`.

        Returns:
            A query engine that uses Google's AQA model. The query engine will
            return a `Response` object.

            `Response`'s `source_nodes` will begin with a list of attributed
            passages. These passages are the ones that were used to construct
            the grounded response. These passages will always have no score,
            the only way to mark them as attributed passages. Then, the list
            will follow with the originally provided passages, which will have
            a score from the retrieval.

            `Response`'s `metadata` may also have have an entry with key
            `answerable_probability`, which is the probability that the grounded
            answer is likely correct.

        """
        # NOTE: lazy import
        from llama_index.core.query_engine.retriever_query_engine import (
            RetrieverQueryEngine,
        )

        # Don't overwrite the caller's kwargs, which may surprise them.
        local_kwargs = kwargs.copy()

        if "retriever" in kwargs:
            _logger.warning(
                "Ignoring user's retriever to GoogleIndex.as_query_engine, "
                "which uses its own retriever."
            )
            del local_kwargs["retriever"]

        if "response_synthesizer" in kwargs:
            _logger.warning(
                "Ignoring user's response synthesizer to "
                "GoogleIndex.as_query_engine, which uses its own retriever."
            )
            del local_kwargs["response_synthesizer"]

        local_kwargs["retriever"] = self.as_retriever(**local_kwargs)
        local_kwargs["response_synthesizer"] = GoogleTextSynthesizer.from_defaults(
            temperature=temperature,
            answer_style=answer_style,
            safety_setting=safety_setting,
        )

        return RetrieverQueryEngine.from_args(**local_kwargs)

    def _build_index_from_nodes(self, nodes: Sequence[BaseNode]) -> IndexDict:
        """Build the index from nodes."""
        return self._index._build_index_from_nodes(nodes)

corpus_id 属性 #

corpus_id: str

返回此 GoogleIndex 使用的语料库 ID。

from_corpus 类方法 #

from_corpus(*, corpus_id: str, **kwargs: Any) -> IndexType

从现有语料库创建 GoogleIndex。

参数

名称 类型 描述 默认值
corpus_id str

Google 服务器上现有语料库的 ID。

必需

返回值

类型 描述
IndexType

指向指定语料库的 GoogleIndex 实例。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
@classmethod
def from_corpus(
    cls: Type[IndexType], *, corpus_id: str, **kwargs: Any
) -> IndexType:
    """
    Creates a GoogleIndex from an existing corpus.

    Args:
        corpus_id: ID of an existing corpus on Google's server.

    Returns:
        An instance of GoogleIndex pointing to the specified corpus.

    """
    _logger.debug(f"\n\nGoogleIndex.from_corpus(corpus_id={corpus_id})")
    return cls(
        vector_store=GoogleVectorStore.from_corpus(corpus_id=corpus_id), **kwargs
    )

create_corpus 类方法 #

create_corpus(*, corpus_id: Optional[str] = None, display_name: Optional[str] = None, **kwargs: Any) -> IndexType

从新语料库创建 GoogleIndex。

参数

名称 类型 描述 默认值
corpus_id Optional[str]

要创建的新语料库的 ID。如果未提供,Google 服务器将提供一个。

display_name Optional[str]

新语料库的标题。如果未提供,Google 服务器将提供一个。

返回值

类型 描述
IndexType

指向指定语料库的 GoogleIndex 实例。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
@classmethod
def create_corpus(
    cls: Type[IndexType],
    *,
    corpus_id: Optional[str] = None,
    display_name: Optional[str] = None,
    **kwargs: Any,
) -> IndexType:
    """
    Creates a GoogleIndex from a new corpus.

    Args:
        corpus_id: ID of the new corpus to be created. If not provided,
            Google server will provide one.
        display_name: Title of the new corpus. If not provided, Google
            server will provide one.

    Returns:
        An instance of GoogleIndex pointing to the specified corpus.

    """
    _logger.debug(
        f"\n\nGoogleIndex.from_new_corpus(new_corpus_id={corpus_id}, new_display_name={display_name})"
    )
    return cls(
        vector_store=GoogleVectorStore.create_corpus(
            corpus_id=corpus_id, display_name=display_name
        ),
        **kwargs,
    )

from_documents 类方法 #

from_documents(documents: Sequence[Document], storage_context: Optional[StorageContext] = None, show_progress: bool = False, callback_manager: Optional[CallbackManager] = None, transformations: Optional[List[TransformComponent]] = None, embed_model: Optional[BaseEmbedding] = None, **kwargs: Any) -> IndexType

从文档序列构建索引。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
@classmethod
def from_documents(
    cls: Type[IndexType],
    documents: Sequence[Document],
    storage_context: Optional[StorageContext] = None,
    show_progress: bool = False,
    callback_manager: Optional[CallbackManager] = None,
    transformations: Optional[List[TransformComponent]] = None,
    # deprecated
    embed_model: Optional[BaseEmbedding] = None,
    **kwargs: Any,
) -> IndexType:
    """Build an index from a sequence of documents."""
    _logger.debug("\n\nGoogleIndex.from_documents(...)")

    new_display_name = f"Corpus created on {datetime.datetime.now()}"
    instance = cls(
        vector_store=GoogleVectorStore.create_corpus(display_name=new_display_name),
        embed_model=embed_model,
        storage_context=storage_context,
        show_progress=show_progress,
        callback_manager=callback_manager,
        transformations=transformations,
        **kwargs,
    )

    index = cast(GoogleIndex, instance)
    index.insert_documents(
        documents=documents,
    )

    return instance

insert_documents #

insert_documents(documents: Sequence[Document], **kwargs: Any) -> None

插入一组文档。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
164
165
166
167
def insert_documents(self, documents: Sequence[Document], **kwargs: Any) -> None:
    """Inserts a set of documents."""
    for document in documents:
        self.insert(document=document, **kwargs)

delete_ref_doc #

delete_ref_doc(ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any) -> None

使用 ref_doc_id 删除文档及其节点。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
169
170
171
172
173
def delete_ref_doc(
    self, ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any
) -> None:
    """Deletes a document and its nodes by using ref_doc_id."""
    self._index.delete_ref_doc(ref_doc_id=ref_doc_id, **delete_kwargs)

update_ref_doc #

update_ref_doc(document: Document, **update_kwargs: Any) -> None

更新文档及其对应的节点。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
175
176
177
def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None:
    """Updates a document and its corresponding nodes."""
    self._index.update(document=document, **update_kwargs)

as_retriever #

as_retriever(**kwargs: Any) -> BaseRetriever

返回此托管索引的检索器。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
179
180
181
def as_retriever(self, **kwargs: Any) -> BaseRetriever:
    """Returns a Retriever for this managed index."""
    return self._index.as_retriever(**kwargs)

as_query_engine #

as_query_engine(llm: Optional[LLMType] = None, temperature: float = 0.7, answer_style: Any = 1, safety_setting: List[Any] = [], **kwargs: Any) -> BaseQueryEngine

返回此索引的 AQA 引擎。

示例

query_engine = index.as_query_engine( temperature=0.7, answer_style=AnswerStyle.ABSTRACTIVE, safety_setting=[ SafetySetting( category=HARM_CATEGORY_SEXUALLY_EXPLICIT, threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, ), ] )

参数

名称 类型 描述 默认值
temperature float

0.0 到 1.0。

0.7
answer_style Any

请参见 google.ai.generativelanguage.GenerateAnswerRequest.AnswerStyle

1
safety_setting List[Any]

请参见 google.ai.generativelanguage.SafetySetting

[]

返回值

类型 描述
BaseQueryEngine

一个使用 Google AQA 模型的查询引擎。该查询引擎将

BaseQueryEngine

返回一个 Response 对象。

BaseQueryEngine

Responsesource_nodes 将以归因的

BaseQueryEngine

段落列表开始。这些段落是用于构建

BaseQueryEngine

接地响应的段落。这些段落将始终没有分数,

BaseQueryEngine

这是将它们标记为归因段落的唯一方法。然后,列表将

BaseQueryEngine

接着是最初提供的段落,这些段落将有

BaseQueryEngine

检索得出的分数。

BaseQueryEngine

Responsemetadata 也可能有一个条目,其键为

BaseQueryEngine

answerable_probability,这是接地

BaseQueryEngine

答案可能正确的概率。

源代码位于 llama-index-integrations/indices/llama-index-indices-managed-google/llama_index/indices/managed/google/base.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
def as_query_engine(
    self,
    llm: Optional[LLMType] = None,
    temperature: float = 0.7,
    answer_style: Any = 1,
    safety_setting: List[Any] = [],
    **kwargs: Any,
) -> BaseQueryEngine:
    """
    Returns the AQA engine for this index.

    Example:
      query_engine = index.as_query_engine(
          temperature=0.7,
          answer_style=AnswerStyle.ABSTRACTIVE,
          safety_setting=[
              SafetySetting(
                  category=HARM_CATEGORY_SEXUALLY_EXPLICIT,
                  threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
              ),
          ]
      )

    Args:
        temperature: 0.0 to 1.0.
        answer_style: See `google.ai.generativelanguage.GenerateAnswerRequest.AnswerStyle`
        safety_setting: See `google.ai.generativelanguage.SafetySetting`.

    Returns:
        A query engine that uses Google's AQA model. The query engine will
        return a `Response` object.

        `Response`'s `source_nodes` will begin with a list of attributed
        passages. These passages are the ones that were used to construct
        the grounded response. These passages will always have no score,
        the only way to mark them as attributed passages. Then, the list
        will follow with the originally provided passages, which will have
        a score from the retrieval.

        `Response`'s `metadata` may also have have an entry with key
        `answerable_probability`, which is the probability that the grounded
        answer is likely correct.

    """
    # NOTE: lazy import
    from llama_index.core.query_engine.retriever_query_engine import (
        RetrieverQueryEngine,
    )

    # Don't overwrite the caller's kwargs, which may surprise them.
    local_kwargs = kwargs.copy()

    if "retriever" in kwargs:
        _logger.warning(
            "Ignoring user's retriever to GoogleIndex.as_query_engine, "
            "which uses its own retriever."
        )
        del local_kwargs["retriever"]

    if "response_synthesizer" in kwargs:
        _logger.warning(
            "Ignoring user's response synthesizer to "
            "GoogleIndex.as_query_engine, which uses its own retriever."
        )
        del local_kwargs["response_synthesizer"]

    local_kwargs["retriever"] = self.as_retriever(**local_kwargs)
    local_kwargs["response_synthesizer"] = GoogleTextSynthesizer.from_defaults(
        temperature=temperature,
        answer_style=answer_style,
        safety_setting=safety_setting,
    )

    return RetrieverQueryEngine.from_args(**local_kwargs)