跳到内容

Qdrant

QdrantReader #

继承自:BaseReader

Qdrant 读取器。

从现有的 Qdrant 集合中检索文档。

参数

名称 类型 描述 默认值
location Optional[str]

如果为 :memory: - 使用内存中的 Qdrant 实例。如果为 str - 将其用作 url 参数。如果为 None - 使用 host 和 port 的默认值。

url Optional[str]

可以是 host 或 "Optional[scheme], host, Optional[port], Optional[prefix]" 字符串。默认值:None

port Optional[int]

REST API 接口端口。默认值:6333

6333
grpc_port int

gRPC 接口端口。默认值:6334

6334
prefer_grpc bool

如果为 true - 在自定义方法中尽可能使用 gRPC 接口。

False
https Optional[bool]

如果为 true - 使用 HTTPS(SSL) 协议。默认值:false

api_key Optional[str]

Qdrant Cloud 认证 API 密钥。默认值:None

prefix Optional[str]

如果非 None - 将 prefix 添加到 REST URL 路径。示例:service/v1 将导致 REST API 的 URL 为 http://localhost:6333/service/v1/{qdrant-endpoint}。默认值:None

timeout Optional[float]

REST 和 gRPC API 请求的超时时间。默认值:REST 为 5.0 秒,gRPC 无限制。

host Optional[str]

Qdrant 服务主机名。如果 url 和 host 均为 None,则设置为 'localhost'。默认值:None

源代码位于 llama-index-integrations/readers/llama-index-readers-qdrant/llama_index/readers/qdrant/base.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
class QdrantReader(BaseReader):
    """
    Qdrant reader.

    Retrieve documents from existing Qdrant collections.

    Every constructor argument is forwarded unchanged to
    `qdrant_client.QdrantClient`.

    Args:
        location:
            If `:memory:` - use in-memory Qdrant instance.
            If `str` - use it as a `url` parameter.
            If `None` - use default values for `host` and `port`.
        url:
            either host or str of
            "Optional[scheme], host, Optional[port], Optional[prefix]".
            Default: `None`
        port: Port of the REST API interface. Default: 6333
        grpc_port: Port of the gRPC interface. Default: 6334
        prefer_grpc: If `true` - use gRPC interface whenever possible in custom
            methods. Default: `False`
        https: If `true` - use HTTPS(SSL) protocol. This signature passes
            `None` by default and leaves the effective value to the client.
        api_key: API key for authentication in Qdrant Cloud. Default: `None`
        prefix:
            If not `None` - add `prefix` to the REST URL path.
            Example: `service/v1` will result in
            `http://localhost:6333/service/v1/{qdrant-endpoint}` for REST API.
            Default: `None`
        timeout:
            Timeout for REST and gRPC API requests. This signature passes
            `None` by default; the upstream documentation states 5.0 seconds
            for REST and unlimited for gRPC.
        host: Host name of Qdrant service. If url and host are None, set to 'localhost'.
            Default: `None`
        path: Forwarded to `QdrantClient(path=...)`. Default: `None`
            NOTE(review): presumably the storage directory for the client's
            local mode - confirm against the qdrant-client documentation.

    Raises:
        ImportError: If the `qdrant-client` package is not installed.

    """

    def __init__(
        self,
        location: Optional[str] = None,
        url: Optional[str] = None,
        port: Optional[int] = 6333,
        grpc_port: int = 6334,
        prefer_grpc: bool = False,
        https: Optional[bool] = None,
        api_key: Optional[str] = None,
        prefix: Optional[str] = None,
        timeout: Optional[float] = None,
        host: Optional[str] = None,
        path: Optional[str] = None,
    ):
        """Initialize with parameters."""
        import_err_msg = (
            "`qdrant-client` package not found, please run `pip install qdrant-client`"
        )
        # Imported lazily so the module itself can be imported without the
        # optional dependency installed.
        try:
            import qdrant_client
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(import_err_msg) from err

        self._client = qdrant_client.QdrantClient(
            location=location,
            url=url,
            port=port,
            grpc_port=grpc_port,
            prefer_grpc=prefer_grpc,
            https=https,
            api_key=api_key,
            prefix=prefix,
            timeout=timeout,
            host=host,
            path=path,
        )

    def load_data(
        self,
        collection_name: str,
        query_vector: List[float],
        should_search_mapping: Optional[Dict[str, str]] = None,
        must_search_mapping: Optional[Dict[str, str]] = None,
        must_not_search_mapping: Optional[Dict[str, str]] = None,
        rang_search_mapping: Optional[Dict[str, Dict[str, float]]] = None,
        limit: int = 10,
    ) -> List[Document]:
        """
        Load data from Qdrant.

        Runs a vector search against `collection_name` and converts every
        returned point into a `Document`, reading the `doc_id`, `text` and
        `metadata` keys from the point payload and attaching the point vector
        as the embedding.

        Args:
            collection_name (str): Name of the Qdrant collection.
            query_vector (List[float]): Query vector.
            should_search_mapping (Optional[Dict[str, str]]): Mapping from field name
                to query string; each entry becomes a `MatchText` condition in
                the filter's `should` clause.
            must_search_mapping (Optional[Dict[str, str]]): Mapping from field name
                to query string; each entry becomes a `MatchValue` condition in
                the filter's `must` clause.
            must_not_search_mapping (Optional[Dict[str, str]]): Mapping from field
                name to query string; each entry becomes a `MatchValue`
                condition in the filter's `must_not` clause.
            rang_search_mapping (Optional[Dict[str, Dict[str, float]]]): Mapping from
                field name to range query (`gte`, `lte`, `gt`, `lt` keys). The
                resulting conditions are added to the `should` clause.
            limit (int): Number of results to return.

        Example:
            reader = QdrantReader()
            reader.load_data(
                 collection_name="test_collection",
                 query_vector=[0.1, 0.2, 0.3],
                 should_search_mapping={"text_field": "text"},
                 must_search_mapping={"text_field": "text"},
                 must_not_search_mapping={"text_field": "text"},
                 # gte, lte, gt, lt supported
                 rang_search_mapping={"text_field": {"gte": 0.1, "lte": 0.2}},
                 limit=10
            )

        Returns:
            List[Document]: A list of documents.

        """
        from qdrant_client.http.models import (
            FieldCondition,
            Filter,
            MatchText,
            MatchValue,
            Range,
        )
        from qdrant_client.http.models.models import Payload

        # Normalise omitted mappings to empty dicts; iterating an empty dict
        # yields no conditions, so no further emptiness check is required.
        should_search_mapping = should_search_mapping or {}
        must_search_mapping = must_search_mapping or {}
        must_not_search_mapping = must_not_search_mapping or {}
        rang_search_mapping = rang_search_mapping or {}

        should_search_conditions = [
            FieldCondition(key=key, match=MatchText(text=value))
            for key, value in should_search_mapping.items()
        ]
        must_search_conditions = [
            FieldCondition(key=key, match=MatchValue(value=value))
            for key, value in must_search_mapping.items()
        ]
        must_not_search_conditions = [
            FieldCondition(key=key, match=MatchValue(value=value))
            for key, value in must_not_search_mapping.items()
        ]
        rang_search_conditions = [
            FieldCondition(
                key=key,
                range=Range(
                    gte=value.get("gte"),
                    lte=value.get("lte"),
                    gt=value.get("gt"),
                    lt=value.get("lt"),
                ),
            )
            for key, value in rang_search_mapping.items()
        ]
        # Range conditions share the `should` clause with the text conditions.
        should_search_conditions.extend(rang_search_conditions)

        response = self._client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            query_filter=Filter(
                must=must_search_conditions,
                must_not=must_not_search_conditions,
                should=should_search_conditions,
            ),
            with_vectors=True,
            with_payload=True,
            limit=limit,
        )

        documents = []
        for point in response:
            # A point may carry no payload; treat that like an empty payload
            # instead of failing on `None.get(...)`.
            payload = cast(Payload, point.payload or {})
            # `typing.cast` is a runtime no-op and cannot raise, so no error
            # handling is needed around it.
            vector = cast(List[float], point.vector)
            documents.append(
                Document(
                    id_=payload.get("doc_id"),
                    text=payload.get("text"),
                    metadata=payload.get("metadata"),
                    embedding=vector,
                )
            )

        return documents

load_data #

load_data(collection_name: str, query_vector: List[float], should_search_mapping: Optional[Dict[str, str]] = None, must_search_mapping: Optional[Dict[str, str]] = None, must_not_search_mapping: Optional[Dict[str, str]] = None, rang_search_mapping: Optional[Dict[str, Dict[str, float]]] = None, limit: int = 10) -> List[Document]

从 Qdrant 加载数据。

参数

名称 类型 描述 默认值
collection_name str

Qdrant 集合名称。

必填
query_vector List[float]

查询向量。

必填
should_search_mapping Optional[Dict[str, str]]

字段名到查询字符串的映射。

must_search_mapping Optional[Dict[str, str]]

字段名到查询字符串的映射。

must_not_search_mapping Optional[Dict[str, str]]

字段名到查询字符串的映射。

rang_search_mapping Optional[Dict[str, Dict[str, float]]]

字段名到范围查询的映射。

limit int

返回结果数量。

10
示例

reader = QdrantReader()
reader.load_data(
    collection_name="test_collection",
    query_vector=[0.1, 0.2, 0.3],
    should_search_mapping={"text_field": "text"},
    must_search_mapping={"text_field": "text"},
    must_not_search_mapping={"text_field": "text"},
    # 支持 gte, lte, gt, lt
    rang_search_mapping={"text_field": {"gte": 0.1, "lte": 0.2}},
    limit=10
)

返回值

类型 描述
List[Document]

List[Document]:文档列表。

源代码位于 llama-index-integrations/readers/llama-index-readers-qdrant/llama_index/readers/qdrant/base.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
def load_data(
    self,
    collection_name: str,
    query_vector: List[float],
    should_search_mapping: Optional[Dict[str, str]] = None,
    must_search_mapping: Optional[Dict[str, str]] = None,
    must_not_search_mapping: Optional[Dict[str, str]] = None,
    rang_search_mapping: Optional[Dict[str, Dict[str, float]]] = None,
    limit: int = 10,
) -> List[Document]:
    """
    Load data from Qdrant.

    Runs a vector search against `collection_name` and converts every
    returned point into a `Document`, reading the `doc_id`, `text` and
    `metadata` keys from the point payload and attaching the point vector
    as the embedding.

    Args:
        collection_name (str): Name of the Qdrant collection.
        query_vector (List[float]): Query vector.
        should_search_mapping (Optional[Dict[str, str]]): Mapping from field name
            to query string; each entry becomes a `MatchText` condition in
            the filter's `should` clause.
        must_search_mapping (Optional[Dict[str, str]]): Mapping from field name
            to query string; each entry becomes a `MatchValue` condition in
            the filter's `must` clause.
        must_not_search_mapping (Optional[Dict[str, str]]): Mapping from field
            name to query string; each entry becomes a `MatchValue`
            condition in the filter's `must_not` clause.
        rang_search_mapping (Optional[Dict[str, Dict[str, float]]]): Mapping from
            field name to range query (`gte`, `lte`, `gt`, `lt` keys). The
            resulting conditions are added to the `should` clause.
        limit (int): Number of results to return.

    Example:
        reader = QdrantReader()
        reader.load_data(
             collection_name="test_collection",
             query_vector=[0.1, 0.2, 0.3],
             should_search_mapping={"text_field": "text"},
             must_search_mapping={"text_field": "text"},
             must_not_search_mapping={"text_field": "text"},
             # gte, lte, gt, lt supported
             rang_search_mapping={"text_field": {"gte": 0.1, "lte": 0.2}},
             limit=10
        )

    Returns:
        List[Document]: A list of documents.

    """
    from qdrant_client.http.models import (
        FieldCondition,
        Filter,
        MatchText,
        MatchValue,
        Range,
    )
    from qdrant_client.http.models.models import Payload

    # Normalise omitted mappings to empty dicts; iterating an empty dict
    # yields no conditions, so no further emptiness check is required.
    should_search_mapping = should_search_mapping or {}
    must_search_mapping = must_search_mapping or {}
    must_not_search_mapping = must_not_search_mapping or {}
    rang_search_mapping = rang_search_mapping or {}

    should_search_conditions = [
        FieldCondition(key=key, match=MatchText(text=value))
        for key, value in should_search_mapping.items()
    ]
    must_search_conditions = [
        FieldCondition(key=key, match=MatchValue(value=value))
        for key, value in must_search_mapping.items()
    ]
    must_not_search_conditions = [
        FieldCondition(key=key, match=MatchValue(value=value))
        for key, value in must_not_search_mapping.items()
    ]
    rang_search_conditions = [
        FieldCondition(
            key=key,
            range=Range(
                gte=value.get("gte"),
                lte=value.get("lte"),
                gt=value.get("gt"),
                lt=value.get("lt"),
            ),
        )
        for key, value in rang_search_mapping.items()
    ]
    # Range conditions share the `should` clause with the text conditions.
    should_search_conditions.extend(rang_search_conditions)

    response = self._client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        query_filter=Filter(
            must=must_search_conditions,
            must_not=must_not_search_conditions,
            should=should_search_conditions,
        ),
        with_vectors=True,
        with_payload=True,
        limit=limit,
    )

    documents = []
    for point in response:
        # A point may carry no payload; treat that like an empty payload
        # instead of failing on `None.get(...)`.
        payload = cast(Payload, point.payload or {})
        # `typing.cast` is a runtime no-op and cannot raise, so no error
        # handling is needed around it.
        vector = cast(List[float], point.vector)
        documents.append(
            Document(
                id_=payload.get("doc_id"),
                text=payload.get("text"),
                metadata=payload.get("metadata"),
                embedding=vector,
            )
        )

    return documents