跳到主要内容

Exa

ExaToolSpec #

基类: BaseToolSpec

Exa 工具规范。

源代码位于 llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
class ExaToolSpec(BaseToolSpec):
    """Exa tool spec."""

    spec_functions = [
        "search",
        "retrieve_documents",
        "search_and_retrieve_documents",
        "search_and_retrieve_highlights",
        "find_similar",
        "current_date",
    ]

    def __init__(
        self,
        api_key: str,
        verbose: bool = True,
        max_characters: int = 2000,
    ) -> None:
        """Initialize with parameters."""
        from exa_py import Exa

        self.client = Exa(api_key=api_key, user_agent="llama-index")
        self._verbose = verbose
        # max characters for the text field in the search_and_contents function
        self._max_characters = max_characters

    def search(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: bool = True,
        type: str = "magic",
    ) -> List:
        """
        Exa allows you to use a natural language query to search the internet.

        Args:
            query (str): A natural language query phrased as an answer for what the link provides, ie: "This is the latest news about space:"
            num_results (Optional[int]): Number of results to return. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains like ["wsj.com"] to limit the search to specific sites.
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15". Get the date from `current_date`
            end_published_date (Optional[str]): End date string

        """
        response = self.client.search(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=use_autoprompt,
            type=type,
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [
            {"title": result.title, "url": result.url, "id": result.id}
            for result in response.results
        ]

    def retrieve_documents(self, ids: List[str]) -> List[Document]:
        """
        Retrieve a list of document texts returned by `exa_search`, using the ID field.

        Args:
            ids (List(str)): the ids of the documents to retrieve

        """
        response = self.client.get_contents(ids)
        return [Document(text=result.text) for result in response.results]

    def find_similar(
        self,
        url: str,
        num_results: Optional[int] = 3,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List:
        """
        Retrieve a list of similar documents to a given url.

        Args:
            url (str): The web page to find similar results of
            num_results (Optional[int]): Number of results to return. Default 3.
            start_published_date (Optional[str]): A date string like "2020-06-15"
            end_published_date (Optional[str]): End date string

        """
        response = self.client.find_similar(
            url,
            num_results=num_results,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
        )
        return [
            {"title": result.title, "url": result.url, "id": result.id}
            for result in response.results
        ]

    def search_and_retrieve_documents(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: bool = True,
        type: str = "magic",
    ) -> List[Document]:
        """
        Combines the functionality of `search` and `retrieve_documents`.

        Args:
            query (str): the natural language query
            num_results (Optional[int]): Number of results. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15".
            end_published_date (Optional[str]): End date string

        """
        response = self.client.search_and_contents(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=use_autoprompt,
            text={"max_characters": self._max_characters},
            type=type,
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [Document(text=document.text) for document in response.results]

    def search_and_retrieve_highlights(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: bool = True,
        type: str = "magic",
    ) -> List[Document]:
        """
        Searches and retrieves highlights (intelligent snippets from the document).

        Args:
            query (str): the natural language query
            num_results (Optional[int]): Number of results. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15".
            end_published_date (Optional[str]): End date string

        """
        response = self.client.search_and_contents(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=use_autoprompt,
            highlights=True,
            type=type,
        )
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
        return [Document(text=document.highlights[0]) for document in response.results]

    def current_date(self):
        """
        A function to return todays date.

        Call this before any other functions that take timestamps as an argument
        """
        return datetime.date.today()

搜索 #

search(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None, use_autoprompt: bool = True, type: str = 'magic') -> List

Exa 允许您使用自然语言查询搜索互联网。

参数

名称 类型 描述 默认值
query str

一个自然语言查询,表述为链接提供的内容的答案,例如:“这是关于太空的最新消息:”

必需
num_results Optional[int]

返回结果的数量。默认为 10。

10
include_domains Optional[List(str)]

顶级域名列表,例如 ["wsj.com"],用于将搜索限制在特定站点。

exclude_domains Optional[List(str)]

要排除的顶级域名。

start_published_date Optional[str]

日期字符串,例如“2020-06-15”。从 current_date 获取日期

end_published_date Optional[str]

结束日期字符串

源代码位于 llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def search(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
    use_autoprompt: bool = True,
    type: str = "magic",
) -> List:
    """
    Exa allows you to use a natural language query to search the internet.

    Args:
        query (str): A natural language query phrased as an answer for what the link provides, ie: "This is the latest news about space:"
        num_results (Optional[int]): Number of results to return. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains like ["wsj.com"] to limit the search to specific sites.
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15". Get the date from `current_date`
        end_published_date (Optional[str]): End date string

    """
    response = self.client.search(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=use_autoprompt,
        type=type,
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [
        {"title": result.title, "url": result.url, "id": result.id}
        for result in response.results
    ]

检索文档 #

retrieve_documents(ids: List[str]) -> List[Document]

使用 ID 字段检索由 exa_search 返回的文档文本列表。

参数

名称 类型 描述 默认值
ids List(str

要检索的文档 ID

必需
源代码位于 llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
76
77
78
79
80
81
82
83
84
85
def retrieve_documents(self, ids: List[str]) -> List[Document]:
    """
    Retrieve a list of document texts returned by `exa_search`, using the ID field.

    Args:
        ids (List(str)): the ids of the documents to retrieve

    """
    response = self.client.get_contents(ids)
    return [Document(text=result.text) for result in response.results]

查找相似项 #

find_similar(url: str, num_results: Optional[int] = 3, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None) -> List

检索与给定 URL 相似的文档列表。

参数

名称 类型 描述 默认值
url str

要查找相似结果的网页

必需
num_results Optional[int]

返回结果的数量。默认为 3。

3
start_published_date Optional[str]

日期字符串,例如“2020-06-15”

end_published_date Optional[str]

结束日期字符串

源代码位于 llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def find_similar(
    self,
    url: str,
    num_results: Optional[int] = 3,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
) -> List:
    """
    Retrieve a list of similar documents to a given url.

    Args:
        url (str): The web page to find similar results of
        num_results (Optional[int]): Number of results to return. Default 3.
        start_published_date (Optional[str]): A date string like "2020-06-15"
        end_published_date (Optional[str]): End date string

    """
    response = self.client.find_similar(
        url,
        num_results=num_results,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
    )
    return [
        {"title": result.title, "url": result.url, "id": result.id}
        for result in response.results
    ]

搜索并检索文档 #

search_and_retrieve_documents(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None, use_autoprompt: bool = True, type: str = 'magic') -> List[Document]

结合了 searchretrieve_documents 的功能。

参数

名称 类型 描述 默认值
query str

自然语言查询

必需
num_results Optional[int]

结果数量。默认为 10。

10
include_domains Optional[List(str)]

要搜索的顶级域名列表,例如 ["wsj.com"]

exclude_domains Optional[List(str)]

要排除的顶级域名。

start_published_date Optional[str]

日期字符串,例如“2020-06-15”。

end_published_date Optional[str]

结束日期字符串

源代码位于 llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def search_and_retrieve_documents(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
    use_autoprompt: bool = True,
    type: str = "magic",
) -> List[Document]:
    """
    Combines the functionality of `search` and `retrieve_documents`.

    Args:
        query (str): the natural language query
        num_results (Optional[int]): Number of results. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15".
        end_published_date (Optional[str]): End date string

    """
    response = self.client.search_and_contents(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=use_autoprompt,
        text={"max_characters": self._max_characters},
        type=type,
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [Document(text=document.text) for document in response.results]

搜索并检索亮点 #

search_and_retrieve_highlights(query: str, num_results: Optional[int] = 10, include_domains: Optional[List[str]] = None, exclude_domains: Optional[List[str]] = None, start_published_date: Optional[str] = None, end_published_date: Optional[str] = None, use_autoprompt: bool = True, type: str = 'magic') -> List[Document]

搜索并检索亮点 (文档中的智能摘要)。

参数

名称 类型 描述 默认值
query str

自然语言查询

必需
num_results Optional[int]

结果数量。默认为 10。

10
include_domains Optional[List(str)]

要搜索的顶级域名列表,例如 ["wsj.com"]

exclude_domains Optional[List(str)]

要排除的顶级域名。

start_published_date Optional[str]

日期字符串,例如“2020-06-15”。

end_published_date Optional[str]

结束日期字符串

源代码位于 llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def search_and_retrieve_highlights(
    self,
    query: str,
    num_results: Optional[int] = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    start_published_date: Optional[str] = None,
    end_published_date: Optional[str] = None,
    use_autoprompt: bool = True,
    type: str = "magic",
) -> List[Document]:
    """
    Searches and retrieves highlights (intelligent snippets from the document).

    Args:
        query (str): the natural language query
        num_results (Optional[int]): Number of results. Defaults to 10.
        include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
        exclude_domains (Optional[List(str)]): Top level domains to exclude.
        start_published_date (Optional[str]): A date string like "2020-06-15".
        end_published_date (Optional[str]): End date string

    """
    response = self.client.search_and_contents(
        query,
        num_results=num_results,
        include_domains=include_domains,
        exclude_domains=exclude_domains,
        start_published_date=start_published_date,
        end_published_date=end_published_date,
        use_autoprompt=use_autoprompt,
        highlights=True,
        type=type,
    )
    if self._verbose:
        print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
    return [Document(text=document.highlights[0]) for document in response.results]

当前日期 #

current_date()

一个返回今天日期的函数。

在调用任何其他接受时间戳作为参数的函数之前调用此函数

源代码位于 llama-index-integrations/tools/llama-index-tools-exa/llama_index/tools/exa/base.py
191
192
193
194
195
196
197
def current_date(self):
    """
    A function to return todays date.

    Call this before any other functions that take timestamps as an argument
    """
    return datetime.date.today()