LLM 重排序器演示 (了不起的盖茨比)¶
本教程展示了如何进行两阶段检索。首先使用高 top-k 值的基于嵌入的检索来最大化召回率,获得大量候选条目。然后,使用基于 LLM 的检索来动态选择与查询实际相关的节点。
在 [ ]
已复制!
%pip install llama-index-llms-openai
%pip install llama-index-llms-openai
在 [ ]
已复制!
# Apply nest_asyncio's patch before any other cell runs.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and later calls use asyncio internally — confirm.
import nest_asyncio
nest_asyncio.apply()
import nest_asyncio nest_asyncio.apply()
在 [ ]
已复制!
import logging
import sys
# Route INFO-and-above log records to stdout so they show up in the cell output.
# force=True drops any handlers already attached to the root logger, so exactly
# one stdout handler exists afterwards. The previous basicConfig + addHandler
# pair attached two stdout handlers and every record was printed twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import LLMRerank
from llama_index.llms.openai import OpenAI
from IPython.display import Markdown, display
import logging import sys logging.basicConfig(stream=sys.stdout, level=logging.INFO) logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout)) from llama_index.core import VectorStoreIndex, SimpleDirectoryReader from llama_index.core.postprocessor import LLMRerank from llama_index.llms.openai import OpenAI from IPython.display import Markdown, display
加载数据,构建索引¶
在 [ ]
已复制!
from llama_index.core import Settings
# Register gpt-3.5-turbo (temperature 0) as the LLM on the global Settings object.
# NOTE(review): LLMRerank is constructed later without an explicit llm, so it
# presumably falls back to this setting — confirm.
Settings.llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
# Global chunk size; presumably the size (in tokens) of the pieces documents are
# split into before indexing — confirm.
Settings.chunk_size = 512
from llama_index.core import Settings # LLM (gpt-3.5-turbo) Settings.llm = OpenAI(temperature=0, model="gpt-3.5-turbo") Settings.chunk_size = 512
在 [ ]
已复制!
# Load every file found in the Gatsby example data directory.
# The path is relative, so it depends on where the notebook is run from.
documents = SimpleDirectoryReader("../../../examples/gatsby/data").load_data()
# load documents documents = SimpleDirectoryReader("../../../examples/gatsby/data").load_data()
在 [ ]
已复制!
documents
documents
在 [ ]
已复制!
# Build the vector index from the loaded documents. The retriever helper and the
# query engine further down both read this module-level `index`.
index = VectorStoreIndex.from_documents(
documents,
)
index = VectorStoreIndex.from_documents( documents, )
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens > [build_index_from_nodes] Total LLM token usage: 0 tokens INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 49266 tokens > [build_index_from_nodes] Total embedding token usage: 49266 tokens
检索¶
在 [ ]
已复制!
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
import pandas as pd
from IPython.display import display, HTML
# Show full cell text in rendered tables. None means "no column width limit";
# the former -1 sentinel is deprecated since pandas 1.0 and triggers a FutureWarning.
pd.set_option("display.max_colwidth", None)
def get_retrieved_nodes(
    query_str, vector_top_k=10, reranker_top_n=3, with_reranker=False
):
    """Retrieve nodes for ``query_str``, optionally reranking them with an LLM.

    Stage one runs a ``VectorIndexRetriever`` over the module-level ``index``
    with ``similarity_top_k=vector_top_k``. When ``with_reranker`` is true,
    the retrieved nodes are then passed through ``LLMRerank`` configured with
    ``top_n=reranker_top_n``.

    Args:
        query_str: The question text, wrapped in a ``QueryBundle``.
        vector_top_k: ``similarity_top_k`` given to the vector retriever.
        reranker_top_n: ``top_n`` given to ``LLMRerank``; unused when
            ``with_reranker`` is false.
        with_reranker: Whether to apply the LLM reranking stage.

    Returns:
        The retriever's result, or the reranker's result when reranking is on.
    """
    bundle = QueryBundle(query_str)

    # Stage 1: embedding-based retrieval.
    candidates = VectorIndexRetriever(
        index=index,
        similarity_top_k=vector_top_k,
    ).retrieve(bundle)
    if not with_reranker:
        return candidates

    # Stage 2: LLM reranking. No llm argument is passed to LLMRerank here.
    llm_reranker = LLMRerank(choice_batch_size=5, top_n=reranker_top_n)
    return llm_reranker.postprocess_nodes(candidates, bundle)
def pretty_print(df):
    """Display ``df`` as an HTML table in the notebook output.

    Literal backslash-n sequences in the generated HTML are replaced with
    ``<br>`` tags before rendering. Returns whatever ``display`` returns.
    """
    table_html = df.to_html()
    table_html = table_html.replace("\\n", "<br>")
    return display(HTML(table_html))
def visualize_retrieved_nodes(nodes) -> None:
    """Render the score and text of each retrieved node as a table.

    Args:
        nodes: Iterable of results exposing ``.score`` and ``.node.get_text()``.
    """
    rows = [
        {"Score": item.score, "Text": item.node.get_text()} for item in nodes
    ]
    pretty_print(pd.DataFrame(rows))
from llama_index.core.retrievers import VectorIndexRetriever from llama_index.core import QueryBundle import pandas as pd from IPython.display import display, HTML pd.set_option("display.max_colwidth", -1) def get_retrieved_nodes( query_str, vector_top_k=10, reranker_top_n=3, with_reranker=False ): query_bundle = QueryBundle(query_str) # configure retriever retriever = VectorIndexRetriever( index=index, similarity_top_k=vector_top_k, ) retrieved_nodes = retriever.retrieve(query_bundle) if with_reranker: # configure reranker reranker = LLMRerank( choice_batch_size=5, top_n=reranker_top_n, ) retrieved_nodes = reranker.postprocess_nodes( retrieved_nodes, query_bundle ) return retrieved_nodes def pretty_print(df): return display(HTML(df.to_html().replace("\\n", "
"))) def visualize_retrieved_nodes(nodes) -> None: result_dicts = [] for node in nodes: result_dict = {"Score": node.score, "Text": node.node.get_text()} result_dicts.append(result_dict) pretty_print(pd.DataFrame(result_dicts))
"))) def visualize_retrieved_nodes(nodes) -> None: result_dicts = [] for node in nodes: result_dict = {"Score": node.score, "Text": node.node.get_text()} result_dicts.append(result_dict) pretty_print(pd.DataFrame(result_dicts))
/var/folders/1r/c3h91d9s49xblwfvz79s78_c0000gn/T/ipykernel_44297/3519340226.py:7: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width. pd.set_option('display.max_colwidth', -1)
在 [ ]
已复制!
# Baseline for the Myrtle question: embedding retrieval only (top 3), reranker off.
new_nodes = get_retrieved_nodes(
"Who was driving the car that hit Myrtle?",
vector_top_k=3,
with_reranker=False,
)
new_nodes = get_retrieved_nodes( "Who was driving the car that hit Myrtle?", vector_top_k=3, with_reranker=False, )
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens > [retrieve] Total LLM token usage: 0 tokens INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 10 tokens > [retrieve] Total embedding token usage: 10 tokens
在 [ ]
已复制!
visualize_retrieved_nodes(new_nodes)
visualize_retrieved_nodes(new_nodes)
得分 | 文本 | |
---|---|---|
0 | 0.828844 | 和一个多嘴的男人一遍又一遍地讲着 发生的事情,直到对他自己来说都变得越来越不真实,他 无法再讲下去,米特尔·威尔逊悲剧性的成就被 遗忘了。现在我想回头稍微讲讲前一天晚上我们离开那里之后, 车库里发生的事情。 他们很难找到她的姐姐凯瑟琳。她那天晚上一定打破了她的禁酒令,因为当她到达时, 她醉醺醺的,无法理解救护车已经去了 弗拉兴。当他们让她相信这一点后,她 立刻晕倒了,仿佛那是这件事中不可忍受的部分。 有人,无论是好心还是好奇,用车载着她 跟着她姐姐的遗体。 直到午夜很久以后,不断变化的人群聚集在车库的 前面,而乔治·威尔逊在沙发上摇晃着自己。 有段时间办公室的门开着, 每个走进车库的人都会忍不住瞥一眼。 最后有人说真可惜,就关上了门。迈克利斯 和几个其他男人和他在一起;先是四五个男人,后来 是两三个男人。再后来,迈克利斯不得不请最后一个陌生人 在那里再等十五分钟,他好回自己的 地方煮一壶咖啡。之后,他一直一个人在那里和 威尔逊待到黎明。 大约三点钟,威尔逊语无伦次的咕哝 改变了——他安静下来,开始谈论那辆黄色汽车。他 宣布他有办法查出那辆黄色汽车属于谁, 然后他突然脱口而出说几个月前他 妻子从城里回来时脸上淤青,鼻子 肿着。 但他听到自己这么说时,畏缩了一下,又开始 用呻吟的声音哭喊着“哦,我的上帝!”。迈克利斯笨拙地 她冲进了暮色,挥舞着双手, |
1 | 0.827754 | 喊叫着——在他从门口挪开之前,事情已经结束了。 “死亡之车”,正如报纸所称,没有停下来;它从 聚拢的黑暗中出现,悲剧性地摇晃了一下,然后 消失在下一个弯道。马夫罗·迈克利斯甚至不确定 它的颜色——他告诉第一个警察是浅绿色。另 一辆车,那辆开往纽约的车,在一百码 外停了下来,它的司机匆忙跑回米特尔·威尔逊那里,她 的生命被暴力熄灭,跪在路上,将她浓黑的 血液与尘土混在一起。 迈克利斯和这个人最先赶到,但当他们撕开 她仍因汗水湿透的衬衫时,他们看到她的左 乳房像一块布一样松散地悬挂着,没有必要 听心跳。嘴巴张得很大,嘴角撕裂了一点, 仿佛她在放弃长期以来积攒的 巨大生命力时有点窒息。 我们离得很远就看到了三四辆汽车和人群。 ------------------------------------------------------------------------ “撞车了!”汤姆说。“太好了。威尔逊终于有点生意了。” 他放慢了速度,但仍然没有停下来的打算,直到 我们走近时,车库门口人们肃静、专注的 面孔让他下意识地踩了刹车。 “我们看看,”他疑虑地说,“就看一眼。” 我现在才注意到从车库里不断传来的 空洞、哀嚎的声音,这声音在我们下车 走近门口时,变成了断断续续 重复的“哦,我的上帝!”。 接着说,“把车留在了 我的车库里。我不认为有人看到我们,但当然我不能 确定。” |
2 | 0.826390 | 此时我非常讨厌他,觉得没必要 告诉他错了。 “那个女人是谁?”他问。 “她叫威尔逊。她丈夫是车库老板。这究竟是怎么 发生的?” “好吧,我试图转动方向盘——”他突然停下,我 突然猜到了真相。 “是黛西开的车吗?” “是的,”他过了一会儿说,“但当然我会说是我开的。你看, 我们离开纽约时她非常紧张,她觉得开车能让她 镇定下来——而这个女人在我们经过一辆 对面开来的车时冲了出来。一切都发生在一分钟内,但 我感觉她想跟我们说话,以为我们是 她认识的人。好吧,黛西先是避开那个女人转向 另一辆车,然后她就失去了勇气又转了回来。我手 碰到方向盘时,我感到了震动——那一定立即要了她的命。” “把她撞得开了花——” “别告诉我,老兄。”他畏缩了一下。“反正——黛西踩了油门。我 想让她停下,但她停不下来,所以我拉了手刹。 然后她倒在我膝盖上,我就继续开了。” “她明天就会好的,”他一会儿说。“我只是在这儿等,看看他会不会因为 下午那件不愉快的事去烦她。她把自己锁在房间里了,如果他敢 有任何粗暴行为,她就会把灯关上再打开。” “他不会碰 |
在 [ ]
已复制!
# Two-stage run for the same Myrtle question: retrieve 10 candidates by
# embedding similarity, then have LLMRerank keep the top 3.
new_nodes = get_retrieved_nodes(
"Who was driving the car that hit Myrtle?",
vector_top_k=10,
reranker_top_n=3,
with_reranker=True,
)
回到顶部
在 [ ]
已复制!
visualize_retrieved_nodes(new_nodes)
visualize_retrieved_nodes(new_nodes)
得分 | 文本 | |
---|---|---|
0 | 10.0 | 此时我非常讨厌他,觉得没必要 告诉他错了。 “那个女人是谁?”他问。 “她叫威尔逊。她丈夫是车库老板。这究竟是怎么 发生的?” “好吧,我试图转动方向盘——”他突然停下,我 突然猜到了真相。 “是黛西开的车吗?” “是的,”他过了一会儿说,“但当然我会说是我开的。你看, 我们离开纽约时她非常紧张,她觉得开车能让她 镇定下来——而这个女人在我们经过一辆 对面开来的车时冲了出来。一切都发生在一分钟内,但 我感觉她想跟我们说话,以为我们是 她认识的人。好吧,黛西先是避开那个女人转向 另一辆车,然后她就失去了勇气又转了回来。我手 碰到方向盘时,我感到了震动——那一定立即要了她的命。” “把她撞得开了花——” “别告诉我,老兄。”他畏缩了一下。“反正——黛西踩了油门。我 想让她停下,但她停不下来,所以我拉了手刹。 然后她倒在我膝盖上,我就继续开了。” “她明天就会好的,”他一会儿说。“我只是在这儿等,看看他会不会因为 下午那件不愉快的事去烦她。她把自己锁在房间里了,如果他敢 有任何粗暴行为,她就会把灯关上再打开。” “他不会碰 |
在 [ ]
已复制!
new_nodes = get_retrieved_nodes(
    "What did Gatsby want Daisy to do in front of Tom?",
    vector_top_k=3,
    with_reranker=False,
)
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens > [retrieve] Total LLM token usage: 0 tokens INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 14 tokens > [retrieve] Total embedding token usage: 14 tokens
在 [ ]
已复制!
visualize_retrieved_nodes(new_nodes)
visualize_retrieved_nodes(new_nodes)
****Score****: 0.8647796939111776 ****Node text**** : got to make your house into a pigsty in order to have any friends—in the modern world.” Angry as I was, as we all were, I was tempted to laugh whenever he opened his mouth. The transition from libertine to prig was so complete. “I’ve got something to tell you, old sport—” began Gatsby. But Daisy guessed at his intention. “Please don’t!” she interrupted helplessly. “Please let’s all go home. Why don’t we all go home?” “That’s a good idea,” I got up. “Come on, Tom. Nobody wants a drink.” “I want to know what Mr. Gatsby has to tell me.” “Your wife doesn’t love you,” said Gatsby. “She’s never loved you. She loves me.” “You must be crazy!” exclaimed Tom automatically. Gatsby sprang to his feet, vivid with excitement. “She never loved you, do you hear?” he cried. “She only married you because I was poor and she was tired of waiting for me. It was a terrible mistake, but in her heart she never loved anyone except me!” At this point Jordan and I tried to go, but Tom and Gatsby insisted with competitive firmness that we remain—as though neither of them had anything to conceal and it would be a privilege to partake vicariously of their emotions. “Sit down, Daisy,” Tom’s voice groped unsuccessfully for the paternal note. “What’s been going on? I want to hear all about it.” “I told you what’s been going on,” said Gatsby. “Going on for five years—and you didn’t know.” Tom turned to Daisy ****Score****: 0.8609230717744326 ****Node text**** : to keep your shoes dry?” There was a husky tenderness in his tone … “Daisy?” “Please don’t.” Her voice was cold, but the rancour was gone from it. She looked at Gatsby. “There, Jay,” she said—but her hand as she tried to light a cigarette was trembling. Suddenly she threw the cigarette and the burning match on the carpet. “Oh, you want too much!” she cried to Gatsby. “I love you now—isn’t that enough? I can’t help what’s past.” She began to sob helplessly. 
“I did love him once—but I loved you too.” Gatsby’s eyes opened and closed. “You loved me too?” he repeated. “Even that’s a lie,” said Tom savagely. “She didn’t know you were alive. Why—there’s things between Daisy and me that you’ll never know, things that neither of us can ever forget.” The words seemed to bite physically into Gatsby. “I want to speak to Daisy alone,” he insisted. “She’s all excited now—” “Even alone I can’t say I never loved Tom,” she admitted in a pitiful voice. “It wouldn’t be true.” “Of course it wouldn’t,” agreed Tom. She turned to her husband. “As if it mattered to you,” she said. “Of course it matters. I’m going to take better care of you from now on.” “You don’t understand,” said Gatsby, with a touch of panic. “You’re not going to take care of her any more.” “I’m not?” Tom opened his eyes wide and ****Score****: 0.8555028907426916 ****Node text**** : shadowed well with awnings, was dark and cool. Daisy and Jordan lay upon an enormous couch, like silver idols weighing down their own white dresses against the singing breeze of the fans. “We can’t move,” they said together. Jordan’s fingers, powdered white over their tan, rested for a moment in mine. “And Mr. Thomas Buchanan, the athlete?” I inquired. Simultaneously I heard his voice, gruff, muffled, husky, at the hall telephone. Gatsby stood in the centre of the crimson carpet and gazed around with fascinated eyes. Daisy watched him and laughed, her sweet, exciting laugh; a tiny gust of powder rose from her bosom into the air. “The rumour is,” whispered Jordan, “that that’s Tom’s girl on the telephone.” We were silent. The voice in the hall rose high with annoyance: “Very well, then, I won’t sell you the car at all … I’m under no obligations to you at all … and as for your bothering me about it at lunch time, I won’t stand that at all!” “Holding down the receiver,” said Daisy cynically. “No, he’s not,” I assured her. “It’s a bona-fide deal. 
I happen to know about it.” Tom flung open the door, blocked out its space for a moment with his thick body, and hurried into the room. “Mr. Gatsby!” He put out his broad, flat hand with well-concealed dislike. “I’m glad to see you, sir … Nick …” “Make us a cold drink,” cried Daisy. As he left the room again she got up and went over to Gatsby and pulled his face
在 [ ]
已复制!
# Two-stage run for the Daisy/Tom question: retrieve 10 candidates by
# embedding similarity, then have LLMRerank keep the top 3.
new_nodes = get_retrieved_nodes(
"What did Gatsby want Daisy to do in front of Tom?",
vector_top_k=10,
reranker_top_n=3,
with_reranker=True,
)
new_nodes = get_retrieved_nodes( "What did Gatsby want Daisy to do in front of Tom?", vector_top_k=10, reranker_top_n=3, with_reranker=True, )
INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens > [retrieve] Total LLM token usage: 0 tokens INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 14 tokens > [retrieve] Total embedding token usage: 14 tokens Doc: 2, Relevance: 10 No relevant documents found. Please provide a different question.
在 [ ]
已复制!
visualize_retrieved_nodes(new_nodes)
visualize_retrieved_nodes(new_nodes)
****Score****: 10.0 ****Node text**** : to keep your shoes dry?” There was a husky tenderness in his tone … “Daisy?” “Please don’t.” Her voice was cold, but the rancour was gone from it. She looked at Gatsby. “There, Jay,” she said—but her hand as she tried to light a cigarette was trembling. Suddenly she threw the cigarette and the burning match on the carpet. “Oh, you want too much!” she cried to Gatsby. “I love you now—isn’t that enough? I can’t help what’s past.” She began to sob helplessly. “I did love him once—but I loved you too.” Gatsby’s eyes opened and closed. “You loved me too?” he repeated. “Even that’s a lie,” said Tom savagely. “She didn’t know you were alive. Why—there’s things between Daisy and me that you’ll never know, things that neither of us can ever forget.” The words seemed to bite physically into Gatsby. “I want to speak to Daisy alone,” he insisted. “She’s all excited now—” “Even alone I can’t say I never loved Tom,” she admitted in a pitiful voice. “It wouldn’t be true.” “Of course it wouldn’t,” agreed Tom. She turned to her husband. “As if it mattered to you,” she said. “Of course it matters. I’m going to take better care of you from now on.” “You don’t understand,” said Gatsby, with a touch of panic. “You’re not going to take care of her any more.” “I’m not?” Tom opened his eyes wide and
查询引擎¶
在 [ ]
已复制!
# Wire the same two-stage pipeline into a query engine: fetch 10 candidates by
# embedding similarity, pass them through LLMRerank (top_n=2), and synthesize
# the answer with the "tree_summarize" response mode.
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[
        LLMRerank(
            choice_batch_size=5,
            top_n=2,
        )
    ],
    response_mode="tree_summarize",
)
# Ask something the Gatsby index can answer. The previous question about
# Y Combinator had nothing to do with the documents loaded into `index`.
response = query_engine.query(
    "Who was driving the car that hit Myrtle?",
)