Protein Data Bank 条目主要引用摘要读取器。
源代码位于 llama-index-integrations/readers/llama-index-readers-pdb/llama_index/readers/pdb/base.py
load_data
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class PdbAbstractReader(BaseReader):
    """Reader for the primary citation abstracts of Protein Data Bank entries."""

    def __init__(self) -> None:
        super().__init__()

    def load_data(self, pdb_ids: List[str]) -> List[Document]:
        """
        Load primary citation abstracts from the RCSB or EBI REST API.

        Args:
            pdb_ids (List[str]): PDB ids whose primary citation abstract
                should be read.

        Returns:
            List[Document]: One document per requested id, in input order.
                The text is every key and value of the citation's
                "abstract" mapping, each on its own line; ``extra_info``
                holds the PDB id and the whole primary citation record.
        """
        documents = []
        for entry_id in pdb_ids:
            # The helper returns the title of the primary citation plus a
            # mapping keyed by title; the title selects the citation record.
            citation_title, citations = get_pdb_abstract(entry_id)
            citation = citations[citation_title]

            # Flatten the abstract mapping into alternating key / value lines.
            lines = []
            for heading, content in citation["abstract"].items():
                lines.append(str(heading))
                lines.append(str(content))

            metadata = {"pdb_id": entry_id, "primary_citation": citation}
            documents.append(Document(text="\n".join(lines), extra_info=metadata))
        return documents
|
从 RCSB 或 EBI REST API 加载数据。
load_data(pdb_ids: List[str]) -> List[Document]
参数

| 名称 | 类型 | 描述 | 默认值 |
| --- | --- | --- | --- |
| pdb_ids | List[str] | 要读取主要引用摘要的 PDB ID 列表。 | 必需 |
返回顶部
|
load_data
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def load_data(self, pdb_ids: List[str]) -> List[Document]:
    """
    Load primary citation abstracts from the RCSB or EBI REST API.

    Args:
        pdb_ids (List[str]): PDB ids whose primary citation abstract
            should be read.

    Returns:
        List[Document]: One document per requested id, in input order.
            The text is every key and value of the citation's "abstract"
            mapping, each on its own line; ``extra_info`` holds the PDB id
            and the whole primary citation record.
    """
    loaded = []
    for current_id in pdb_ids:
        # The returned title is the key of the primary citation record.
        key, by_title = get_pdb_abstract(current_id)
        record = by_title[key]
        # Walk each (key, value) pair in order and emit both as lines.
        body = "\n".join(
            str(piece) for pair in record["abstract"].items() for piece in pair
        )
        doc = Document(
            text=body,
            extra_info={"pdb_id": current_id, "primary_citation": record},
        )
        loaded.append(doc)
    return loaded
|