跳到内容

Gitlab

GitLabIssuesReader #

基类: BaseReader

GitLab 问题读取器。

源代码位于 llama-index-integrations/readers/llama-index-readers-gitlab/llama_index/readers/gitlab/issues/base.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
class GitLabIssuesReader(BaseReader):
    """
    GitLab issues reader.

    Lists the issues of a GitLab project or group through a ``gitlab.Gitlab``
    client and turns each issue into a ``Document``.

    Args:
        - gitlab_client: Client used for every GitLab request.
        - project_id: ID of the project to read issues from.
        - group_id: ID of the group to read issues from. Takes precedence over
          ``project_id`` when both are set.
        - verbose: Stored on the instance; not read by any method of this class.

    """

    class IssueState(enum.Enum):
        """
        Issue state.

        Used to decide what issues to retrieve.

        Attributes:
            - OPEN: Issues that are open.
            - CLOSED: Issues that are closed.
            - ALL: All issues, open and closed.

        """

        OPEN = "opened"
        CLOSED = "closed"
        ALL = "all"

    class IssueType(enum.Enum):
        """
        Issue type.

        Used to decide what issues to retrieve.

        Attributes:
            - ISSUE: Issues.
            - INCIDENT: Incident.
            - TEST_CASE: Test case.
            - TASK: Task.

        """

        ISSUE = "issue"
        INCIDENT = "incident"
        TEST_CASE = "test_case"
        TASK = "task"

    class Scope(enum.Enum):
        """
        Scope.

        Used to determine the scope of the issue.

        Attributes:
            - CREATED_BY_ME: Issues created by the authenticated user.
            - ASSIGNED_TO_ME: Issues assigned to the authenticated user.
            - ALL: All issues.

        """

        CREATED_BY_ME = "created_by_me"
        ASSIGNED_TO_ME = "assigned_to_me"
        ALL = "all"

    def __init__(
        self,
        gitlab_client: gitlab.Gitlab,
        project_id: Optional[int] = None,
        group_id: Optional[int] = None,
        verbose: bool = False,
    ):
        super().__init__()

        self._gl = gitlab_client
        self._project_id = project_id
        self._group_id = group_id
        self._verbose = verbose

    def _build_document_from_issue(self, issue: GitLabIssue) -> Document:
        """
        Convert a single GitLab issue into a document.

        The document id is the issue ``iid``, the text is the title followed by
        the description, and the remaining issue fields are stored in
        ``extra_info``.
        """
        issue_dict = issue.asdict()
        title = issue_dict["title"]
        # A None description would otherwise be rendered as the literal text "None".
        description = issue_dict["description"] or ""
        document = Document(
            doc_id=str(issue_dict["iid"]),
            text=f"{title}\n{description}",
        )
        extra_info = {
            "state": issue_dict["state"],
            "labels": issue_dict["labels"],
            "created_at": issue_dict["created_at"],
            "closed_at": issue_dict["closed_at"],
            "url": issue_dict["_links"]["self"],  # API URL
            "source": issue_dict["web_url"],  # HTML URL, more convenient for humans
        }
        # assignee/author are only recorded when the issue carries them.
        assignee = issue_dict.get("assignee")
        if assignee:
            extra_info["assignee"] = assignee["username"]
        author = issue_dict.get("author")
        if author:
            extra_info["author"] = author["username"]
        document.extra_info = extra_info
        return document

    def _get_project_issues(self, **kwargs):
        """List the issues of the configured project, forwarding ``kwargs`` as filters."""
        project = self._gl.projects.get(self._project_id)
        return project.issues.list(**kwargs)

    def _get_group_issues(self, **kwargs):
        """List the issues of the configured group, forwarding ``kwargs`` as filters."""
        group = self._gl.groups.get(self._group_id)
        return group.issues.list(**kwargs)

    def _to_gitlab_datetime_format(self, dt: Optional[datetime]) -> Optional[str]:
        """
        Format ``dt`` as ``YYYY-MM-DDTHH:MM:SS``, or return None when no date is given.

        The format string carries no UTC offset, so any timezone information on
        ``dt`` is not part of the result.
        """
        return dt.strftime("%Y-%m-%dT%H:%M:%S") if dt else None

    def load_data(
        self,
        assignee: Optional[Union[str, int]] = None,
        author: Optional[Union[str, int]] = None,
        confidential: Optional[bool] = None,
        created_after: Optional[datetime] = None,
        created_before: Optional[datetime] = None,
        iids: Optional[List[int]] = None,
        issue_type: Optional[IssueType] = None,
        labels: Optional[List[str]] = None,
        milestone: Optional[str] = None,
        non_archived: Optional[bool] = None,
        scope: Optional[Scope] = None,
        search: Optional[str] = None,
        state: Optional[IssueState] = IssueState.OPEN,
        updated_after: Optional[datetime] = None,
        updated_before: Optional[datetime] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """
        Load group or project issues and convert them to documents. Please refer to the GitLab API documentation for the full list of parameters.

        Issues are read from the group when a group ID is configured, otherwise
        from the project; with neither configured an empty list is returned.

        Each issue is converted to a document by doing the following:

            - The doc_id of the document is the issue number (iid).
            - The text of the document is the concatenation of the title and the description of the issue.
            - The extra_info of the document is a dictionary with the following keys:
                - state: State of the issue.
                - labels: List of labels of the issue.
                - created_at: Date when the issue was created.
                - closed_at: Date when the issue was closed, as reported by GitLab. The key is always present.
                - url: API URL of the issue.
                - source: Web URL of the issue. More convenient for humans.
                - assignee: username of the user assigned to the issue. Only present if the issue is assigned.
                - author: username of the user that created the issue. Only present if the issue has an author.

        Args:
            - assignee: Username (str) or ID (int) of the user assigned to the issue.
            - author: Username (str) or ID (int) of the user that created the issue.
            - confidential: Filter confidential issues.
            - created_after: Filter issues created after the specified date.
            - created_before: Filter issues created before the specified date.
            - iids: Return only the issues having the given iid.
            - issue_type: Filter issues by type.
            - labels: List of label names, issues must have all labels to be returned.
            - milestone: The milestone title.
            - non_archived: Return issues from non archived projects.
            - scope: Return issues for the given scope.
            - search: Search issues against their title and description.
            - state: State of the issues to retrieve.
            - updated_after: Filter issues updated after the specified date.
            - updated_before: Filter issues updated before the specified date.
            - kwargs: Extra keyword arguments forwarded unchanged to the issue
              listing call; they override the filters above on key collision.


        Returns:
            List[Document]: List of documents.

        """
        # NOTE(review): python-gitlab list() calls are presumably paginated by
        # default, so only the first page may be returned unless a pagination
        # option (e.g. get_all=True) is supplied through **kwargs - confirm
        # against the installed python-gitlab version.
        to_gitlab_datetime_format = self._to_gitlab_datetime_format
        params = {
            "confidential": confidential,
            "created_after": to_gitlab_datetime_format(created_after),
            "created_before": to_gitlab_datetime_format(created_before),
            "iids": iids,
            "issue_type": issue_type.value if issue_type else None,
            "labels": labels,
            "milestone": milestone,
            "non_archived": non_archived,
            "scope": scope.value if scope else None,
            "search": search,
            "state": state.value if state else None,
            "updated_after": to_gitlab_datetime_format(updated_after),
            "updated_before": to_gitlab_datetime_format(updated_before),
        }

        # A string selects the user by username, an int by numeric ID.
        if isinstance(assignee, str):
            params["assignee_username"] = assignee
        elif isinstance(assignee, int):
            params["assignee_id"] = assignee

        if isinstance(author, str):
            params["author_username"] = author
        elif isinstance(author, int):
            params["author_id"] = author

        # Unset filters are dropped so they are not sent with the request.
        filtered_params = {k: v for k, v in params.items() if v is not None}

        filtered_params.update(kwargs)

        # The group wins when both IDs are configured; elif avoids fetching
        # project issues only to discard them.
        if self._group_id:
            issues = self._get_group_issues(**filtered_params)
        elif self._project_id:
            issues = self._get_project_issues(**filtered_params)
        else:
            issues = []

        return [self._build_document_from_issue(issue) for issue in issues]

IssueState #

基类: Enum

问题状态。

用于决定要检索的问题。

属性

名称 类型 描述
- OPEN

开放的问题。

- CLOSED

已关闭的问题。

- ALL

所有问题,包括开放和已关闭的。

源代码位于 llama-index-integrations/readers/llama-index-readers-gitlab/llama_index/readers/gitlab/issues/base.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class IssueState(enum.Enum):
    """
    Issue state.

    Selects which issues are retrieved based on whether they are open or
    closed. Each member's value is the string used for the state filter.

    Attributes:
        - OPEN: Only issues that are currently open.
        - CLOSED: Only issues that have been closed.
        - ALL: Every issue, whether open or closed.

    """

    OPEN = "opened"
    CLOSED = "closed"
    ALL = "all"

IssueType #

基类: Enum

问题类型。

用于决定要检索的问题。

属性

名称 类型 描述
- ISSUE

问题。

- INCIDENT

事件。

- TEST_CASE

测试用例。

- TASK

任务。

源代码位于 llama-index-integrations/readers/llama-index-readers-gitlab/llama_index/readers/gitlab/issues/base.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class IssueType(enum.Enum):
    """
    Issue type.

    Selects which kind of issue is retrieved. Each member's value is the
    string used for the issue type filter.

    Attributes:
        - ISSUE: Regular issues.
        - INCIDENT: Incidents.
        - TEST_CASE: Test cases.
        - TASK: Tasks.

    """

    ISSUE = "issue"
    INCIDENT = "incident"
    TEST_CASE = "test_case"
    TASK = "task"

Scope #

基类: Enum

范围。

用于确定问题的范围。

属性

名称 类型 描述
- CREATED_BY_ME

由经过身份验证的用户创建的问题。

- ASSIGNED_TO_ME

指派给经过身份验证的用户的问题。

- ALL

所有问题。

源代码位于 llama-index-integrations/readers/llama-index-readers-gitlab/llama_index/readers/gitlab/issues/base.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class Scope(enum.Enum):
    """
    Scope.

    Restricts the retrieved issues by their relation to the authenticated
    user. Each member's value is the string used for the scope filter.

    Attributes:
        - CREATED_BY_ME: Only issues the authenticated user created.
        - ASSIGNED_TO_ME: Only issues assigned to the authenticated user.
        - ALL: No restriction; all issues.

    """

    CREATED_BY_ME = "created_by_me"
    ASSIGNED_TO_ME = "assigned_to_me"
    ALL = "all"

load_data #

load_data(assignee: Optional[Union[str, int]] = None, author: Optional[Union[str, int]] = None, confidential: Optional[bool] = None, created_after: Optional[datetime] = None, created_before: Optional[datetime] = None, iids: Optional[List[int]] = None, issue_type: Optional[IssueType] = None, labels: Optional[List[str]] = None, milestone: Optional[str] = None, non_archived: Optional[bool] = None, scope: Optional[Scope] = None, search: Optional[str] = None, state: Optional[IssueState] = OPEN, updated_after: Optional[datetime] = None, updated_before: Optional[datetime] = None, **kwargs: Any) -> List[Document]

加载群组或项目问题并将其转换为文档。有关参数完整列表,请参阅 GitLab API 文档。

通过以下方式将每个问题转换为文档

- 文档的 doc_id 为问题编号(iid)。
- 文档的文本由问题的标题和描述拼接而成。
- 文档的 extra_info 是一个包含以下键的字典:
    - state:问题的状态。
    - labels:问题的标签列表。
    - created_at:问题的创建时间。
    - closed_at:问题的关闭时间(该键始终存在,其值直接取自 GitLab 返回的数据)。
    - url:问题的 API URL。
    - source:问题的网页 URL,更便于人工查看。
    - assignee:被指派处理该问题的用户的用户名。仅当问题已被指派时存在。
    - author:创建该问题的用户的用户名。仅当问题包含作者信息时存在。

参数

名称 类型 描述 默认值
- assignee

被指派处理该问题的用户的用户名或 ID。

None
- author

创建该问题的用户的用户名或 ID。

None
- confidential

过滤机密问题。

None
- created_after

过滤指定日期之后创建的问题。

None
- created_before

过滤指定日期之前创建的问题。

None
- iids

仅返回具有给定 iid 的问题。

None
- issue_type

按类型过滤问题。

None
- labels

标签名称列表,问题必须包含所有标签才能返回。

None
- milestone

里程碑标题。

None
- non_archived

返回非存档项目中的问题。

None
- scope

返回指定范围的问题。

None
- search

按标题和描述搜索问题。

None
- state

要检索的问题状态。

IssueState.OPEN
- updated_after

过滤指定日期之后更新的问题。

None
- updated_before

过滤指定日期之前更新的问题。

None

返回

类型 描述
List[Document]

List[Document]: 文档列表。

源代码位于 llama-index-integrations/readers/llama-index-readers-gitlab/llama_index/readers/gitlab/issues/base.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
def load_data(
    self,
    assignee: Optional[Union[str, int]] = None,
    author: Optional[Union[str, int]] = None,
    confidential: Optional[bool] = None,
    created_after: Optional[datetime] = None,
    created_before: Optional[datetime] = None,
    iids: Optional[List[int]] = None,
    issue_type: Optional[IssueType] = None,
    labels: Optional[List[str]] = None,
    milestone: Optional[str] = None,
    non_archived: Optional[bool] = None,
    scope: Optional[Scope] = None,
    search: Optional[str] = None,
    state: Optional[IssueState] = IssueState.OPEN,
    updated_after: Optional[datetime] = None,
    updated_before: Optional[datetime] = None,
    **kwargs: Any,
) -> List[Document]:
    """
    Load group or project issues and convert them to documents. Please refer to the GitLab API documentation for the full list of parameters.

    Issues are read from the group when a group ID is configured, otherwise
    from the project; with neither configured an empty list is returned.

    Each issue is converted to a document by doing the following:

        - The doc_id of the document is the issue number (iid).
        - The text of the document is the concatenation of the title and the description of the issue.
        - The extra_info of the document is a dictionary with the following keys:
            - state: State of the issue.
            - labels: List of labels of the issue.
            - created_at: Date when the issue was created.
            - closed_at: Date when the issue was closed, as reported by GitLab. The key is always present.
            - url: API URL of the issue.
            - source: Web URL of the issue. More convenient for humans.
            - assignee: username of the user assigned to the issue. Only present if the issue is assigned.
            - author: username of the user that created the issue. Only present if the issue has an author.

    Args:
        - assignee: Username (str) or ID (int) of the user assigned to the issue.
        - author: Username (str) or ID (int) of the user that created the issue.
        - confidential: Filter confidential issues.
        - created_after: Filter issues created after the specified date.
        - created_before: Filter issues created before the specified date.
        - iids: Return only the issues having the given iid.
        - issue_type: Filter issues by type.
        - labels: List of label names, issues must have all labels to be returned.
        - milestone: The milestone title.
        - non_archived: Return issues from non archived projects.
        - scope: Return issues for the given scope.
        - search: Search issues against their title and description.
        - state: State of the issues to retrieve.
        - updated_after: Filter issues updated after the specified date.
        - updated_before: Filter issues updated before the specified date.
        - kwargs: Extra keyword arguments forwarded unchanged to the issue
          listing call; they override the filters above on key collision.


    Returns:
        List[Document]: List of documents.

    """
    # NOTE(review): python-gitlab list() calls are presumably paginated by
    # default, so only the first page may be returned unless a pagination
    # option (e.g. get_all=True) is supplied through **kwargs - confirm
    # against the installed python-gitlab version.
    to_gitlab_datetime_format = self._to_gitlab_datetime_format
    params = {
        "confidential": confidential,
        "created_after": to_gitlab_datetime_format(created_after),
        "created_before": to_gitlab_datetime_format(created_before),
        "iids": iids,
        "issue_type": issue_type.value if issue_type else None,
        "labels": labels,
        "milestone": milestone,
        "non_archived": non_archived,
        "scope": scope.value if scope else None,
        "search": search,
        "state": state.value if state else None,
        "updated_after": to_gitlab_datetime_format(updated_after),
        "updated_before": to_gitlab_datetime_format(updated_before),
    }

    # A string selects the user by username, an int by numeric ID.
    if isinstance(assignee, str):
        params["assignee_username"] = assignee
    elif isinstance(assignee, int):
        params["assignee_id"] = assignee

    if isinstance(author, str):
        params["author_username"] = author
    elif isinstance(author, int):
        params["author_id"] = author

    # Unset filters are dropped so they are not sent with the request.
    filtered_params = {k: v for k, v in params.items() if v is not None}

    filtered_params.update(kwargs)

    # The group wins when both IDs are configured; elif avoids fetching
    # project issues only to discard them.
    if self._group_id:
        issues = self._get_group_issues(**filtered_params)
    elif self._project_id:
        issues = self._get_project_issues(**filtered_params)
    else:
        issues = []

    return [self._build_document_from_issue(issue) for issue in issues]

GitLabRepositoryReader #

基类: BaseReader

源代码位于 llama-index-integrations/readers/llama-index-readers-gitlab/llama_index/readers/gitlab/repository/base.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
class GitLabRepositoryReader(BaseReader):
    """
    GitLab repository reader.

    Loads files from one GitLab project, either a single file or every blob
    listed under a directory, and turns each file into a ``Document``.

    Args:
        gitlab_client: Client used for every GitLab request.
        project_id: ID of the project to read; the project is fetched once
            when the reader is constructed.
        use_parser: When True, file content is handed to
            ``_parse_file_content``, which is not implemented in this class.
        verbose: Stored on the instance; not read by any method of this class.

    """

    def __init__(
        self,
        gitlab_client: gitlab.Gitlab,
        project_id: int,
        use_parser: bool = False,
        verbose: bool = False,
    ):
        super().__init__()

        self._gl = gitlab_client
        self._use_parser = use_parser
        self._verbose = verbose
        # API base URL of the project; already ends in "/projects/<project_id>".
        self._project_url = f"{gitlab_client.api_url}/projects/{project_id}"

        self._project = gitlab_client.projects.get(project_id)

    def _parse_file_content(self, file_properties: dict, file_content: str) -> Document:
        """Parse file content into a document. Not implemented; always raises NotImplementedError."""
        raise NotImplementedError

    def _load_single_file(self, file_path: str, ref: Optional[str] = None) -> Document:
        """
        Fetch one repository file and convert it to a document.

        Args:
            file_path: Path of the file inside the repository.
            ref: Branch name or commit id to read the file from.

        Returns:
            Document: Document whose id is the file's blob id, whose text is the
            decoded file content, and whose extra_info holds ``file_path``,
            ``file_name``, ``size`` and the API ``url`` of the raw file.

        Raises:
            NotImplementedError: If the reader was created with ``use_parser=True``.

        """
        from urllib.parse import quote

        file = self._project.files.get(file_path=file_path, ref=ref)
        file_properties = file.asdict()
        file_content = file.decode()

        if self._use_parser:
            return self._parse_file_content(file_properties, file_content)

        # _project_url already contains "/projects/<id>", so only the
        # repository-relative part is appended. The GitLab files API expects the
        # file path as a single URL-encoded segment (slashes included).
        encoded_path = quote(file_properties["file_path"], safe="")
        raw_url = f"{self._project_url}/repository/files/{encoded_path}/raw"
        if ref is not None:
            # Pin the URL to the same revision the content was read from.
            raw_url = f"{raw_url}?ref={quote(ref, safe='')}"

        return Document(
            doc_id=file_properties["blob_id"],
            text=file_content,
            extra_info={
                "file_path": file_properties["file_path"],
                "file_name": file_properties["file_name"],
                "size": file_properties["size"],
                "url": raw_url,
            },
        )

    def load_data(
        self,
        ref: str,
        file_path: Optional[str] = None,
        path: Optional[str] = None,
        recursive: bool = False,
    ) -> List[Document]:
        """
        Load data from a GitLab repository.

        When ``file_path`` is given only that file is loaded and ``path`` and
        ``recursive`` are ignored. Otherwise the repository tree is listed and
        every entry of type ``blob`` is loaded.

        Args:
            ref: The name of a repository branch or commit id
            file_path: Path to the file to load.
            path: Path to the directory to load.
            recursive: Whether to load files recursively.

        Returns:
            List[Document]: List of documents loaded from the repository

        """
        if file_path:
            return [self._load_single_file(file_path, ref)]

        project = self._project

        params = {
            "ref": ref,
            "path": path,
            "recursive": recursive,
        }

        # Unset options are dropped so they are not sent with the request.
        filtered_params = {k: v for k, v in params.items() if v is not None}

        # NOTE(review): repository_tree() is presumably paginated by default in
        # python-gitlab, so large directories may be truncated to the first
        # page - confirm against the installed python-gitlab version.
        repo_items = project.repository_tree(**filtered_params)

        # Only blobs are files; other entries (e.g. sub-directories) are skipped.
        return [
            self._load_single_file(item["path"], ref)
            for item in repo_items
            if item["type"] == "blob"
        ]

load_data #

load_data(ref: str, file_path: Optional[str] = None, path: Optional[str] = None, recursive: bool = False) -> List[Document]

从 GitLab 仓库加载数据。

参数

名称 类型 描述 默认值
ref str

仓库分支的名称或提交 ID

必需
file_path Optional[str]

要加载的文件路径。

path Optional[str]

要加载的目录路径。

recursive bool

是否递归加载文件。

False

返回

类型 描述
List[Document]

List[Document]: 从仓库加载的文档列表

源代码位于 llama-index-integrations/readers/llama-index-readers-gitlab/llama_index/readers/gitlab/repository/base.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def load_data(
    self,
    ref: str,
    file_path: Optional[str] = None,
    path: Optional[str] = None,
    recursive: bool = False,
) -> List[Document]:
    """
    Load data from a GitLab repository.

    A non-empty ``file_path`` loads just that file. Otherwise the repository
    tree is listed and every entry of type ``blob`` is loaded.

    Args:
        ref: The name of a repository branch or commit id
        file_path: Path to the file to load.
        path: Path to the directory to load.
        recursive: Whether to load files recursively.

    Returns:
        List[Document]: List of documents loaded from the repository

    """
    # Single-file request: skip the tree listing entirely.
    if file_path:
        return [self._load_single_file(file_path, ref)]

    # Build the tree query, leaving out any option that was not provided.
    tree_query = {}
    for option, value in (("ref", ref), ("path", path), ("recursive", recursive)):
        if value is not None:
            tree_query[option] = value

    entries = self._project.repository_tree(**tree_query)

    # Only blob entries are files; everything else in the tree is skipped.
    return [
        self._load_single_file(entry["path"], ref)
        for entry in entries
        if entry["type"] == "blob"
    ]