DjangoPeng commited on
Commit
3851ff8
·
1 Parent(s): e5c7047

add GitHubSentinel v0.8.2

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +24 -0
  2. LICENSE +201 -0
  3. README-EN.md +318 -0
  4. README.md +307 -1
  5. build_image.sh +16 -0
  6. config.json +29 -0
  7. daemon_control.sh +75 -0
  8. daily_progress/langchain-ai_langchain/.ipynb_checkpoints/2024-08-18-checkpoint.md +36 -0
  9. docs/ollama.md +195 -0
  10. docs/unit_test.md +169 -0
  11. images/build_docker_image.jpg +0 -0
  12. images/gradio_demo.png +0 -0
  13. images/gradio_v0.8_github.png +0 -0
  14. images/gradio_v0.8_hn.png +0 -0
  15. logs/DaemonProcess.log +0 -0
  16. logs/app.2024-09-20_10-00-00_575591.log +0 -0
  17. logs/app.log +0 -0
  18. prompts/github_ollama_prompt.txt +23 -0
  19. prompts/github_openai_prompt.txt +22 -0
  20. prompts/hacker_news_daily_report_ollama_prompt.txt +37 -0
  21. prompts/hacker_news_daily_report_openai_prompt.txt +35 -0
  22. prompts/hacker_news_hours_topic_ollama_prompt.txt +24 -0
  23. prompts/hacker_news_hours_topic_openai_prompt.txt +24 -0
  24. requirements.txt +6 -0
  25. src/__pycache__/cli.cpython-311.pyc +0 -0
  26. src/__pycache__/command_handler.cpython-310.pyc +0 -0
  27. src/__pycache__/config.cpython-310.pyc +0 -0
  28. src/__pycache__/config.cpython-311.pyc +0 -0
  29. src/__pycache__/config.cpython-312.pyc +0 -0
  30. src/__pycache__/email_notifier.cpython-311.pyc +0 -0
  31. src/__pycache__/github_client.cpython-310.pyc +0 -0
  32. src/__pycache__/github_client.cpython-311.pyc +0 -0
  33. src/__pycache__/github_client.cpython-312.pyc +0 -0
  34. src/__pycache__/hacker_news_client.cpython-310.pyc +0 -0
  35. src/__pycache__/llm.cpython-310.pyc +0 -0
  36. src/__pycache__/logger.cpython-310.pyc +0 -0
  37. src/__pycache__/logger.cpython-312.pyc +0 -0
  38. src/__pycache__/notifier.cpython-310.pyc +0 -0
  39. src/__pycache__/notifier.cpython-311.pyc +0 -0
  40. src/__pycache__/report_generator.cpython-310.pyc +0 -0
  41. src/__pycache__/report_generator.cpython-311.pyc +0 -0
  42. src/__pycache__/scheduler.cpython-310.pyc +0 -0
  43. src/__pycache__/scheduler.cpython-311.pyc +0 -0
  44. src/__pycache__/scheduler.cpython-312.pyc +0 -0
  45. src/__pycache__/subscription_manager.cpython-310.pyc +0 -0
  46. src/__pycache__/subscription_manager.cpython-311.pyc +0 -0
  47. src/command_handler.py +85 -0
  48. src/command_tool.py +38 -0
  49. src/config.py +34 -0
  50. src/daemon_process.py +94 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Dockerfile
2
+
3
+ # 使用官方的 Python 基础镜像
4
+ FROM python:3.10-slim
5
+
6
+ # 设置工作目录
7
+ WORKDIR /app
8
+
9
+ # 复制 requirements.txt 并安装依赖
10
+ COPY requirements.txt .
11
+
12
+ # 安装 Python 依赖
13
+ RUN pip install --no-cache-dir -r requirements.txt
14
+
15
+ # 复制项目的所有文件到容器
16
+ COPY . .
17
+
18
+ # 复制并执行 validate_tests.sh 脚本
19
+ COPY validate_tests.sh .
20
+ RUN chmod +x validate_tests.sh
21
+ RUN ./validate_tests.sh
22
+
23
+ # 设置容器入口
24
+ CMD ["python", "src/daemon_process.py"]
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README-EN.md ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: GitHubSentinel
3
+ emoji: 👀
4
+ colorFrom: gray
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 4.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ # GitHub Sentinel
14
+
15
+ ![GitHub stars](https://img.shields.io/github/stars/DjangoPeng/GitHubSentinel?style=social)
16
+ ![GitHub forks](https://img.shields.io/github/forks/DjangoPeng/GitHubSentinel?style=social)
17
+ ![GitHub watchers](https://img.shields.io/github/watchers/DjangoPeng/GitHubSentinel?style=social)
18
+ ![GitHub repo size](https://img.shields.io/github/repo-size/DjangoPeng/GitHubSentinel)
19
+ ![GitHub language count](https://img.shields.io/github/languages/count/DjangoPeng/GitHubSentinel)
20
+ ![GitHub top language](https://img.shields.io/github/languages/top/DjangoPeng/GitHubSentinel)
21
+ ![GitHub last commit](https://img.shields.io/github/last-commit/DjangoPeng/GitHubSentinel?color=red)
22
+
23
+ <p align="center">
24
+ <br> <a href="README.md">中文</a> | English
25
+ </p>
26
+
27
+ ## Table of Contents
28
+
29
+ - [GitHub Sentinel](#github-sentinel)
30
+ - [Key Features](#key-features)
31
+ - [Product Screenshot](#product-screenshot)
32
+ - [Quick Start](#quick-start)
33
+ - [1. Install Dependencies](#1-install-dependencies)
34
+ - [2. Configure the Application](#2-configure-the-application)
35
+ - [3. How to Run](#3-how-to-run)
36
+ - [A. Run as a Command Line Tool](#a-run-as-a-command-line-tool)
37
+ - [B. Run as a Background Service](#b-run-as-a-background-service)
38
+ - [C. Run as a Gradio Server](#c-run-as-a-gradio-server)
39
+ - [Ollama Installation and Service Deployment](#ollama-installation-and-service-deployment)
40
+ - [Unit Testing](#unit-testing)
41
+ - [Unit Testing and Validation Script `validate_tests.sh`](#unit-testing-and-validation-script-validate_testssh)
42
+ - [Purpose](#purpose)
43
+ - [Functionality](#functionality)
44
+ - [Building and Validating with Docker](#building-and-validating-with-docker)
45
+ - [1. `Dockerfile`](#1-dockerfile)
46
+ - [Purpose](#purpose)
47
+ - [Key Steps](#key-steps)
48
+ - [2. `build_image.sh`](#2-build_imagesh)
49
+ - [Purpose](#purpose)
50
+ - [Functionality](#functionality)
51
+ - [Contributing](#contributing)
52
+ - [License](#license)
53
+ - [Contact](#contact)
54
+
55
+
56
+ GitHub Sentinel is an `AI Agent` designed for the era of large language models (LLMs), specializing in intelligent information retrieval and high-value content extraction. It is tailored for users with high-frequency and large-scale information needs, such as open-source enthusiasts, individual developers, and investors.
57
+
58
+ ### Key Features
59
+
60
+ - **Subscription Management**: Easily manage and track your subscribed GitHub repositories.
61
+ - **Update Retrieval**: Automatically fetch and aggregate the latest updates from your subscribed repositories, including commits, issues, and pull requests.
62
+ - **Notification System**: Receive real-time notifications via email about the latest project progress.
63
+ - **Report Generation**: Generate detailed project progress reports based on the retrieved updates, supporting various formats and templates to meet different needs.
64
+ - **Multi-Model Support**: Leverage OpenAI and Ollama models to generate natural language project reports, offering more intelligent and accurate information services.
65
+ - **Scheduled Tasks**: Supports scheduled tasks running as a daemon process, ensuring timely retrieval of updates.
66
+ - **Graphical Interface**: Provides a simple and user-friendly GUI based on Gradio, making it accessible to a wider audience.
67
+ - **Containerization**: The project supports Docker build and containerized deployment, allowing quick and easy setup in different environments.
68
+ - **Continuous Integration**: Comprehensive unit tests are implemented, facilitating the configuration of production-level CI/CD pipelines to ensure project stability and high-quality delivery.
69
+
70
+ GitHub Sentinel not only helps users automatically track and analyze the progress of `GitHub open-source projects` but also extends its capabilities to other information sources, such as trending topics on `Hacker News`, providing a more comprehensive approach to information extraction and analysis.
71
+
72
+ ### Product Screenshot
73
+
74
+ **GitHub Project Tracking and Summary**
75
+
76
+ ![gradio_v0.8_github](images/gradio_v0.8_github.png)
77
+
78
+ **Hacker News Trending Tech Topic Mining**
79
+
80
+ ![gradio_v0.8_hn](images/gradio_v0.8_hn.png)
81
+
82
+ ## Quick Start
83
+
84
+ ### 1. Install Dependencies
85
+
86
+ First, install the required dependencies:
87
+
88
+ ```sh
89
+ pip install -r requirements.txt
90
+ ```
91
+
92
+ ### 2. Configure the Application
93
+
94
+ Edit the `config.json` file to set up your GitHub Token, Email settings (using Tencent WeCom Email as an example), subscription file, update settings, large model service configurations (supporting OpenAI GPT API and Ollama private large model service) and report types autogenerated by LLMs:
95
+
96
+ ```json
97
+ {
98
+ "github": {
99
+ "token": "your_github_token",
100
+ "subscriptions_file": "subscriptions.json",
101
+ "progress_frequency_days": 1,
102
+ "progress_execution_time": "08:00"
103
+ },
104
+ "email": {
105
+ "smtp_server": "smtp.exmail.qq.com",
106
+ "smtp_port": 465,
107
+ "from": "from_email@example.com",
108
+ "password": "your_email_password",
109
+ "to": "to_email@example.com"
110
+ },
111
+ "llm": {
112
+ "model_type": "ollama",
113
+ "openai_model_name": "gpt-4o-mini",
114
+ "ollama_model_name": "llama3",
115
+ "ollama_api_url": "http://localhost:11434/api/chat"
116
+ },
117
+ "report_types": [
118
+ "github",
119
+ "hacker_news_hours_topic",
120
+ "hacker_news_daily_report"
121
+ ],
122
+ "slack": {
123
+ "webhook_url": "your_slack_webhook_url"
124
+ }
125
+ }
126
+ ```
127
+
128
+ **For security reasons:** The GitHub Token and Email Password settings support using environment variables to avoid configuring sensitive information in plain text, as shown below:
129
+
130
+ ```shell
131
+ # Github
132
+ export GITHUB_TOKEN="github_pat_xxx"
133
+ # Email
134
+ export EMAIL_PASSWORD="password"
135
+ ```
136
+
137
+ ### 3. How to Run
138
+
139
+ GitHub Sentinel supports the following three running modes:
140
+
141
+ #### A. Run as a Command Line Tool
142
+
143
+ You can run the application interactively from the command line:
144
+
145
+ ```sh
146
+ python src/command_tool.py
147
+ ```
148
+
149
+ In this mode, you can manually input commands to manage subscriptions, retrieve updates, and generate reports.
150
+
151
+ #### B. Run as a Background Service
152
+
153
+ To run the application as a background service (daemon process), it will automatically update periodically according to the relevant configuration.
154
+
155
+ You can directly use the daemon management script [daemon_control.sh](daemon_control.sh) to start, check the status, stop, and restart:
156
+
157
+ 1. Start the service:
158
+
159
+ ```sh
160
+ $ ./daemon_control.sh start
161
+ Starting DaemonProcess...
162
+ DaemonProcess started.
163
+ ```
164
+
165
+ - This will start [./src/daemon_process.py], which will periodically generate reports and send emails according to the update frequency and time point set in `config.json`.
166
+ - The service log will be saved to the `logs/DaemonProcess.log` file. At the same time, historical cumulative logs will also be appended to the `logs/app.log` log file.
167
+
168
+ 2. Check the service status:
169
+
170
+ ```sh
171
+ $ ./daemon_control.sh status
172
+ DaemonProcess is running.
173
+ ```
174
+
175
+ 3. Stop the service:
176
+
177
+ ```sh
178
+ $ ./daemon_control.sh stop
179
+ Stopping DaemonProcess...
180
+ DaemonProcess stopped.
181
+ ```
182
+
183
+ 4. Restart the service:
184
+
185
+ ```sh
186
+ $ ./daemon_control.sh restart
187
+ Stopping DaemonProcess...
188
+ DaemonProcess stopped.
189
+ Starting DaemonProcess...
190
+ DaemonProcess started.
191
+ ```
192
+
193
+ #### C. Run as a Gradio Server
194
+
195
+ To run the application with a Gradio interface, allowing users to interact with the tool via a web interface:
196
+
197
+ ```sh
198
+ python src/gradio_server.py
199
+ ```
200
+
201
+ ![gradio_v0.8_github](images/gradio_v0.8_github.png)
202
+
203
+ ![gradio_v0.8_hn](images/gradio_v0.8_hn.png)
204
+
205
+
206
+ - This will start a web server on your machine, allowing you to manage subscriptions and generate reports through a user-friendly interface.
207
+ - By default, the Gradio server will be accessible at `http://localhost:7860`, but it can be shared publicly if needed.
208
+
209
+ ## Ollama Installation and Service Deployment
210
+
211
+ Ollama is a private large model management tool that supports local and containerized deployment, command-line interaction, and REST API calls.
212
+
213
+ For detailed instructions on Ollama installation and private large model service deployment, please refer to [Ollama Installation and Service Deployment](docs/ollama.md).
214
+
215
+ ### Ollama Brief Official Installation
216
+
217
+ To use Ollama for calling private large model services in GitHub Sentinel, follow these steps for installation and configuration:
218
+
219
+ 1. **Install Ollama**:
220
+ Download and install the Ollama service according to the official Ollama documentation. Ollama supports multiple operating systems, including Linux, Windows, and macOS.
221
+
222
+ 2. **Start the Ollama Service**:
223
+ After installation, start the Ollama service with the following command:
224
+
225
+ ```bash
226
+ ollama serve
227
+ ```
228
+
229
+ By default, the Ollama API will run on `http://localhost:11434`.
230
+
231
+ 3. **Configure Ollama for Use in GitHub Sentinel**:
232
+ In the `config.json` file, configure the relevant information for the Ollama API:
233
+
234
+ ```json
235
+ {
236
+ "llm": {
237
+ "model_type": "ollama",
238
+ "ollama_model_name": "llama3",
239
+ "ollama_api_url": "http://localhost:11434/api/chat"
240
+ }
241
+ }
242
+ ```
243
+
244
+ 4. **Validate the Configuration**:
245
+ Start GitHub Sentinel and generate a report with the following command to verify that the Ollama configuration is correct:
246
+
247
+ ```bash
248
+ python src/command_tool.py
249
+ ```
250
+
251
+ If the configuration is correct, you will be able to generate reports using the Ollama model.
252
+
253
+ ## Unit Testing
254
+
255
+ To ensure the quality and reliability of the code, GitHub Sentinel uses the `unittest` module for unit testing. For detailed explanations of `unittest` and related tools (such as `@patch` and `MagicMock`), please refer to [Detailed Unit Test Explanation](docs/unit_test.md).
256
+
257
+ ### Unit Testing and Validation Script `validate_tests.sh`
258
+
259
+ #### Purpose
260
+ `validate_tests.sh` is a shell script used to run unit tests and validate the results. It is executed during the Docker image build process to ensure the correctness and stability of the code.
261
+
262
+ #### Functionality
263
+ - The script runs all unit tests and outputs the results to the `test_results.txt` file.
264
+ - If the tests fail, the script outputs the test results and causes the Docker build to fail.
265
+ - If all tests pass, the script continues the build process.
266
+
267
+ ## Building and Validating with Docker
268
+
269
+ To facilitate building and deploying the GitHub Sentinel project in various environments, we provide Docker support. This support
270
+
271
+ includes the following files and functionalities:
272
+
273
+ ### 1. `Dockerfile`
274
+
275
+ #### Purpose
276
+ The `Dockerfile` is a configuration file used to define how to build a Docker image. It describes the steps to build the image, including installing dependencies, copying project files, running unit tests, etc.
277
+
278
+ #### Key Steps
279
+ - Use `python:3.10-slim` as the base image and set the working directory to `/app`.
280
+ - Copy the project's `requirements.txt` file and install Python dependencies.
281
+ - Copy all project files to the container and grant execution permission to the `validate_tests.sh` script.
282
+ - During the build process, execute the `validate_tests.sh` script to ensure that all unit tests pass. If the tests fail, the build process will be aborted.
283
+ - After a successful build, the container will default to running `src/main.py` as the entry point.
284
+
285
+ ### 2. `build_image.sh`
286
+
287
+ #### Purpose
288
+ `build_image.sh` is a shell script used to automatically build a Docker image. It retrieves the branch name from the current Git branch and uses it as the tag for the Docker image, facilitating the generation of different Docker images on different branches.
289
+
290
+ #### Functionality
291
+ - Retrieve the current Git branch name and use it as the tag for the Docker image.
292
+ - Use the `docker build` command to build the Docker image and tag it with the current Git branch name.
293
+
294
+ #### Usage Example
295
+ ```bash
296
+ chmod +x build_image.sh
297
+ ./build_image.sh
298
+ ```
299
+
300
+ ![build_docker_image](images/build_docker_image.jpg)
301
+
302
+ With these scripts and configuration files, you can ensure that Docker images built in different development branches are based on code that has passed unit tests, thereby improving code quality and deployment reliability.
303
+
304
+ ## Contributing
305
+
306
+ Contributions make the open-source community a wonderful place to learn, inspire, and create. Any contributions you make are **greatly appreciated**. If you have any suggestions or feature requests, please start an issue to discuss what you would like to change.
307
+
308
+ <a href='https://github.com/repo-reviews/repo-reviews.github.io/blob/main/create.md' target="_blank"><img alt='Github' src='https://img.shields.io/badge/review_me-100000?style=flat&logo=Github&logoColor=white&labelColor=888888&color=555555'/></a>
309
+
310
+ ## License
311
+
312
+ This project is licensed under the terms of the Apache-2.0 License. See the [LICENSE](LICENSE) file for details.
313
+
314
+ ## Contact
315
+
316
+ Django Peng - pjt73651@email.com
317
+
318
+ Project Link: https://github.com/DjangoPeng/GitHubSentinel
README.md CHANGED
@@ -10,4 +10,310 @@ pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: apache-2.0
11
  ---
12
 
13
+ # GitHub Sentinel
14
+
15
+ ![GitHub stars](https://img.shields.io/github/stars/DjangoPeng/GitHubSentinel?style=social)
16
+ ![GitHub forks](https://img.shields.io/github/forks/DjangoPeng/GitHubSentinel?style=social)
17
+ ![GitHub watchers](https://img.shields.io/github/watchers/DjangoPeng/GitHubSentinel?style=social)
18
+ ![GitHub repo size](https://img.shields.io/github/repo-size/DjangoPeng/GitHubSentinel)
19
+ ![GitHub language count](https://img.shields.io/github/languages/count/DjangoPeng/GitHubSentinel)
20
+ ![GitHub top language](https://img.shields.io/github/languages/top/DjangoPeng/GitHubSentinel)
21
+ ![GitHub last commit](https://img.shields.io/github/last-commit/DjangoPeng/GitHubSentinel?color=red)
22
+
23
+ <p align="center">
24
+ <br> <a href="README-EN.md">English</a> | 中文
25
+ </p>
26
+
27
+ ## 目录
28
+
29
+ - [GitHub Sentinel](#github-sentinel)
30
+ - [主要功能](#主要功能)
31
+ - [产品截图](#产品截图)
32
+ - [快速开始](#快速开始)
33
+ - [1. 安装依赖](#1-安装依赖)
34
+ - [2. 配置应用](#2-配置应用)
35
+ - [3. 如何运行](#3-如何运行)
36
+ - [A. 作为命令行工具运行](#a-作为命令行工具运行)
37
+ - [B. 作为后台服务运行](#b-作为后台服务运行)
38
+ - [C. 作为 Gradio 服务器运行](#c-作为-gradio-服务器运行)
39
+ - [Ollama 安装与服务发布](#Ollama-安装与服务发布)
40
+ - [单元测试](#单元测试)
41
+ - [单元测试和验证脚本 `validate_tests.sh`](#单元测试和验证脚本-validate_testssh)
42
+ - [用途](#用途)
43
+ - [功能](#功能)
44
+ - [使用 Docker 构建与验证](#使用-docker-构建与验证)
45
+ - [1. `Dockerfile`](#1-dockerfile)
46
+ - [用途](#用途)
47
+ - [关键步骤](#关键步骤)
48
+ - [2. `build_image.sh`](#2-build_imagesh)
49
+ - [用途](#用途)
50
+ - [功能](#功能)
51
+ - [贡献](#贡献)
52
+ - [许可证](#许可证)
53
+ - [联系](#联系)
54
+
55
+
56
+
57
+ GitHub Sentinel 是专为大模型(LLMs)时代打造的智能信息检索和高价值内容挖掘 `AI Agent`。它面向那些需要高频次、大量信息获取的用户,特别是开源爱好者、个人开发者和投资人等。
58
+
59
+
60
+ ### 主要功能
61
+
62
+ - **订阅管理**:轻松管理和跟踪您关注的 GitHub 仓库。
63
+ - **更新检索**:自动检索并汇总订阅仓库的最新动态,包括提交记录、问题和拉取请求。
64
+ - **通知系统**:通过电子邮件等方式,实时通知订阅者项目的最新进展。
65
+ - **报告生成**:基于检索到的更新生成详细的项目进展报告,支持多种格式和模板,满足不同需求。
66
+ - **多模型支持**:结合 OpenAI 和 Ollama 模型,生成自然语言项目报告,提供更智能、精准的信息服务。
67
+ - **定时任务**:支持以守护进程方式执行定时任务,确保信息更新及时获取。
68
+ - **图形化界面**:基于 Gradio 实现了简单易用的 GUI 操作模式,降低使用门槛。
69
+ - **容器化**:项目支持 Docker 构建和容器化部署,便于在不同环境中快速部署和运行。
70
+ - **持续集成**:实现了完备的单元测试,便于进一步配置生产级 CI/CD 流程,确保项目的稳定性和高质量交付。
71
+
72
+ GitHub Sentinel 不仅能帮助用户自动跟踪和分析 `GitHub 开源项目` 的最新动态,还能快速扩展到其他信息渠道,如 `Hacker News` 的热门话题,提供更全面的信息挖掘与分析能力。
73
+
74
+ ### 产品截图
75
+
76
+ **GitHub 项目进度跟踪与总结**
77
+
78
+ ![gradio_v0.8_github](images/gradio_v0.8_github.png)
79
+
80
+ **Hacker News 热门技术话题挖掘**
81
+ ![gradio_v0.8_hn](images/gradio_v0.8_hn.png)
82
+
83
+
84
+ ## 快速开始
85
+
86
+ ### 1. 安装依赖
87
+
88
+ 首先,安装所需的依赖项:
89
+
90
+ ```sh
91
+ pip install -r requirements.txt
92
+ ```
93
+
94
+ ### 2. 配置应用
95
+
96
+ 编辑 `config.json` 文件,以设置您的 GitHub Token、Email 设置(以腾讯企微邮箱为例)、订阅文件、更新设置,大模型服务配置(支持 OpenAI GPT API 和 Ollama 私有化大模型服务),以及自动检索和生成的报告类型(GitHub项目进展, Hacker News 热门话题和前沿技术趋势):
97
+
98
+ ```json
99
+ {
100
+ "github": {
101
+ "token": "your_github_token",
102
+ "subscriptions_file": "subscriptions.json",
103
+ "progress_frequency_days": 1,
104
+ "progress_execution_time": "08:00"
105
+ },
106
+ "email": {
107
+ "smtp_server": "smtp.exmail.qq.com",
108
+ "smtp_port": 465,
109
+ "from": "from_email@example.com",
110
+ "password": "your_email_password",
111
+ "to": "to_email@example.com"
112
+ },
113
+ "llm": {
114
+ "model_type": "ollama",
115
+ "openai_model_name": "gpt-4o-mini",
116
+ "ollama_model_name": "llama3",
117
+ "ollama_api_url": "http://localhost:11434/api/chat"
118
+ },
119
+ "report_types": [
120
+ "github",
121
+ "hacker_news_hours_topic",
122
+ "hacker_news_daily_report"
123
+ ],
124
+ "slack": {
125
+ "webhook_url": "your_slack_webhook_url"
126
+ }
127
+ }
128
+ ```
129
+
130
+ **出于安全考虑:** GitHub Token 和 Email Password 的设置均支持使用环境变量进行配置,以避免明文配置重要信息,如下所示:
131
+
132
+ ```shell
133
+ # Github
134
+ export GITHUB_TOKEN="github_pat_xxx"
135
+ # Email
136
+ export EMAIL_PASSWORD="password"
137
+ ```
138
+
139
+
140
+ ### 3. 如何运行
141
+
142
+ GitHub Sentinel 支持以下三种运行方式:
143
+
144
+ #### A. 作为命令行工具运行
145
+
146
+ 您可以从命令行交互式地运行该应用:
147
+
148
+ ```sh
149
+ python src/command_tool.py
150
+ ```
151
+
152
+ 在此模式下,您可以手动输入命令来管理订阅、检索更新和生成报告。
153
+
154
+ #### B. 作为后台服务运行
155
+
156
+ 要将该应用作为后台服务(守护进程)运行,它将根据相关配置定期自动更新。
157
+
158
+ 您可以直接使用守护进程管理脚本 [daemon_control.sh](daemon_control.sh) 来启动、查询状态、关闭和重启:
159
+
160
+ 1. 启动服务:
161
+
162
+ ```sh
163
+ $ ./daemon_control.sh start
164
+ Starting DaemonProcess...
165
+ DaemonProcess started.
166
+ ```
167
+
168
+ - 这将启动[./src/daemon_process.py],按照 `config.json` 中设置的更新频率和时间点定期生成报告,并发送邮件。
169
+ - 本次服务日志将保存到 `logs/DaemonProcess.log` 文件中。同时,历史累计日志也将同步追加到 `logs/app.log` 日志文件中。
170
+
171
+ 2. 查询服务状态:
172
+
173
+ ```sh
174
+ $ ./daemon_control.sh status
175
+ DaemonProcess is running.
176
+ ```
177
+
178
+ 3. 关闭服务:
179
+
180
+ ```sh
181
+ $ ./daemon_control.sh stop
182
+ Stopping DaemonProcess...
183
+ DaemonProcess stopped.
184
+ ```
185
+
186
+ 4. 重启服务:
187
+
188
+ ```sh
189
+ $ ./daemon_control.sh restart
190
+ Stopping DaemonProcess...
191
+ DaemonProcess stopped.
192
+ Starting DaemonProcess...
193
+ DaemonProcess started.
194
+ ```
195
+
196
+ #### C. 作为 Gradio 服务器运行
197
+
198
+ 要使用 Gradio 界面运行应用,允许用户通过 Web 界面与该工具交互:
199
+
200
+ ```sh
201
+ python src/gradio_server.py
202
+ ```
203
+
204
+
205
+ - 这将在您的机器上启动一个 Web 服务器,允许您通过用户友好的界面管理订阅和生成报告。
206
+ - 默认情况下,Gradio 服务器将可在 `http://localhost:7860` 访问,但如果需要,您可以公开共享它。
207
+
208
+
209
+ ## Ollama 安装与服务发布
210
+
211
+ Ollama 是一个私有化大模型管理工具,支持本地和容器化部署,命令行交互和 REST API 调用。
212
+
213
+ 关于 Ollama 安装部署与私有化大模型服务发布的详细说明,请参考[Ollama 安装部署与服务发布](docs/ollama.md)。
214
+
215
+ ### Ollama 简要官方安装
216
+
217
+ 要在 GitHub Sentinel 中使用 Ollama 调用私有化大模型服务,请按照以下步骤进行安装和配置:
218
+
219
+ 1. **安装 Ollama**:
220
+ 请根据 Ollama 的官方文档下载并安装 Ollama 服务。Ollama 支持多种操作系统,包括 Linux、Windows 和 macOS。
221
+
222
+ 2. **启动 Ollama 服务**:
223
+ 安装完成后,通过以下命令启动 Ollama 服务:
224
+
225
+ ```bash
226
+ ollama serve
227
+ ```
228
+
229
+ 默认情况下,Ollama API 将在 `http://localhost:11434` 运行。
230
+
231
+ 3. **配置 Ollama 在 GitHub Sentinel 中使用**:
232
+ 在 `config.json` 文件中,配置 Ollama API 的相关信息:
233
+
234
+ ```json
235
+ {
236
+ "llm": {
237
+ "model_type": "ollama",
238
+ "ollama_model_name": "llama3",
239
+ "ollama_api_url": "http://localhost:11434/api/chat"
240
+ }
241
+ }
242
+ ```
243
+
244
+ 4. **验证配置**:
245
+ 使用以下命令启动 GitHub Sentinel 并生成报告,以验证 Ollama 配置是否正确:
246
+
247
+ ```bash
248
+ python src/command_tool.py
249
+ ```
250
+
251
+ 如果配置正确,您将能够通过 Ollama 模型生成报告。
252
+
253
+
254
+
255
+ ## 单元测试
256
+
257
+ 为了确保代码的质量和可靠性,GitHub Sentinel 使用了 `unittest` 模块进行单元测试。关于 `unittest` 及其相关工具(如 `@patch` 和 `MagicMock`)的详细说明,请参考 [单元测试详细说明](docs/unit_test.md)。
258
+
259
+ ### 单元测试和验证脚本 `validate_tests.sh`
260
+
261
+ #### 用途
262
+ `validate_tests.sh` 是一个用于运行单元测试并验证结果的 Shell 脚本。它在 Docker 镜像构建过程中被执行,以确保代码的正确性和稳定性。
263
+
264
+ #### 功能
265
+ - 脚本运行所有单元测试,并将结果输出到 `test_results.txt` 文件中。
266
+ - 如果测试失败,脚本会输出测试结果并导致 Docker 构建失败。
267
+ - 如果所有测试通过,脚本会继续构建过程。
268
+
269
+
270
+ ## 使用 Docker 构建与验证
271
+
272
+ 为了便于在各种环境中构建和部署 GitHub Sentinel 项目,我们提供了 Docker 支持。该支持包括以下文件和功能:
273
+
274
+ ### 1. `Dockerfile`
275
+
276
+ #### 用途
277
+ `Dockerfile` 是用于定义如何构建 Docker 镜像的配置文件。它描述了镜像的构建步骤,包括安装依赖、复制项目文件、运行单元测试等。
278
+
279
+ #### 关键步骤
280
+ - 使用 `python:3.10-slim` 作为基础镜像,并设置工作目录为 `/app`。
281
+ - 复制项目的 `requirements.txt` 文件并安装 Python 依赖。
282
+ - 复制项目的所有文件到容器,并赋予 `validate_tests.sh` 脚本执行权限。
283
+ - 在构建过程中执行 `validate_tests.sh` 脚本,以确保所有单元测试通过。如果测试失败,构建过程将中止。
284
+ - 构建成功后,将默认运行 `src/main.py` 作为容器的入口点。
285
+
286
+ ### 2. `build_image.sh`
287
+
288
+ #### 用途
289
+ `build_image.sh` 是一个用于自动构建 Docker 镜像的 Shell 脚本。它从当前的 Git 分支获取分支名称,并将其用作 Docker 镜像的标签,便于在不同分支上生成不同的 Docker 镜像。
290
+
291
+ #### 功能
292
+ - 获取当前的 Git 分支名称,并将其用作 Docker 镜像的标签。
293
+ - 使用 `docker build` 命令构建 Docker 镜像,并使用当前 Git 分支名称作为标签。
294
+
295
+ #### 使用示例
296
+ ```bash
297
+ chmod +x build_image.sh
298
+ ./build_image.sh
299
+ ```
300
+
301
+ ![build_docker_image](images/build_docker_image.jpg)
302
+
303
+ 通过这些脚本和配置文件,确保在不同的开发分支中,构建的 Docker 镜像都是基于通过单元测试的代码,从而提高了代码质量和部署的可靠性。
304
+
305
+ ## 贡献
306
+
307
+ 贡献是使开源社区成为学习、激励和创造的惊人之处。非常感谢你所做的任何贡献。如果你有任何建议或功能请求,请先开启一个议题讨论你想要改变的内容。
308
+
309
+ <a href='https://github.com/repo-reviews/repo-reviews.github.io/blob/main/create.md' target="_blank"><img alt='Github' src='https://img.shields.io/badge/review_me-100000?style=flat&logo=Github&logoColor=white&labelColor=888888&color=555555'/></a>
310
+
311
+ ## 许可证
312
+
313
+ 该项目根据 Apache-2.0 许可证的条款进行许可。详情请参见 [LICENSE](LICENSE) 文件。
314
+
315
+ ## 联系
316
+
317
+ Django Peng - pjt73651@email.com
318
+
319
+ 项目链接: https://github.com/DjangoPeng/GitHubSentinel
build_image.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # 获取当前的 Git 分支名称
4
+ BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD)
5
+
6
+ # 如果需要,可以处理分支名称,例如替换无效字符
7
+ BRANCH_NAME=${BRANCH_NAME//\//-}
8
+
9
+ # 使用 Git 分支名称作为 Docker 镜像的标签
10
+ IMAGE_TAG="github_sentinel:${BRANCH_NAME}"
11
+
12
+ # 构建 Docker 镜像
13
+ docker build -t $IMAGE_TAG .
14
+
15
+ # 输出构建结果
16
+ echo "Docker 镜像已构建并打上标签: $IMAGE_TAG"
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "github": {
3
+ "token": "your_github_token",
4
+ "subscriptions_file": "subscriptions.json",
5
+ "progress_frequency_days": 1,
6
+ "progress_execution_time": "08:00"
7
+ },
8
+ "email": {
9
+ "smtp_server": "smtp.exmail.qq.com",
10
+ "smtp_port": 465,
11
+ "from": "pjt@zaijidata.com",
12
+ "password": "",
13
+ "to": "test@zaijidata.com"
14
+ },
15
+ "llm": {
16
+ "model_type": "ollama",
17
+ "openai_model_name": "gpt-4o-mini",
18
+ "ollama_model_name": "llama3.1",
19
+ "ollama_api_url": "http://localhost:11434/api/chat"
20
+ },
21
+ "report_types": [
22
+ "github",
23
+ "hacker_news_hours_topic",
24
+ "hacker_news_daily_report"
25
+ ],
26
+ "slack": {
27
+ "webhook_url": "your_slack_webhook_url"
28
+ }
29
+ }
daemon_control.sh ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # 守护进程控制脚本
3
+
4
+ # 定义守护进程 Python 脚本的路径
5
+ DAEMON_PATH="./src/daemon_process.py"
6
+ # 定义守护进程的名称
7
+ DAEMON_NAME="DaemonProcess"
8
+ # 定义日志文件的路径
9
+ LOG_FILE="./logs/$DAEMON_NAME.log"
10
+ # 定义守护进程的 PID 文件路径,用于存储进程号
11
+ PID_FILE="./run/$DAEMON_NAME.pid"
12
+
13
+ # 启动守护进程的函数
14
+ start() {
15
+ echo "Starting $DAEMON_NAME..."
16
+ # 使用 nohup 命令在后台运行 Python 脚本,并将输出重定向到日志文件
17
+ nohup python3 $DAEMON_PATH > $LOG_FILE 2>&1 &
18
+ # 将守护进程的 PID 写入文件
19
+ echo $! > $PID_FILE
20
+ echo "$DAEMON_NAME started."
21
+ }
22
+
23
+ # 停止守护进程的函数
24
+ stop() {
25
+ if [ -f $PID_FILE ]; then
26
+ # 如果 PID 文件存在,读取 PID
27
+ PID=$(cat $PID_FILE)
28
+ echo "Stopping $DAEMON_NAME..."
29
+ # 使用 kill 命令停止进程
30
+ kill $PID
31
+ echo "$DAEMON_NAME stopped."
32
+ # 删除 PID 文件
33
+ rm $PID_FILE
34
+ else
35
+ echo "$DAEMON_NAME is not running."
36
+ fi
37
+ }
38
+
39
+ # 检查守护进程状态的函数
40
+ status() {
41
+ if [ -f $PID_FILE ]; then
42
+ PID=$(cat $PID_FILE)
43
+ # 检查进程是否在运行
44
+ if ps -p $PID > /dev/null
45
+ then
46
+ echo "$DAEMON_NAME is running."
47
+ else
48
+ echo "$DAEMON_NAME is not running."
49
+ fi
50
+ else
51
+ echo "$DAEMON_NAME is not running."
52
+ fi
53
+ }
54
+
55
+ # 根据输入参数选择执行哪个函数
56
+ case "$1" in
57
+ start)
58
+ start
59
+ ;;
60
+ stop)
61
+ stop
62
+ ;;
63
+ status)
64
+ status
65
+ ;;
66
+ restart)
67
+ # 重启守护进程
68
+ stop
69
+ start
70
+ ;;
71
+ *)
72
+ # 如果参数不符合预期,显示用法
73
+ echo "Usage: $0 {start|stop|status|restart}"
74
+ exit 1
75
+ esac
daily_progress/langchain-ai_langchain/.ipynb_checkpoints/2024-08-18-checkpoint.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Daily Progress for langchain-ai/langchain (2024-08-18)
2
+
3
+
4
+ ## Issues Closed Today
5
+
6
+ ## Pull Requests Merged Today
7
+ - docs: add prompt to install nltk #25519
8
+ - docs: add prompt to install beautifulsoup4. #25518
9
+ - add embeddings integration tests #25508
10
+ - docs: fix Databricks Vector Search demo notebook #25504
11
+ - openai[patch]: update core dep #25502
12
+ - core[patch]: Release 0.2.33 #25498
13
+ - openai[patch]: Release 0.1.22 #25496
14
+ - docs: use .invoke rather than __call__ in openai integration notebook #25494
15
+ - openai[patch]: fix json mode for Azure #25488
16
+ - langchain-box: add langchain box package and DocumentLoader #25465
17
+ - docs: fix mimetype parser docstring #25463
18
+ - langchain-box: Add DocumentLoader #25458
19
+ - core[minor], anthropic[patch]: Upgrade @root_validator usage to be consistent with pydantic 2 #25457
20
+ - voyageai[patch]: Upgrade root validators for pydantic 2 #25455
21
+ - ai21[patch]: Upgrade @root_validators for pydantic 2 migration #25454
22
+ - pinecone[patch]: Upgrade @root_validators to be consistent with pydantic 2 #25453
23
+ - docs: `arxiv` page update #25450
24
+ - docs: format oai embeddings docstring #25448
25
+ - docs: fix api ref mod links in pkg page #25447
26
+ - mistralai[patch]: Update more @root_validators for pydantic 2 compatibility #25446
27
+ - docs[patch]: Update code that checks API keys #25444
28
+ - fireworks[patch]: Upgrade @root_validators to be pydantic 2 compliant #25443
29
+ - docs[patch]: Fix integration notebook for Fireworks llm #25442
30
+ - experimental: Fix divide by 0 error #25439
31
+ - community[patch]: update connection string in azure cosmos integration test #25438
32
+ - convert _DocumentWithState to Document in ContextualCompressionRetriever, #25432
33
+ - [docs]: pdf loaders #25425
34
+ - docs[patch]: Fix a few typos in the chat integration docs for TogetherAI #25424
35
+ - together[patch]: Update @root_validator for pydantic 2 compatibility #25423
36
+ - langchain: bump core to 0.2.32 #25421
docs/ollama.md ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Ollama 安装部署与服务发布
2
+
3
+ ### Linux
4
+
5
+ ```bash
6
+ curl -fsSL https://ollama.com/install.sh | sh
7
+ ```
8
+
9
+ [手动安装说明](https://github.com/ollama/ollama/blob/main/docs/linux.md)
10
+
11
+ ### macOS
12
+
13
+ [下载](https://ollama.com/download/Ollama-darwin.zip)
14
+
15
+ ### Windows 预览版
16
+
17
+ [下载](https://ollama.com/download/OllamaSetup.exe)
18
+
19
+ ---
20
+
21
+ ## 快速入门
22
+
23
+ 要运行并与 [Llama 3.1](https://ollama.com/library/llama3.1) 进行对话:
24
+
25
+ ```bash
26
+ ollama run llama3.1
27
+ ```
28
+
29
+ ---
30
+
31
+ ## 模型库
32
+
33
+ Ollama 支持在 [ollama.com/library](https://ollama.com/library) 上提供的一系列模型。
34
+
35
+ 以下是一些可以下载的示例模型:
36
+
37
+ | 模型 | 参数 | 大小 | 下载命令 |
38
+ | ------------------ | ----- | ----- | ------------------------------ |
39
+ | Llama 3.1 | 8B | 4.7GB | `ollama run llama3.1` |
40
+ | Llama 3.1 | 70B | 40GB | `ollama run llama3.1:70b` |
41
+ | Llama 3.1 | 405B | 231GB | `ollama run llama3.1:405b` |
42
+ | Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
43
+ | Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
44
+ | Gemma 2 | 2B | 1.6GB | `ollama run gemma2:2b` |
45
+ | Gemma 2 | 9B | 5.5GB | `ollama run gemma2` |
46
+ | Gemma 2 | 27B | 16GB | `ollama run gemma2:27b` |
47
+ | Mistral | 7B | 4.1GB | `ollama run mistral` |
48
+ | Moondream 2 | 1.4B | 829MB | `ollama run moondream` |
49
+ | Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
50
+ | Starling | 7B | 4.1GB | `ollama run starling-lm` |
51
+ | Code Llama | 7B | 3.8GB | `ollama run codellama` |
52
+ | Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
53
+ | LLaVA | 7B | 4.5GB | `ollama run llava` |
54
+ | Solar | 10.7B | 6.1GB | `ollama run solar` |
55
+
56
+ ---
57
+
58
+ ### 命令行工具
59
+
60
+ #### 创建模型
61
+
62
+ `ollama create` 用于从 Modelfile 创建模型。
63
+
64
+ ```bash
65
+ ollama create mymodel -f ./Modelfile
66
+ ```
67
+
68
+ #### 拉取模型
69
+
70
+ ```bash
71
+ ollama pull llama3.1
72
+ ```
73
+
74
+ > 此命令还可用于更新本地模型。仅会拉取差异部分。
75
+
76
+ #### 删除模型
77
+
78
+ ```bash
79
+ ollama rm llama3.1
80
+ ```
81
+
82
+ #### 复制模型
83
+
84
+ ```bash
85
+ ollama cp llama3.1 my-model
86
+ ```
87
+
88
+ #### 多行输入
89
+
90
+ 对于多行输入,可以使用 `"""` 包裹文本:
91
+
92
+ ```bash
93
+ >>> """Hello,
94
+ ... world!
95
+ ... """
96
+ ```
97
+ 这将输出一个包含“Hello, world!”消息的简单程序。
98
+
99
+ #### 多模态模型
100
+
101
+ ```bash
102
+ ollama run llava "这张图片中有什么? /Users/jmorgan/Desktop/smile.png"
103
+ ```
104
+ 图像中显示的是一个黄色的笑脸,可能是图片的中心焦点。
105
+
106
+ #### 以参数传递提示
107
+
108
+ ```bash
109
+ $ ollama run llama3.1 "总结此文件: $(cat README.md)"
110
+ ```
111
+ Ollama 是一个轻量级、可扩展的框架,用于在本地计算机上构建和运行语言模型。
112
+
113
+ ---
114
+
115
+ ### REST API
116
+
117
+ Ollama 提供 REST API 来运行和管理模型。
118
+
119
+ #### 生成响应
120
+
121
+ ```bash
122
+ curl http://localhost:11434/api/generate -d '{
123
+ "model": "llama3.1",
124
+ "prompt":"为什么天空是蓝色的?"
125
+ }'
126
+ ```
127
+
128
+ #### 与模型对话
129
+
130
+ ```bash
131
+ curl http://localhost:11434/api/chat -d '{
132
+ "model": "llama3.1",
133
+ "messages": [
134
+ { "role": "user", "content": "为什么天空是蓝色的?" }
135
+ ]
136
+ }'
137
+ ```
138
+
139
+ 有关所有端点(Endpoint)的详细信息,请参阅 [API 文档](./docs/api.md)。
140
+
141
+ ---
142
+
143
+ ### Docker 支持
144
+
145
+ Ollama 官方提供了 Docker 镜像 `ollama/ollama`,可以在 Docker Hub 上找到。
146
+
147
+ #### 使用 CPU 运行
148
+
149
+ ```bash
150
+ docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
151
+ ```
152
+
153
+ #### 使用 Nvidia GPU 运行
154
+
155
+ 要使用 Nvidia GPU,首先需要安装 NVIDIA Container Toolkit:
156
+
157
+ ```bash
158
+ # 配置仓库
159
+ curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
160
+ curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
161
+ sudo apt-get update
162
+
163
+ # 安装 NVIDIA Container Toolkit 包
164
+ sudo apt-get install -y nvidia-container-toolkit
165
+
166
+ # 配置 Docker 使用 Nvidia 驱动
167
+ sudo nvidia-ctk runtime configure --runtime=docker
168
+ sudo systemctl restart docker
169
+ ```
170
+
171
+ 启动容器:
172
+
173
+ ```bash
174
+ docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
175
+ ```
176
+
177
+ #### 使用 AMD GPU 运行
178
+
179
+ 要使用 AMD GPU 运行 Ollama,可以使用 `rocm` 标签,并运行以下命令:
180
+
181
+ ```bash
182
+ docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
183
+ ```
184
+
185
+ ### 本地运行模型
186
+
187
+ 现在,你可以运行一个模型:
188
+
189
+ ```bash
190
+ docker exec -it ollama ollama run llama3
191
+ ```
192
+
193
+ ---
194
+
195
+ 请根据以上内容进行 Ollama 的安装和配置,使用 CLI 工具和 Docker 镜像来管理和运行各种模型。如需更多信息,请访问 [Ollama GitHub 仓库](https://github.com/ollama/ollama)。
docs/unit_test.md ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # 单元测试详细说明
3
+
4
+ - [单元测试详细说明](#单元测试详细说明)
5
+ - [unittest.TestCase 单元测试样例](#unittesttestcase-单元测试样例)
6
+ - [主要功能和特点](#主要功能和特点)
7
+ - [使用示例](#使用示例)
8
+ - [@patch 装饰器详细](#patch-装饰器详细)
9
+ - [主要参数](#主要参数)
10
+ - [使用场景](#使用场景)
11
+ - [在 test_subscription_manager.py 中的应用](#在-test_subscription_managerpy-中的应用)
12
+ - [其他常见用法](#其他常见用法)
13
+ - [小结](#小结)
14
+ - [MagicMock 模拟工具](#magicmock-模拟工具)
15
+ - [主要功能和特点](#主要功能和特点)
16
+ - [在 test_report_generator.py 中的应用](#在-test_report_generatorpy-中的应用)
17
+ - [小结](#小结)
18
+
19
+ ## `unittest.TestCase` 单元测试样例
20
+
21
+ `unittest.TestCase` 是所有测试类的基类,它为测试提供了丰富的断言方法和测试工具。通过继承 `unittest.TestCase`,可以创建自己的测试类,并定义测试方法来验证代码的行为。
22
+
23
+ ### 主要功能和特点
24
+
25
+ 1. **断言方法**:
26
+ - `assertEqual(a, b)`:检查 `a` 和 `b` 是否相等。
27
+ - `assertTrue(x)`:检查 `x` 是否为 `True`。
28
+ - `assertFalse(x)`:检查 `x` 是否为 `False`。
29
+ - `assertRaises(Exception, func, *args, **kwargs)`:检查是否抛出指定的异常。
30
+
31
+ 2. **测试方法的命名**:
32
+ - 在 `TestCase` 类中,以 `test_` 开头的方法将被自动识别为测试方法,并在运行测试时自动执行。
33
+
34
+ 3. **设置和清理**:
35
+ - `setUp()`:在每个测试方法运行之前执行,用于初始化测试环境。
36
+ - `tearDown()`:在每个测试方法运行之后执行,用于清理测试环境。
37
+
38
+ ### 使用示例
39
+
40
+ ```python
41
+ import unittest
42
+
43
+ class MyTestCase(unittest.TestCase):
44
+ def setUp(self):
45
+ # 初始化代码
46
+ pass
47
+
48
+ def test_example(self):
49
+ self.assertEqual(1 + 1, 2)
50
+
51
+ def tearDown(self):
52
+ # 清理代码
53
+ pass
54
+
55
+ if __name__ == '__main__':
56
+ unittest.main()
57
+ ```
58
+
59
+ ## `@patch` 装饰器详细
60
+
61
+ `@patch` 装饰器是 `unittest.mock` 模块中的一个功能强大的工具,用于在单元测试中替换模块或类的属性,使其指向一个模拟对象。通过使用 `@patch`,可以在测试过程中替换特定的函数或对象,以控制其行为,并避免依赖外部资源(如文件系统、数据库、网络请求等)。
62
+
63
+ ### 主要参数
64
+
65
+ - **`target`**:指定要替换的对象。通常是一个字符串,表示模块路径(如 `'builtins.open'`)。
66
+ - **`new`**:提供一个新的对象来替换目标对象。可以是任何对象,通常是一个模拟对象(如 `mock_open`)。
67
+ - **`new_callable`**:指定一个可以调用的对象,当目标对象被替换时,将返回这个对象的实例。常用于创建模拟对象(如 `mock_open`)。
68
+
69
+ ### 使用场景
70
+
71
+ 在单元测试中,`@patch` 主要用于:
72
+
73
+ 1. **模拟外部依赖**:例如,模拟文件读取和写入、网络请求、数据库操作等。
74
+ 2. **控制测试环境**:通过替换特定对象,可以精确控制测试中的行为,使得测试更加可靠和可控。
75
+ 3. **验证调用**:可以检查被替换对象的调用情况,如是否被调用、调用次数、传入的参数等。
76
+
77
+ ### 在 `test_subscription_manager.py` 中的应用
78
+
79
+ ```python
80
+ @patch('builtins.open', new_callable=mock_open, read_data=json.dumps(["DjangoPeng/openai-quickstart", "some/repo"]))
81
+ def test_save_subscriptions(self, mock_file):
82
+ # 测试代码...
83
+ ```
84
+
85
+ **代码解释:**
86
+
87
+ 1. **`@patch('builtins.open', new_callable=mock_open, read_data=json.dumps(["DjangoPeng/openai-quickstart", "some/repo"]))`**:
88
+ - **`'builtins.open'`**:表示我们要替换 Python 内置的 `open` 函数,因为在 `SubscriptionManager` 中会使用 `open` 来读写文件。
89
+ - **`new_callable=mock_open`**:指示 `patch` 使用 `mock_open` 来替换 `open`。`mock_open` 是一个专门用于模拟文件操作的工具,它能够模拟文件的打开、读取、写入等行为。
90
+ - **`read_data=json.dumps(["DjangoPeng/openai-quickstart", "some/repo"])`**:指定当文件被读取时,`mock_open` 将返回的模拟文件内容。在这个例子中,文件内容是一个 JSON 字符串,表示一个包含订阅数据的列表。
91
+
92
+ 2. **模拟文件操作**:
93
+ - 在测试 `save_subscriptions` 和 `load_subscriptions` 方法时,`@patch` 替换了真实的文件操作,使得测试环境完全受控,不依赖外部的文件系统。
94
+ - 使用 `mock_open` 替换 `open` 后,所有针对文件的操作都变成了对模拟对象的操作,这样可以捕获和检查这些操作的细节(如写入内容、调用次数等)。
95
+
96
+ 3. **`mock_file` 参数**:
97
+ - `mock_file` 是 `mock_open` 返回的模拟对象,它被传递到测试函数中,允许测试代码对其进行检查和验证。例如,`mock_file.assert_called_with` 用于验证 `open` 是否以特定的参数被调用。
98
+
99
+ ### 其他常见用法
100
+
101
+ - **`@patch.object`**:用于替换特定对象的属性。
102
+
103
+ ```python
104
+ @patch.object(SomeClass, 'some_method')
105
+ def test_some_method(self, mock_method):
106
+ # 测试代码...
107
+ ```
108
+
109
+ - **`@patch.multiple`**:用于一次性替换多个对象的属性。
110
+
111
+ ```python
112
+ @patch.multiple(SomeClass, method1=DEFAULT, method2=DEFAULT)
113
+ def test_multiple_methods(self, method1, method2):
114
+ # 测试代码...
115
+ ```
116
+
117
+ ### 小结
118
+
119
+ - `@patch` 是单元测试中替换和模拟依赖的强大工具,能够使测试更加可靠和独立。
120
+ - 在 `test_subscription_manager.py` 中,我们使用 `@patch` 模拟了文件操作,从而避免了对实际文件系统的依赖,同时能够检查和验证文件操作的正确性。
121
+
122
+
123
+ ## `MagicMock` 模拟工具
124
+
125
+ `MagicMock` 是 `unittest.mock` 模块中的一个强大的模拟工具。它是 `Mock` 类的子类,继承了 `Mock` 的所有功能,并扩展了一些额外的功能,使其更强大和灵活。在单元测试中,`MagicMock` 通常用于替代或模拟某些对象的行为,从而控制测试环境,避免依赖外部资源或复杂的逻辑。
126
+
127
+ ### 主要功能和特点
128
+
129
+ 1. **模拟对象的方法和属性**:
130
+ - `MagicMock` 可以模拟任何对象的属性和方法。在测试中,您可以随意定义这些属性和方法的返回值、调用次数、传入的参数等。
131
+
132
+ 2. **自动处理魔术方法**:
133
+ - `MagicMock` 可以自动处理 Python 中的魔术方法(如 `__str__`、`__call__`、`__iter__` 等)。这使得它在模拟类或复杂对象时更加灵活。
134
+
135
+ 3. **行为定义**:
136
+ - 您可以通过设置 `MagicMock` 的返回值或副作用(side effect)来定义其行为。例如,可以指定某个方法在调用时返回特定的值,或引发特定的异常。
137
+
138
+ 4. **调用检查**:
139
+ - `MagicMock` 记录所有的调用信息,您可以在测试中检查这些信息,以验证某些方法是否被调用过,调用了几次,传入了哪些参数等。
140
+
141
+ ### 在 `test_report_generator.py` 中的应用
142
+
143
+ 在 `test_report_generator.py` 中,`MagicMock` 被用来模拟 `LLM`(大语言模型)的行为。这是因为在实际的测试中,调用真正的 LLM 可能会消耗大量资源或依赖外部服务,而我们只关心 `ReportGenerator` 是否正确调用了 LLM 并处理其返回结果。因此,我们使用 `MagicMock` 来替代真实的 LLM。
144
+
145
+ ```python
146
+ self.mock_llm = MagicMock()
147
+ ```
148
+
149
+ - **`MagicMock` 作为 LLM 的模拟对象**:这里的 `MagicMock` 对象 `self.mock_llm` 被传递给 `ReportGenerator`。在测试中,`self.mock_llm` 的 `generate_daily_report` 方法被模拟,返回一个我们指定的报告字符串 `mock_report`。
150
+
151
+ - **模拟方法的返回值**:
152
+
153
+ ```python
154
+ self.mock_llm.generate_daily_report.return_value = mock_report
155
+ ```
156
+
157
+ 这行代码设置了 `generate_daily_report` 方法的返回值为 `mock_report`,这样在测试中调用这个方法时,总是返回我们预期的报告内容。
158
+
159
+ - **验证调用**:
160
+
161
+ ```python
162
+ self.mock_llm.generate_daily_report.assert_called_once_with(self.markdown_content)
163
+ ```
164
+
165
+ 通过 `assert_called_once_with`,我们验证 `generate_daily_report` 方法是否被调用了一次,并且传入的参数与预期一致。
166
+
167
+ ### 小结
168
+
169
+ `MagicMock` 是一个非常灵活和强大的工具,允许您在测试中替代复杂对象或外部依赖,模拟其行为,并验证其调用情况。它在单元测试中被广泛使用,尤其适合模拟依赖注入、API 调用、数据库操作等场景,使得测试更加独立、可控和高效。
images/build_docker_image.jpg ADDED
images/gradio_demo.png ADDED
images/gradio_v0.8_github.png ADDED
images/gradio_v0.8_hn.png ADDED
logs/DaemonProcess.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/app.2024-09-20_10-00-00_575591.log ADDED
The diff for this file is too large to render. See raw diff
 
logs/app.log ADDED
The diff for this file is too large to render. See raw diff
 
prompts/github_ollama_prompt.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 你是一个热爱开源社区的技术爱好者,经常关注 GitHub 上热门开源项目的进展。
2
+
3
+ 任务:
4
+ 1.你收到的开源项目 Closed issues 分类整理为:新增功能、主要改进,修复问题等。
5
+ 2.将1中的整理结果生成一个中文报告,符合以下的参考格式
6
+
7
+ 格式:
8
+ # {repo} 项目进展
9
+
10
+ ## 时间周期:{date}
11
+
12
+ ## 新增功能
13
+ - langchain-box: 添加langchain box包和DocumentLoader
14
+ - 添加嵌入集成测试
15
+
16
+ ## 主要改进
17
+ - 将@root_validator用法升级以与pydantic 2保持一致
18
+ - 将根验证器升级为与pydantic 2兼容
19
+
20
+ ## 修复问题
21
+ - 修复Azure的json模式问题
22
+ - 修复Databricks Vector Search演示笔记本问题
23
+ - 修复Microsoft Azure Cosmos集成测试中的连接字符串问题
prompts/github_openai_prompt.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 你接下来收到的都是开源项目的最新进展。
2
+
3
+ 你根据进展,总结成一个中文的报告,以 项目名称和日期 开头,包含:新增功能、主要改进,修复问题等章节。
4
+
5
+ 参考示例如下:
6
+
7
+ # LangChain 项目进展
8
+
9
+ ## 时间周期:2024-08-13至2024-08-18
10
+
11
+ ## 新增功能
12
+ - langchain-box: 添加langchain box包和DocumentLoader
13
+ - 添加嵌入集成测试
14
+
15
+ ## 主要改进
16
+ - 将@root_validator用法升级以与pydantic 2保持一致
17
+ - 将根验证器升级为与pydantic 2兼容
18
+
19
+ ## 修复问题
20
+ - 修复Azure的json模式问题
21
+ - 修复Databricks Vector Search演示笔记本问题
22
+ - 修复Microsoft Azure Cosmos集成测试中的连接字符串问题
prompts/hacker_news_daily_report_ollama_prompt.txt ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 你是一个关注 Hacker News 的技术专家,擅于洞察技术热点和发展趋势。
2
+
3
+ 任务:
4
+ 1.你的技术经验分类整理 Hacker News 所有热点话题,
5
+ 2.根据话题出现次数,总结今天最热门的 Top 3 技术趋势,并保留原始链接。
6
+ 3.报告格式参考下面示例。
7
+
8
+ 格式:
9
+ # 【Hacker News 前沿技术趋势】
10
+
11
+ 时间: {日期}
12
+
13
+ ## Top 1:Rust 编程语言引发热门讨论
14
+
15
+ 关于 Rust 的多个讨论,尤其是关于小字符串处理和安全垃圾回收技术的文章,显示出 Rust 语言在现代编程中的应用迅速增长,开发者对其性能和安全特性的兴趣不断上升。
16
+
17
+ 详细内容见相关链接:
18
+
19
+ - https://fasterthanli.me/articles/small-strings-in-rust
20
+ - https://kyju.org/blog/rust-safe-garbage-collection/
21
+
22
+ ### Top 2: Nvidia 在 AI 领域中的强大竞争力
23
+
24
+ 有关于 Nvidia 的四个未知客户,每个人购买价值超过 3 亿美元的讨论,显示出 Nvidia 在 AI 领域中的强大竞争力。
25
+
26
+ 详细内容见相关链接:
27
+
28
+ - https://fortune.com/2024/08/29/nvidia-jensen-huang-ai-customers/
29
+
30
+ ### Top 3:Bubbletea 的应用性和可能性
31
+
32
+ 有关于构建 Bubbletea 程序的讨论,展示了 Bubbletea 在开发中的应用性和可能性。
33
+
34
+ 详细内容见相关链接:
35
+
36
+ - https://leg100.github.io/en/posts/building-bubbletea-programs/
37
+ - https://www.sfchronicle.com/crime/article/tesla-sentry-mode-police-evidence-19731000.php
prompts/hacker_news_daily_report_openai_prompt.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 你是一个关注 Hacker News 的技术专家,擅于洞察技术热点和发展趋势。
2
+
3
+ 任务:
4
+ 1.你的技术经验分类整理 Hacker News 所有热点话题,
5
+ 2.根据话题出现次数,总结今天最热门的 Top 3 技术趋势,并保留原始链接。
6
+ 3.报告格式参考下面示例。
7
+
8
+ 格式:
9
+ # 【Hacker News 前沿技术趋势】
10
+
11
+ 时间: {日期}
12
+
13
+ ## Top 1:Rust 编程语言引发热门讨论
14
+
15
+ 关于 Rust 的多个讨论,尤其是关于小字符串处理和安全垃圾回收技术的文章,显示出 Rust 语言在现代编程中的应用迅速增长,开发者对其性能和安全特性的兴趣不断上升。
16
+
17
+ 详细内容见相关链接:
18
+ - https://fasterthanli.me/articles/small-strings-in-rust
19
+ - https://kyju.org/blog/rust-safe-garbage-collection/
20
+
21
+ ### Top 2: Nvidia 在 AI 领域中的强大竞争力
22
+
23
+ 有关于 Nvidia 的四个未知客户,每个人购买价值超过 3 亿美元的讨论,显示出 Nvidia 在 AI 领域中的强大竞争力。
24
+
25
+ 详细内容见相关链接:
26
+ - https://fortune.com/2024/08/29/nvidia-jensen-huang-ai-customers/
27
+
28
+ ### Top 3:Bubbletea 的应用性和可能性
29
+
30
+ 有关于构建 Bubbletea 程序的讨论,展示了 Bubbletea 在开发中的应用性和可能性。
31
+
32
+ 详细内容见相关链接:
33
+ - https://leg100.github.io/en/posts/building-bubbletea-programs/
34
+ - https://www.sfchronicle.com/crime/article/tesla-sentry-mode-police-evidence-19731000.php
35
+
prompts/hacker_news_hours_topic_ollama_prompt.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 你是一个关注 Hacker News 的技术专家,擅于洞察技术热点和发展趋势。
2
+
3
+ 任务:
4
+ 1.根据你收到的 Hacker News Top List,分析和总结当前技术圈讨论的热点话题。
5
+ 2.使用中文生成报告,内容仅包含5个热点话题,并保留原始链接。
6
+
7
+ 格式:
8
+ # Hacker News 热门话题 {日期} {小时}
9
+
10
+ 1. **Rust 编程语言的讨论**:关于 Rust 的多个讨论,尤其是关于小字符串处理和安全垃圾回收技术的文章,显示出 Rust 语言在现代编程中的应用迅速增长,开发者对其性能和安全特性的兴趣不断上升。
11
+ - https://fasterthanli.me/articles/small-strings-in-rust
12
+ - https://kyju.org/blog/rust-safe-garbage-collection/
13
+
14
+ 2. **网络安全思考**:有关于“防守者和攻击者思考方式”的讨论引发了对网络安全策略的深入思考。这种对比强调防守与攻击之间的心理与技术差异,表明网络安全领域对攻击者策略的关注日益增加。
15
+ - https://github.com/JohnLaTwC/Shared/blob/master/Defenders%20think%20in%20lists.%20Attackers%20think%20in%20graphs.%20As%20long%20as%20this%20is%20true%2C%20attackers%20win.md
16
+
17
+ 3. **Linux 开发者的理由**:关于 Linux 的讨论,强调了 Linux 在现代开发中的重要性和应用性。
18
+ - https://opiero.medium.com/why-you-should-learn-linux-9ceace168e5c
19
+
20
+ 4. **Nvidia 的秘密客户**:有关于 Nvidia 的四个未知客户,每个人购买价值超过 3 亿美元的讨论,显示出 N 维达在 AI 领域中的强大竞争力。
21
+ - https://fortune.com/2024/08/29/nvidia-jensen-huang-ai-customers/
22
+
23
+ 5. **Building Bubbletea Programs**:有关于构建 Bubbletea 程序的讨论,展示了 Bubbletea 在开发中的应用性和可能性。
24
+ - https://leg100.github.io/en/posts/building-bubbletea-programs/
prompts/hacker_news_hours_topic_openai_prompt.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 你是一个关注 Hacker News 的技术专家,擅于洞察技术热点和发展趋势。
2
+
3
+ 任务:
4
+ 1.根据你收到的 Hacker News Top List,分析和总结当前技术圈讨论的热点话题。
5
+ 2.使用中文生成报告,内容仅包含5个热点话题,并保留原始链接。
6
+
7
+ 格式:
8
+ # Hacker News 热门话题 {日期} {小时}
9
+
10
+ 1. **Rust 编程语言的讨论**:关于 Rust 的多个讨论,尤其是关于小字符串处理和安全垃圾回收技术的文章,显示出 Rust 语言在现代编程中的应用迅速增长,开发者对其性能和安全特性的兴趣不断上升。
11
+ - https://fasterthanli.me/articles/small-strings-in-rust
12
+ - https://kyju.org/blog/rust-safe-garbage-collection/
13
+
14
+ 2. **网络安全思考**:有关于“防守者和攻击者思考方式”的讨论引发了对网络安全策略的深入思考。这种对比强调防守与攻击之间的心理与技术差异,表明网络安全领域对攻击者策略的关注日益增加。
15
+ - https://github.com/JohnLaTwC/Shared/blob/master/Defenders%20think%20in%20lists.%20Attackers%20think%20in%20graphs.%20As%20long%20as%20this%20is%20true%2C%20attackers%20win.md
16
+
17
+ 3. **Linux 开发者的理由**:关于 Linux 的讨论,强调了 Linux 在现代开发中的重要性和应用性。
18
+ - https://opiero.medium.com/why-you-should-learn-linux-9ceace168e5c
19
+
20
+ 4. **Nvidia 的秘密客户**:有关于 Nvidia 的四个未知客户,每个人购买价值超过 3 亿美元的讨论,显示出 Nvidia 在 AI 领域中的强大竞争力。
21
+ - https://fortune.com/2024/08/29/nvidia-jensen-huang-ai-customers/
22
+
23
+ 5. **Building Bubbletea Programs**:有关于构建 Bubbletea 程序的讨论,展示了 Bubbletea 在开发中的应用性和可能性。
24
+ - https://leg100.github.io/en/posts/building-bubbletea-programs/
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ requests==2.31.0
2
+ gradio==4.42.0
3
+ loguru==0.7.2
4
+ markdown2==2.5.0
5
+ openai==1.44.0
6
+ schedule==1.2.2
src/__pycache__/cli.cpython-311.pyc ADDED
Binary file (2.98 kB). View file
 
src/__pycache__/command_handler.cpython-310.pyc ADDED
Binary file (3.55 kB). View file
 
src/__pycache__/config.cpython-310.pyc ADDED
Binary file (1.42 kB). View file
 
src/__pycache__/config.cpython-311.pyc ADDED
Binary file (1.54 kB). View file
 
src/__pycache__/config.cpython-312.pyc ADDED
Binary file (1.19 kB). View file
 
src/__pycache__/email_notifier.cpython-311.pyc ADDED
Binary file (2.19 kB). View file
 
src/__pycache__/github_client.cpython-310.pyc ADDED
Binary file (3.88 kB). View file
 
src/__pycache__/github_client.cpython-311.pyc ADDED
Binary file (1.18 kB). View file
 
src/__pycache__/github_client.cpython-312.pyc ADDED
Binary file (5.88 kB). View file
 
src/__pycache__/hacker_news_client.cpython-310.pyc ADDED
Binary file (2.6 kB). View file
 
src/__pycache__/llm.cpython-310.pyc ADDED
Binary file (3.61 kB). View file
 
src/__pycache__/logger.cpython-310.pyc ADDED
Binary file (531 Bytes). View file
 
src/__pycache__/logger.cpython-312.pyc ADDED
Binary file (564 Bytes). View file
 
src/__pycache__/notifier.cpython-310.pyc ADDED
Binary file (3.78 kB). View file
 
src/__pycache__/notifier.cpython-311.pyc ADDED
Binary file (693 Bytes). View file
 
src/__pycache__/report_generator.cpython-310.pyc ADDED
Binary file (3.81 kB). View file
 
src/__pycache__/report_generator.cpython-311.pyc ADDED
Binary file (1.15 kB). View file
 
src/__pycache__/scheduler.cpython-310.pyc ADDED
Binary file (1.15 kB). View file
 
src/__pycache__/scheduler.cpython-311.pyc ADDED
Binary file (1.59 kB). View file
 
src/__pycache__/scheduler.cpython-312.pyc ADDED
Binary file (1.52 kB). View file
 
src/__pycache__/subscription_manager.cpython-310.pyc ADDED
Binary file (1.58 kB). View file
 
src/__pycache__/subscription_manager.cpython-311.pyc ADDED
Binary file (2.58 kB). View file
 
src/command_handler.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/command_handler.py
2
+
3
+ import argparse # 导入argparse库,用于处理命令行参数解析
4
+
5
+ class CommandHandler:
6
+ def __init__(self, github_client, subscription_manager, report_generator):
7
+ # 初始化CommandHandler,接收GitHub客户端、订阅管理器和报告生成器
8
+ self.github_client = github_client
9
+ self.subscription_manager = subscription_manager
10
+ self.report_generator = report_generator
11
+ self.parser = self.create_parser() # 创建命令行解析器
12
+
13
+ def create_parser(self):
14
+ # 创建并配置命令行解析器
15
+ parser = argparse.ArgumentParser(
16
+ description='GitHub Sentinel Command Line Interface',
17
+ formatter_class=argparse.RawTextHelpFormatter
18
+ )
19
+ subparsers = parser.add_subparsers(title='Commands', dest='command')
20
+
21
+ # 添加订阅命令
22
+ parser_add = subparsers.add_parser('add', help='Add a subscription')
23
+ parser_add.add_argument('repo', type=str, help='The repository to subscribe to (e.g., owner/repo)')
24
+ parser_add.set_defaults(func=self.add_subscription)
25
+
26
+ # 删除订阅命令
27
+ parser_remove = subparsers.add_parser('remove', help='Remove a subscription')
28
+ parser_remove.add_argument('repo', type=str, help='The repository to unsubscribe from (e.g., owner/repo)')
29
+ parser_remove.set_defaults(func=self.remove_subscription)
30
+
31
+ # 列出所有订阅命令
32
+ parser_list = subparsers.add_parser('list', help='List all subscriptions')
33
+ parser_list.set_defaults(func=self.list_subscriptions)
34
+
35
+ # 导出每日进展命令
36
+ parser_export = subparsers.add_parser('export', help='Export daily progress')
37
+ parser_export.add_argument('repo', type=str, help='The repository to export progress from (e.g., owner/repo)')
38
+ parser_export.set_defaults(func=self.export_daily_progress)
39
+
40
+ # 导出特定日期范围进展命令
41
+ parser_export_range = subparsers.add_parser('export-range', help='Export progress over a range of dates')
42
+ parser_export_range.add_argument('repo', type=str, help='The repository to export progress from (e.g., owner/repo)')
43
+ parser_export_range.add_argument('days', type=int, help='The number of days to export progress for')
44
+ parser_export_range.set_defaults(func=self.export_progress_by_date_range)
45
+
46
+ # 生成日报命令
47
+ parser_generate = subparsers.add_parser('generate', help='Generate daily report from markdown file')
48
+ parser_generate.add_argument('file', type=str, help='The markdown file to generate report from')
49
+ parser_generate.set_defaults(func=self.generate_daily_report)
50
+
51
+ # 帮助命令
52
+ parser_help = subparsers.add_parser('help', help='Show help message')
53
+ parser_help.set_defaults(func=self.print_help)
54
+
55
+ return parser # 返回配置好的解析器
56
+
57
+ # 下面是各种命令对应的方法实现,每个方法都使用了相应的管理器来执行实际操作,并输出结果信息
58
+ def add_subscription(self, args):
59
+ self.subscription_manager.add_subscription(args.repo)
60
+ print(f"Added subscription for repository: {args.repo}")
61
+
62
+ def remove_subscription(self, args):
63
+ self.subscription_manager.remove_subscription(args.repo)
64
+ print(f"Removed subscription for repository: {args.repo}")
65
+
66
+ def list_subscriptions(self, args):
67
+ subscriptions = self.subscription_manager.list_subscriptions()
68
+ print("Current subscriptions:")
69
+ for sub in subscriptions:
70
+ print(f" - {sub}")
71
+
72
+ def export_daily_progress(self, args):
73
+ self.github_client.export_daily_progress(args.repo)
74
+ print(f"Exported daily progress for repository: {args.repo}")
75
+
76
+ def export_progress_by_date_range(self, args):
77
+ self.github_client.export_progress_by_date_range(args.repo, days=args.days)
78
+ print(f"Exported progress for the last {args.days} days for repository: {args.repo}")
79
+
80
+ def generate_daily_report(self, args):
81
+ self.report_generator.generate_github_report(args.file)
82
+ print(f"Generated daily report from file: {args.file}")
83
+
84
+ def print_help(self, args=None):
85
+ self.parser.print_help() # 输出帮助信息
src/command_tool.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import shlex  # tokenize user input the way a shell would

from config import Config  # configuration management
from github_client import GitHubClient  # GitHub API operations
from report_generator import ReportGenerator  # report generation
from llm import LLM  # language-model wrapper
from subscription_manager import SubscriptionManager  # subscription management
from command_handler import CommandHandler  # CLI command dispatch
from logger import LOG  # shared logger


def main():
    """Run the interactive GitHub Sentinel command shell until 'exit'/'quit'."""
    config = Config()
    github_client = GitHubClient(config.github_token)
    llm = LLM(config)
    report_generator = ReportGenerator(llm, config.report_types)
    subscription_manager = SubscriptionManager(config.subscriptions_file)
    command_handler = CommandHandler(github_client, subscription_manager, report_generator)

    parser = command_handler.parser
    command_handler.print_help()  # show the available commands on startup

    while True:
        try:
            line = input("GitHub Sentinel> ")  # wait for the next command
            if line in ['exit', 'quit']:
                break
            try:
                parsed = parser.parse_args(shlex.split(line))
                if parsed.command is None:
                    # Nothing recognized; just re-prompt.
                    continue
                parsed.func(parsed)  # dispatch to the handler bound via set_defaults
            except SystemExit as e:
                # argparse calls sys.exit() on bad input; swallow it so the shell survives.
                LOG.error("Invalid command. Type 'help' to see the list of available commands.")
        except Exception as e:
            LOG.error(f"Unexpected error: {e}")  # log anything else without crashing the loop


if __name__ == '__main__':
    main()  # run the shell when executed directly
src/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os


class Config:
    """Application configuration loaded from a JSON file.

    Secrets can be supplied through the environment instead of the file:
    EMAIL_PASSWORD overrides ``email.password`` and GITHUB_TOKEN overrides
    ``github.token``.
    """

    def __init__(self, config_file='config.json'):
        """Load configuration from *config_file*.

        The default matches the previously hard-coded path, so existing
        ``Config()`` callers keep working unchanged.
        """
        self.config_file = config_file
        self.load_config()

    def load_config(self):
        """(Re)read the JSON config file and populate the attributes below."""
        # Explicit encoding so the file parses identically on every platform.
        with open(self.config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)

        # Email settings; the password may come from the environment.
        self.email = config.get('email', {})
        self.email['password'] = os.getenv('EMAIL_PASSWORD', self.email.get('password', ''))

        # GitHub settings.
        github_config = config.get('github', {})
        self.github_token = os.getenv('GITHUB_TOKEN', github_config.get('token'))
        self.subscriptions_file = github_config.get('subscriptions_file')
        self.freq_days = github_config.get('progress_frequency_days', 1)
        self.exec_time = github_config.get('progress_execution_time', "08:00")

        # LLM settings.
        llm_config = config.get('llm', {})
        self.llm_model_type = llm_config.get('model_type', 'openai')
        self.openai_model_name = llm_config.get('openai_model_name', 'gpt-4o-mini')
        self.ollama_model_name = llm_config.get('ollama_model_name', 'llama3')
        self.ollama_api_url = llm_config.get('ollama_api_url', 'http://localhost:11434/api/chat')

        # Which reports to generate each cycle.
        self.report_types = config.get('report_types', ["github", "hacker_news"])

        # Slack settings.
        slack_config = config.get('slack', {})
        self.slack_webhook_url = slack_config.get('webhook_url')
src/daemon_process.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import schedule # 导入 schedule 实现定时任务执行器
2
+ import time # 导入time库,用于控制时间间隔
3
+ import os # 导入os模块用于文件和目录操作
4
+ import signal # 导入signal库,用于信号处理
5
+ import sys # 导入sys库,用于执行系统相关的操作
6
+ from datetime import datetime # 导入 datetime 模块用于获取当前日期
7
+
8
+ from config import Config # 导入配置管理类
9
+ from github_client import GitHubClient # 导入GitHub客户端类,处理GitHub API请求
10
+ from hacker_news_client import HackerNewsClient
11
+ from notifier import Notifier # 导入通知器类,用于发送通知
12
+ from report_generator import ReportGenerator # 导入报告生成器类
13
+ from llm import LLM # 导入语言模型类,可能用于生成报告内容
14
+ from subscription_manager import SubscriptionManager # 导入订阅管理器类,管理GitHub仓库订阅
15
+ from logger import LOG # 导入日志记录器
16
+
17
+
18
def graceful_shutdown(signum, frame):
    """SIGTERM handler: log the termination request, then exit with status 0."""
    LOG.info("[优雅退出]守护进程接收到终止信号")
    sys.exit(0)  # clean exit so supervisors see a normal stop
22
+
23
def github_job(subscription_manager, github_client, report_generator, notifier, days):
    """Export progress for every subscribed repo, summarize it, and notify."""
    LOG.info("[开始执行定时任务]GitHub Repo 项目进展报告")
    repos = subscription_manager.list_subscriptions()  # all currently subscribed repos
    LOG.info(f"订阅列表:{repos}")
    for repo in repos:
        # Export the last `days` days of activity to a markdown file,
        # summarize that file, then push the summary out via the notifier.
        progress_file = github_client.export_progress_by_date_range(repo, days)
        report, _ = report_generator.generate_github_report(progress_file)
        notifier.notify_github_report(repo, report)
    LOG.info("[定时任务执行完毕]")
34
+
35
+
36
def hn_topic_job(hacker_news_client, report_generator):
    """Snapshot the current Hacker News top stories and summarize the hot topics."""
    LOG.info("[开始执行定时任务]Hacker News 热点话题跟踪")
    stories_file = hacker_news_client.export_top_stories()
    # The generator persists the report itself; the return value is not needed here.
    report_generator.generate_hn_topic_report(stories_file)
    LOG.info("[定时任务执行完毕]")
41
+
42
+
43
def hn_daily_job(hacker_news_client, report_generator, notifier):
    """Aggregate today's Hacker News topic reports into a daily digest and notify."""
    # NOTE(review): hacker_news_client is unused here; kept for a uniform job signature.
    LOG.info("[开始执行定时任务]Hacker News 今日前沿技术趋势")
    # Today's date in 'YYYY-MM-DD' form selects the directory of hourly reports.
    today = datetime.now().strftime('%Y-%m-%d')
    daily_dir = os.path.join('hacker_news', today)
    # Build the daily summary from that directory and send it out.
    report, _ = report_generator.generate_hn_daily_report(daily_dir)
    notifier.notify_hn_report(today, report)
    LOG.info("[定时任务执行完毕]")
53
+
54
+
55
def main():
    """Wire up all components, register the schedules, and run forever."""
    # Exit cleanly when the daemon-control script sends SIGTERM.
    signal.signal(signal.SIGTERM, graceful_shutdown)

    config = Config()
    github_client = GitHubClient(config.github_token)
    hacker_news_client = HackerNewsClient()
    notifier = Notifier(config.email)
    llm = LLM(config)
    report_generator = ReportGenerator(llm, config.report_types)
    subscription_manager = SubscriptionManager(config.subscriptions_file)

    # Run once at startup (comment out if the immediate run is unwanted).
    # github_job(subscription_manager, github_client, report_generator, notifier, config.freq_days)
    hn_daily_job(hacker_news_client, report_generator, notifier)

    # GitHub progress report: every `freq_days` days at the configured time.
    schedule.every(config.freq_days).days.at(
        config.exec_time
    ).do(github_job, subscription_manager, github_client, report_generator, notifier, config.freq_days)

    # Hacker News topic snapshot: every 4 hours, on the hour.
    schedule.every(4).hours.at(":00").do(hn_topic_job, hacker_news_client, report_generator)

    # Hacker News daily digest: every morning at 10:00.
    schedule.every().day.at("10:00").do(hn_daily_job, hacker_news_client, report_generator, notifier)

    try:
        # Daemon loop: keep firing pending jobs until killed.
        while True:
            schedule.run_pending()
            time.sleep(1)  # short sleep keeps CPU usage low between checks
    except Exception as e:
        LOG.error(f"主进程发生异常: {str(e)}")
        sys.exit(1)


if __name__ == '__main__':
    main()