Tsitsi19 committed on
Commit
3dc7a0f
·
verified ·
1 Parent(s): 7ace9b2

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. Dockerfile +37 -0
  3. OpenManus/.gitattributes +30 -0
  4. OpenManus/.github/ISSUE_TEMPLATE/config.yml +5 -0
  5. OpenManus/.github/ISSUE_TEMPLATE/request_new_features.yaml +21 -0
  6. OpenManus/.github/ISSUE_TEMPLATE/show_me_the_bug.yaml +44 -0
  7. OpenManus/.github/PULL_REQUEST_TEMPLATE.md +17 -0
  8. OpenManus/.github/dependabot.yml +58 -0
  9. OpenManus/.github/workflows/build-package.yaml +33 -0
  10. OpenManus/.github/workflows/environment-corrupt-check.yaml +33 -0
  11. OpenManus/.github/workflows/pr-autodiff.yaml +138 -0
  12. OpenManus/.github/workflows/pre-commit.yaml +26 -0
  13. OpenManus/.github/workflows/stale.yaml +23 -0
  14. OpenManus/.github/workflows/top-issues.yaml +29 -0
  15. OpenManus/.gitignore +199 -0
  16. OpenManus/.pre-commit-config.yaml +39 -0
  17. OpenManus/.vscode/extensions.json +8 -0
  18. OpenManus/.vscode/settings.json +20 -0
  19. OpenManus/CODE_OF_CONDUCT.md +162 -0
  20. OpenManus/Dockerfile +13 -0
  21. OpenManus/LICENSE +21 -0
  22. OpenManus/README.md +185 -0
  23. OpenManus/README_ja.md +180 -0
  24. OpenManus/README_ko.md +180 -0
  25. OpenManus/README_zh.md +187 -0
  26. OpenManus/app/__init__.py +10 -0
  27. OpenManus/app/agent/__init__.py +16 -0
  28. OpenManus/app/agent/base.py +196 -0
  29. OpenManus/app/agent/browser.py +124 -0
  30. OpenManus/app/agent/manus.py +165 -0
  31. OpenManus/app/agent/mcp.py +185 -0
  32. OpenManus/app/agent/react.py +38 -0
  33. OpenManus/app/agent/swe.py +24 -0
  34. OpenManus/app/agent/toolcall.py +258 -0
  35. OpenManus/app/bedrock.py +334 -0
  36. OpenManus/app/config.py +320 -0
  37. OpenManus/app/exceptions.py +13 -0
  38. OpenManus/app/flow/__init__.py +0 -0
  39. OpenManus/app/flow/base.py +57 -0
  40. OpenManus/app/flow/flow_factory.py +30 -0
  41. OpenManus/app/flow/planning.py +424 -0
  42. OpenManus/app/llm.py +773 -0
  43. OpenManus/app/logger.py +42 -0
  44. OpenManus/app/mcp/__init__.py +0 -0
  45. OpenManus/app/mcp/server.py +180 -0
  46. OpenManus/app/prompt/__init__.py +0 -0
  47. OpenManus/app/prompt/browser.py +94 -0
  48. OpenManus/app/prompt/cot.py +15 -0
  49. OpenManus/app/prompt/manus.py +10 -0
  50. OpenManus/app/prompt/mcp.py +43 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ OpenManus-main/assets/community_group.jpg filter=lfs diff=lfs merge=lfs -text
37
+ OpenManus-main/examples/use_case/pictures/japan-travel-plan-1.png filter=lfs diff=lfs merge=lfs -text
38
+ OpenManus/assets/community_group.jpg filter=lfs diff=lfs merge=lfs -text
39
+ OpenManus/examples/use_case/pictures/japan-travel-plan-1.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─────────────────────────────────────────────────────────────
2
+ # OpenManus Dockerfile for Hugging Face *Docker* Space + Gradio
3
+ # ─────────────────────────────────────────────────────────────
4
+
5
+ FROM python:3.12-slim
6
+
7
+ # 1) OS packages + uv (version pinned below for compatibility with the old --prerelease flag)
8
+ RUN apt-get update && apt-get install -y --no-install-recommends unzip && \
9
+ rm -rf /var/lib/apt/lists/* && \
10
+ pip install --no-cache-dir uv==0.6.13
11
+
12
+ # 2) copy everything in the Space repo
13
+ WORKDIR /app
14
+ COPY . /app
15
+
16
+ # 3) unzip the GitHub archive (if you uploaded the raw folder, this is a no‑op)
17
+ RUN if [ -f OpenManus-main.zip ]; then \
18
+ unzip OpenManus-main.zip && mv OpenManus-main OpenManus ; \
19
+ fi
20
+
21
+ # 4) make sure logs dir is writable by HF's non‑root user (uid 1000)
22
+ RUN mkdir -p /app/OpenManus/logs && \
23
+ chown -R 1000:1000 /app/OpenManus
24
+
25
+ # 5) install Python deps (project + gradio UI)
26
+ WORKDIR /app/OpenManus
27
+ RUN uv pip install --system -r requirements.txt && \
28
+ pip install --no-cache-dir "gradio>=4"
29
+
30
+ # 6) ship default config so the agent can start
31
+ RUN cp config/config.example.toml config/config.toml
32
+
33
+ # 7) expose the web UI port for Hugging Face
34
+ EXPOSE 7860
35
+
36
+ # 8) launch the Gradio wrapper
37
+ CMD ["python", "serve.py"]
OpenManus/.gitattributes ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HTML code is incorrectly calculated into statistics, so ignore them
2
+ *.html linguist-detectable=false
3
+ # Auto detect text files and perform LF normalization
4
+ * text=auto eol=lf
5
+ # Ensure shell scripts use LF (Linux style) line endings on Windows
6
+ *.sh text eol=lf
7
+ # Treat specific binary files as binary and prevent line ending conversion
8
+ *.png binary
9
+ *.jpg binary
10
+ *.gif binary
11
+ *.ico binary
12
+ *.jpeg binary
13
+ *.mp3 binary
14
+ *.zip binary
15
+ *.bin binary
16
+ # Preserve original line endings for specific document files
17
+ *.doc text eol=crlf
18
+ *.docx text eol=crlf
19
+ *.pdf binary
20
+ # Ensure source code and script files use LF line endings
21
+ *.py text eol=lf
22
+ *.js text eol=lf
23
+ *.html text eol=lf
24
+ *.css text eol=lf
25
+ # Specify custom diff driver for specific file types
26
+ *.md diff=markdown
27
+ *.json diff=json
28
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
29
+ *.mov filter=lfs diff=lfs merge=lfs -text
30
+ *.webm filter=lfs diff=lfs merge=lfs -text
OpenManus/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: "Join the Community Group"
4
+ about: Join the OpenManus community to discuss and get help from others
5
+ url: https://github.com/mannaandpoem/OpenManus?tab=readme-ov-file#community-group
OpenManus/.github/ISSUE_TEMPLATE/request_new_features.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "🤔 Request new features"
2
+ description: Suggest ideas or features you’d like to see implemented in OpenManus.
3
+ labels: enhancement
4
+ body:
5
+ - type: textarea
6
+ id: feature-description
7
+ attributes:
8
+ label: Feature description
9
+ description: |
10
+ Provide a clear and concise description of the proposed feature
11
+ validations:
12
+ required: true
13
+ - type: textarea
14
+ id: your-feature
15
+ attributes:
16
+ label: Your Feature
17
+ description: |
18
+ Explain your idea or implementation process, if any. Optionally, include a Pull Request URL.
19
+ Ensure accompanying docs/tests/examples are provided for review.
20
+ validations:
21
+ required: false
OpenManus/.github/ISSUE_TEMPLATE/show_me_the_bug.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "🪲 Show me the Bug"
2
+ description: Report a bug encountered while using OpenManus and seek assistance.
3
+ labels: bug
4
+ body:
5
+ - type: textarea
6
+ id: bug-description
7
+ attributes:
8
+ label: Bug Description
9
+ description: |
10
+ Clearly describe the bug you encountered
11
+ validations:
12
+ required: true
13
+ - type: textarea
14
+ id: solve-method
15
+ attributes:
16
+ label: Bug solved method
17
+ description: |
18
+ If resolved, explain the solution. Optionally, include a Pull Request URL.
19
+ If unresolved, provide additional details to aid investigation
20
+ validations:
21
+ required: true
22
+ - type: textarea
23
+ id: environment-information
24
+ attributes:
25
+ label: Environment information
26
+ description: |
27
+ System: e.g., Ubuntu 22.04
28
+ Python: e.g., 3.12
29
+ OpenManus version: e.g., 0.1.0
30
+ value: |
31
+ - System version:
32
+ - Python version:
33
+ - OpenManus version or branch:
34
+ - Installation method (e.g., `pip install -r requirements.txt` or `pip install -e .`):
35
+ validations:
36
+ required: true
37
+ - type: textarea
38
+ id: extra-information
39
+ attributes:
40
+ label: Extra information
41
+ description: |
42
+ For example, attach screenshots or logs to help diagnose the issue
43
+ validations:
44
+ required: false
OpenManus/.github/PULL_REQUEST_TEMPLATE.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ **Features**
2
+ <!-- Describe the features or bug fixes in this PR. For bug fixes, link to the issue. -->
3
+
4
+ - Feature 1
5
+ - Feature 2
6
+
7
+ **Feature Docs**
8
+ <!-- Provide RFC, tutorial, or use case links for significant updates. Optional for minor changes. -->
9
+
10
+ **Influence**
11
+ <!-- Explain the impact of these changes for reviewer focus. -->
12
+
13
+ **Result**
14
+ <!-- Include screenshots or logs of unit tests or running results. -->
15
+
16
+ **Other**
17
+ <!-- Additional notes about this PR. -->
OpenManus/.github/dependabot.yml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "pip"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ open-pull-requests-limit: 4
8
+ groups:
9
+ # Group critical packages that might need careful review
10
+ core-dependencies:
11
+ patterns:
12
+ - "pydantic*"
13
+ - "openai"
14
+ - "fastapi"
15
+ - "tiktoken"
16
+ browsergym-related:
17
+ patterns:
18
+ - "browsergym*"
19
+ - "browser-use"
20
+ - "playwright"
21
+ search-tools:
22
+ patterns:
23
+ - "googlesearch-python"
24
+ - "baidusearch"
25
+ - "duckduckgo_search"
26
+ pre-commit:
27
+ patterns:
28
+ - "pre-commit"
29
+ security-all:
30
+ applies-to: "security-updates"
31
+ patterns:
32
+ - "*"
33
+ version-all:
34
+ applies-to: "version-updates"
35
+ patterns:
36
+ - "*"
37
+ exclude-patterns:
38
+ - "pydantic*"
39
+ - "openai"
40
+ - "fastapi"
41
+ - "tiktoken"
42
+ - "browsergym*"
43
+ - "browser-use"
44
+ - "playwright"
45
+ - "googlesearch-python"
46
+ - "baidusearch"
47
+ - "duckduckgo_search"
48
+ - "pre-commit"
49
+
50
+ - package-ecosystem: "github-actions"
51
+ directory: "/"
52
+ schedule:
53
+ interval: "weekly"
54
+ open-pull-requests-limit: 4
55
+ groups:
56
+ actions:
57
+ patterns:
58
+ - "*"
OpenManus/.github/workflows/build-package.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build and upload Python package
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ release:
6
+ types: [created, published]
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - name: Set up Python
14
+ uses: actions/setup-python@v5
15
+ with:
16
+ python-version: '3.12'
17
+ cache: 'pip'
18
+ - name: Install dependencies
19
+ run: |
20
+ python -m pip install --upgrade pip
21
+ pip install -r requirements.txt
22
+ pip install setuptools wheel twine
23
+ - name: Set package version
24
+ run: |
25
+ export VERSION="${GITHUB_REF#refs/tags/v}"
26
+ sed -i "s/version=.*/version=\"${VERSION}\",/" setup.py
27
+ - name: Build and publish
28
+ env:
29
+ TWINE_USERNAME: __token__
30
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
31
+ run: |
32
+ python setup.py bdist_wheel sdist
33
+ twine upload dist/*
OpenManus/.github/workflows/environment-corrupt-check.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Environment Corruption Check
2
+ on:
3
+ push:
4
+ branches: ["main"]
5
+ paths:
6
+ - requirements.txt
7
+ pull_request:
8
+ branches: ["main"]
9
+ paths:
10
+ - requirements.txt
11
+ concurrency:
12
+ group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}
13
+ cancel-in-progress: true
14
+ jobs:
15
+ test-python-versions:
16
+ runs-on: ubuntu-latest
17
+ strategy:
18
+ matrix:
19
+ python-version: ["3.11.11", "3.12.8", "3.13.2"]
20
+ fail-fast: false
21
+ steps:
22
+ - name: Checkout repository
23
+ uses: actions/checkout@v4
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+ - name: Upgrade pip
29
+ run: |
30
+ python -m pip install --upgrade pip
31
+ - name: Install dependencies
32
+ run: |
33
+ pip install -r requirements.txt
OpenManus/.github/workflows/pr-autodiff.yaml ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: PR Diff Summarization
2
+ on:
3
+ # pull_request:
4
+ # branches: [main]
5
+ # types: [opened, ready_for_review, reopened]
6
+ issue_comment:
7
+ types: [created]
8
+ permissions:
9
+ contents: read
10
+ pull-requests: write
11
+ jobs:
12
+ pr-diff-summarization:
13
+ runs-on: ubuntu-latest
14
+ if: |
15
+ (github.event_name == 'pull_request') ||
16
+ (github.event_name == 'issue_comment' &&
17
+ contains(github.event.comment.body, '!pr-diff') &&
18
+ (github.event.comment.author_association == 'CONTRIBUTOR' || github.event.comment.author_association == 'COLLABORATOR' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') &&
19
+ github.event.issue.pull_request)
20
+ steps:
21
+ - name: Get PR head SHA
22
+ id: get-pr-sha
23
+ run: |
24
+ PR_URL="${{ github.event.issue.pull_request.url || github.event.pull_request.url }}"
25
+ # e.g. https://api.github.com/repos/mannaandpoem/OpenManus/pulls/1
26
+ RESPONSE=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" $PR_URL)
27
+ SHA=$(echo $RESPONSE | jq -r '.head.sha')
28
+ TARGET_BRANCH=$(echo $RESPONSE | jq -r '.base.ref')
29
+ echo "pr_sha=$SHA" >> $GITHUB_OUTPUT
30
+ echo "target_branch=$TARGET_BRANCH" >> $GITHUB_OUTPUT
31
+ echo "Retrieved PR head SHA from API: $SHA, target branch: $TARGET_BRANCH"
32
+ - name: Check out code
33
+ uses: actions/checkout@v4
34
+ with:
35
+ ref: ${{ steps.get-pr-sha.outputs.pr_sha }}
36
+ fetch-depth: 0
37
+ - name: Set up Python
38
+ uses: actions/setup-python@v5
39
+ with:
40
+ python-version: '3.11'
41
+ - name: Install dependencies
42
+ run: |
43
+ python -m pip install --upgrade pip
44
+ pip install openai requests
45
+ - name: Create and run Python script
46
+ env:
47
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
48
+ OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
49
+ GH_TOKEN: ${{ github.token }}
50
+ PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
51
+ TARGET_BRANCH: ${{ steps.get-pr-sha.outputs.target_branch }}
52
+ run: |-
53
+ cat << 'EOF' > /tmp/_workflow_core.py
54
+ import os
55
+ import subprocess
56
+ import json
57
+ import requests
58
+ from openai import OpenAI
59
+
60
+ def get_diff():
61
+ result = subprocess.run(
62
+ ['git', 'diff', 'origin/' + os.getenv('TARGET_BRANCH') + '...HEAD'],
63
+ capture_output=True, text=True, check=True)
64
+ return '\n'.join(
65
+ line for line in result.stdout.split('\n')
66
+ if any(line.startswith(c) for c in ('+', '-'))
67
+ and not line.startswith(('---', '+++'))
68
+ )[:round(200000 * 0.4)] # Truncate to prevent overflow
69
+
70
+ def generate_comment(diff_content):
71
+ client = OpenAI(
72
+ base_url=os.getenv("OPENAI_BASE_URL"),
73
+ api_key=os.getenv("OPENAI_API_KEY")
74
+ )
75
+
76
+ guidelines = '''
77
+ 1. English version first, Chinese Simplified version after
78
+ 2. Example format:
79
+ # Diff Report
80
+ ## English
81
+ - Added `ABC` class
82
+ - Fixed `f()` behavior in `foo` module
83
+
84
+ ### Comments Highlight
85
+ - `config.toml` needs to be configured properly to make sure new features work as expected.
86
+
87
+ ### Spelling/Offensive Content Check
88
+ - No spelling mistakes or offensive content found in the code or comments.
89
+
90
+ ## 中文(简体)
91
+ - 新增了 `ABC` 类
92
+ - `foo` 模块中的 `f()` 行为已修复
93
+
94
+ ### 评论高亮
95
+ - `config.toml` 需要正确配置才能确保新功能正常运行。
96
+
97
+ ### 内容检查
98
+ - 没有发现代码或注释中的拼写错误或不当措辞。
99
+
100
+ 3. Highlight non-English comments
101
+ 4. Check for spelling/offensive content'''
102
+
103
+ response = client.chat.completions.create(
104
+ model="o3-mini",
105
+ messages=[{
106
+ "role": "system",
107
+ "content": "Generate bilingual code review feedback."
108
+ }, {
109
+ "role": "user",
110
+ "content": f"Review these changes per guidelines:\n{guidelines}\n\nDIFF:\n{diff_content}"
111
+ }]
112
+ )
113
+ return response.choices[0].message.content
114
+
115
+ def post_comment(comment):
116
+ repo = os.getenv("GITHUB_REPOSITORY")
117
+ pr_number = os.getenv("PR_NUMBER")
118
+
119
+ headers = {
120
+ "Authorization": f"Bearer {os.getenv('GH_TOKEN')}",
121
+ "Accept": "application/vnd.github.v3+json"
122
+ }
123
+ url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
124
+
125
+ requests.post(url, json={"body": comment}, headers=headers)
126
+
127
+ if __name__ == "__main__":
128
+ diff_content = get_diff()
129
+ if not diff_content.strip():
130
+ print("No meaningful diff detected.")
131
+ exit(0)
132
+
133
+ comment = generate_comment(diff_content)
134
+ post_comment(comment)
135
+ print("Comment posted successfully.")
136
+ EOF
137
+
138
+ python /tmp/_workflow_core.py
OpenManus/.github/workflows/pre-commit.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Pre-commit checks
2
+
3
+ on:
4
+ pull_request:
5
+ branches:
6
+ - '**'
7
+ push:
8
+ branches:
9
+ - '**'
10
+
11
+ jobs:
12
+ pre-commit-check:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - name: Checkout Source Code
16
+ uses: actions/checkout@v4
17
+ - name: Set up Python 3.12
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: '3.12'
21
+ - name: Install pre-commit and tools
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ pip install pre-commit black==23.1.0 isort==5.12.0 autoflake==2.0.1
25
+ - name: Run pre-commit hooks
26
+ run: pre-commit run --all-files
OpenManus/.github/workflows/stale.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Close inactive issues
2
+
3
+ on:
4
+ schedule:
5
+ - cron: "5 0 * * *"
6
+
7
+ jobs:
8
+ close-issues:
9
+ runs-on: ubuntu-latest
10
+ permissions:
11
+ issues: write
12
+ pull-requests: write
13
+ steps:
14
+ - uses: actions/stale@v9
15
+ with:
16
+ days-before-issue-stale: 30
17
+ days-before-issue-close: 14
18
+ stale-issue-label: "inactive"
19
+ stale-issue-message: "This issue has been inactive for 30 days. Please comment if you have updates."
20
+ close-issue-message: "This issue was closed due to 44 days of inactivity. Reopen if still relevant."
21
+ days-before-pr-stale: -1
22
+ days-before-pr-close: -1
23
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
OpenManus/.github/workflows/top-issues.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Top issues
2
+ on:
3
+ schedule:
4
+ - cron: '0 0/2 * * *'
5
+ workflow_dispatch:
6
+ jobs:
7
+ ShowAndLabelTopIssues:
8
+ permissions:
9
+ issues: write
10
+ pull-requests: write
11
+ actions: read
12
+ contents: read
13
+ name: Display and label top issues
14
+ runs-on: ubuntu-latest
15
+ if: github.repository == 'mannaandpoem/OpenManus'
16
+ steps:
17
+ - name: Run top issues action
18
+ uses: rickstaa/top-issues-action@7e8dda5d5ae3087670f9094b9724a9a091fc3ba1 # v1.3.101
19
+ env:
20
+ github_token: ${{ secrets.GITHUB_TOKEN }}
21
+ with:
22
+ label: true
23
+ dashboard: true
24
+ dashboard_show_total_reactions: true
25
+ top_issues: true
26
+ top_features: true
27
+ top_bugs: true
28
+ top_pull_requests: true
29
+ top_list_size: 14
OpenManus/.gitignore ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Project-specific ###
2
+ # Logs
3
+ logs/
4
+
5
+ # Data
6
+ data/
7
+
8
+ # Workspace
9
+ workspace/
10
+
11
+ ### Python ###
12
+ # Byte-compiled / optimized / DLL files
13
+ __pycache__/
14
+ *.py[cod]
15
+ *$py.class
16
+
17
+ # C extensions
18
+ *.so
19
+
20
+ # Distribution / packaging
21
+ .Python
22
+ build/
23
+ develop-eggs/
24
+ dist/
25
+ downloads/
26
+ eggs/
27
+ .eggs/
28
+ lib/
29
+ lib64/
30
+ parts/
31
+ sdist/
32
+ var/
33
+ wheels/
34
+ share/python-wheels/
35
+ *.egg-info/
36
+ .installed.cfg
37
+ *.egg
38
+ MANIFEST
39
+
40
+ # PyInstaller
41
+ # Usually these files are written by a python script from a template
42
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
43
+ *.manifest
44
+ *.spec
45
+
46
+ # Installer logs
47
+ pip-log.txt
48
+ pip-delete-this-directory.txt
49
+
50
+ # Unit test / coverage reports
51
+ htmlcov/
52
+ .tox/
53
+ .nox/
54
+ .coverage
55
+ .coverage.*
56
+ .cache
57
+ nosetests.xml
58
+ coverage.xml
59
+ *.cover
60
+ *.py,cover
61
+ .hypothesis/
62
+ .pytest_cache/
63
+ cover/
64
+
65
+ # Translations
66
+ *.mo
67
+ *.pot
68
+
69
+ # Django stuff:
70
+ *.log
71
+ local_settings.py
72
+ db.sqlite3
73
+ db.sqlite3-journal
74
+
75
+ # Flask stuff:
76
+ instance/
77
+ .webassets-cache
78
+
79
+ # Scrapy stuff:
80
+ .scrapy
81
+
82
+ # Sphinx documentation
83
+ docs/_build/
84
+
85
+ # PyBuilder
86
+ .pybuilder/
87
+ target/
88
+
89
+ # Jupyter Notebook
90
+ .ipynb_checkpoints
91
+
92
+ # IPython
93
+ profile_default/
94
+ ipython_config.py
95
+
96
+ # pyenv
97
+ # For a library or package, you might want to ignore these files since the code is
98
+ # intended to run in multiple environments; otherwise, check them in:
99
+ # .python-version
100
+
101
+ # pipenv
102
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
103
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
104
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
105
+ # install all needed dependencies.
106
+ #Pipfile.lock
107
+
108
+ # UV
109
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
110
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
111
+ # commonly ignored for libraries.
112
+ #uv.lock
113
+
114
+ # poetry
115
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
116
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
117
+ # commonly ignored for libraries.
118
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
119
+ #poetry.lock
120
+
121
+ # pdm
122
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
123
+ #pdm.lock
124
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
125
+ # in version control.
126
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
127
+ .pdm.toml
128
+ .pdm-python
129
+ .pdm-build/
130
+
131
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
132
+ __pypackages__/
133
+
134
+ # Celery stuff
135
+ celerybeat-schedule
136
+ celerybeat.pid
137
+
138
+ # SageMath parsed files
139
+ *.sage.py
140
+
141
+ # Environments
142
+ .env
143
+ .venv
144
+ env/
145
+ venv/
146
+ ENV/
147
+ env.bak/
148
+ venv.bak/
149
+
150
+ # Spyder project settings
151
+ .spyderproject
152
+ .spyproject
153
+
154
+ # Rope project settings
155
+ .ropeproject
156
+
157
+ # mkdocs documentation
158
+ /site
159
+
160
+ # mypy
161
+ .mypy_cache/
162
+ .dmypy.json
163
+ dmypy.json
164
+
165
+ # Pyre type checker
166
+ .pyre/
167
+
168
+ # pytype static type analyzer
169
+ .pytype/
170
+
171
+ # Cython debug symbols
172
+ cython_debug/
173
+
174
+ # PyCharm
175
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
176
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
177
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
178
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
179
+ .idea/
180
+
181
+ # PyPI configuration file
182
+ .pypirc
183
+
184
+ ### Visual Studio Code ###
185
+ .vscode/*
186
+ !.vscode/settings.json
187
+ !.vscode/tasks.json
188
+ !.vscode/launch.json
189
+ !.vscode/extensions.json
190
+ !.vscode/*.code-snippets
191
+
192
+ # Local History for Visual Studio Code
193
+ .history/
194
+
195
+ # Built Visual Studio Code Extensions
196
+ *.vsix
197
+
198
+ # OSX
199
+ .DS_Store
OpenManus/.pre-commit-config.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/psf/black
3
+ rev: 23.1.0
4
+ hooks:
5
+ - id: black
6
+
7
+ - repo: https://github.com/pre-commit/pre-commit-hooks
8
+ rev: v4.4.0
9
+ hooks:
10
+ - id: trailing-whitespace
11
+ - id: end-of-file-fixer
12
+ - id: check-yaml
13
+ - id: check-added-large-files
14
+
15
+ - repo: https://github.com/PyCQA/autoflake
16
+ rev: v2.0.1
17
+ hooks:
18
+ - id: autoflake
19
+ args: [
20
+ --remove-all-unused-imports,
21
+ --ignore-init-module-imports,
22
+ --expand-star-imports,
23
+ --remove-duplicate-keys,
24
+ --remove-unused-variables,
25
+ --recursive,
26
+ --in-place,
27
+ --exclude=__init__.py,
28
+ ]
29
+ files: \.py$
30
+
31
+ - repo: https://github.com/pycqa/isort
32
+ rev: 5.12.0
33
+ hooks:
34
+ - id: isort
35
+ args: [
36
+ "--profile", "black",
37
+ "--filter-files",
38
+ "--lines-after-imports=2",
39
+ ]
OpenManus/.vscode/extensions.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "recommendations": [
3
+ "tamasfe.even-better-toml",
4
+ "ms-python.black-formatter",
5
+ "ms-python.isort"
6
+ ],
7
+ "unwantedRecommendations": []
8
+ }
OpenManus/.vscode/settings.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[python]": {
3
+ "editor.defaultFormatter": "ms-python.black-formatter",
4
+ "editor.codeActionsOnSave": {
5
+ "source.organizeImports": "always"
6
+ }
7
+ },
8
+ "[toml]": {
9
+ "editor.defaultFormatter": "tamasfe.even-better-toml"
10
+ },
11
+ "pre-commit-helper.runOnSave": "none",
12
+ "pre-commit-helper.config": ".pre-commit-config.yaml",
13
+ "evenBetterToml.schema.enabled": true,
14
+ "evenBetterToml.schema.associations": {
15
+ "^.+config[/\\\\].+\\.toml$": "../config/schema.config.json"
16
+ },
17
+ "files.insertFinalNewline": true,
18
+ "files.trimTrailingWhitespace": true,
19
+ "editor.formatOnSave": true
20
+ }
OpenManus/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, caste, color, religion, or sexual
10
+ identity and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ * Demonstrating empathy and kindness toward other people.
21
+ * Being respectful of differing opinions, viewpoints, and experiences.
22
+ * Giving and gracefully accepting constructive feedback.
23
+ * Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience.
25
+ * Focusing on what is best not just for us as individuals, but for the overall
26
+ community.
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ * The use of sexualized language or imagery, and sexual attention or advances of
31
+ any kind.
32
+ * Trolling, insulting or derogatory comments, and personal or political attacks.
33
+ * Public or private harassment.
34
+ * Publishing others' private information, such as a physical or email address,
35
+ without their explicit permission.
36
+ * Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting.
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official email address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement at
63
+ mannaandpoem@gmail.com.
64
+ All complaints will be reviewed and investigated promptly and fairly.
65
+
66
+ All community leaders are obligated to respect the privacy and security of the
67
+ reporter of any incident.
68
+
69
+ ## Enforcement Guidelines
70
+
71
+ Community leaders will follow these Community Impact Guidelines in determining
72
+ the consequences for any action they deem in violation of this Code of Conduct:
73
+
74
+ ### 1. Correction
75
+
76
+ **Community Impact**: Use of inappropriate language or other behavior deemed
77
+ unprofessional or unwelcome in the community.
78
+
79
+ **Consequence**: A private, written warning from community leaders, providing
80
+ clarity around the nature of the violation and an explanation of why the
81
+ behavior was inappropriate. A public apology may be requested.
82
+
83
+ ### 2. Warning
84
+
85
+ **Community Impact**: A violation through a single incident or series of
86
+ actions.
87
+
88
+ **Consequence**: A warning with consequences for continued behavior. No
89
+ interaction with the people involved, including unsolicited interaction with
90
+ those enforcing the Code of Conduct, for a specified period of time. This
91
+ includes avoiding interactions in community spaces as well as external channels
92
+ like social media. Violating these terms may lead to a temporary or permanent
93
+ ban.
94
+
95
+ ### 3. Temporary Ban
96
+
97
+ **Community Impact**: A serious violation of community standards, including
98
+ sustained inappropriate behavior.
99
+
100
+ **Consequence**: A temporary ban from any sort of interaction or public
101
+ communication with the community for a specified period of time. No public or
102
+ private interaction with the people involved, including unsolicited interaction
103
+ with those enforcing the Code of Conduct, is allowed during this period.
104
+ Violating these terms may lead to a permanent ban.
105
+
106
+ ### 4. Permanent Ban
107
+
108
+ **Community Impact**: Demonstrating a pattern of violation of community
109
+ standards, including sustained inappropriate behavior, harassment of an
110
+ individual, or aggression toward or disparagement of classes of individuals.
111
+
112
+ **Consequence**: A permanent ban from any sort of public interaction within the
113
+ community.
114
+
115
+ ### Slack and Discord Etiquette
116
+
117
+ These Slack and Discord etiquette guidelines are designed to foster an inclusive, respectful, and productive environment
118
+ for all community members. By following these best practices, we ensure effective communication and collaboration while
119
+ minimizing disruptions. Let’s work together to build a supportive and welcoming community!
120
+
121
+ - Communicate respectfully and professionally, avoiding sarcasm or harsh language, and remember that tone can be
122
+ difficult to interpret in text.
123
+ - Use threads for specific discussions to keep channels organized and easier to follow.
124
+ - Tag others only when their input is critical or urgent, and use @here, @channel or @everyone sparingly to minimize
125
+ disruptions.
126
+ - Be patient, as open-source contributors and maintainers often have other commitments and may need time to respond.
127
+ - Post questions or discussions in the most relevant
128
+ channel ([discord - #general](https://discord.com/channels/1125308739348594758/1138430348557025341)).
129
+ - When asking for help or raising issues, include necessary details like links, screenshots, or clear explanations to
130
+ provide context.
131
+ - Keep discussions in public channels whenever possible to allow others to benefit from the conversation, unless the
132
+ matter is sensitive or private.
133
+ - Always adhere to [our standards](https://github.com/mannaandpoem/OpenManus/blob/main/CODE_OF_CONDUCT.md#our-standards)
134
+ to ensure a welcoming and collaborative environment.
135
+ - If you choose to mute a channel, consider setting up alerts for topics that still interest you to stay engaged. For
136
+ Slack, go to Settings → Notifications → My Keywords to add specific keywords that will notify you when mentioned. For
137
+ example, if you're here for discussions about LLMs, mute the channel if it’s too busy, but set notifications to alert
138
+ you only when “LLMs” appears in messages. Also for Discord, go to the channel notifications and choose the option that
139
+ best describes your need.
140
+
141
+ ## Attribution
142
+
143
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
144
+ version 2.1, available at
145
+ [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
146
+
147
+ Community Impact Guidelines were inspired by
148
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
149
+
150
+ For answers to common questions about this code of conduct, see the FAQ at
151
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
152
+ [https://www.contributor-covenant.org/translations][translations].
153
+
154
+ [homepage]: https://www.contributor-covenant.org
155
+
156
+ [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
157
+
158
+ [Mozilla CoC]: https://github.com/mozilla/diversity
159
+
160
+ [FAQ]: https://www.contributor-covenant.org/faq
161
+
162
+ [translations]: https://www.contributor-covenant.org/translations
OpenManus/Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ WORKDIR /app/OpenManus
4
+
5
+ RUN apt-get update && apt-get install -y --no-install-recommends git curl \
6
+ && rm -rf /var/lib/apt/lists/* \
7
+ && (command -v uv >/dev/null 2>&1 || pip install --no-cache-dir uv)
8
+
9
+ COPY . .
10
+
11
+ RUN uv pip install --system -r requirements.txt
12
+
13
+ CMD ["bash"]
OpenManus/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 manna_and_poem
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
OpenManus/README.md ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <img src="assets/logo.jpg" width="200"/>
3
+ </p>
4
+
5
+ English | [中文](README_zh.md) | [한국어](README_ko.md) | [日本語](README_ja.md)
6
+
7
+ [![GitHub stars](https://img.shields.io/github/stars/mannaandpoem/OpenManus?style=social)](https://github.com/mannaandpoem/OpenManus/stargazers)
8
+ &ensp;
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) &ensp;
10
+ [![Discord Follow](https://dcbadge.vercel.app/api/server/DYn29wFk9z?style=flat)](https://discord.gg/DYn29wFk9z)
11
+ [![Demo](https://img.shields.io/badge/Demo-Hugging%20Face-yellow)](https://huggingface.co/spaces/lyh-917/OpenManusDemo)
12
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15186407.svg)](https://doi.org/10.5281/zenodo.15186407)
13
+
14
+ # 👋 OpenManus
15
+
16
+ Manus is incredible, but OpenManus can achieve any idea without an *Invite Code* 🛫!
17
+
18
+ Our team members [@Xinbin Liang](https://github.com/mannaandpoem) and [@Jinyu Xiang](https://github.com/XiangJinyu) (core authors), along with [@Zhaoyang Yu](https://github.com/MoshiQAQ), [@Jiayi Zhang](https://github.com/didiforgithub), and [@Sirui Hong](https://github.com/stellaHSR), are from [@MetaGPT](https://github.com/geekan/MetaGPT). The prototype was launched within 3 hours and we keep building!
19
+
20
+ It's a simple implementation, so we welcome any suggestions, contributions, and feedback!
21
+
22
+ Enjoy your own agent with OpenManus!
23
+
24
+ We're also excited to introduce [OpenManus-RL](https://github.com/OpenManus/OpenManus-RL), an open-source project dedicated to reinforcement learning (RL)-based tuning methods (such as GRPO) for LLM agents, developed collaboratively by researchers from UIUC and OpenManus.
25
+
26
+ ## Project Demo
27
+
28
+ <video src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" data-canonical-src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" controls="controls" muted="muted" class="d-block rounded-bottom-2 border-top width-fit" style="max-height:640px; min-height: 200px"></video>
29
+
30
+ ## Installation
31
+
32
+ We provide two installation methods. Method 2 (using uv) is recommended for faster installation and better dependency management.
33
+
34
+ ### Method 1: Using conda
35
+
36
+ 1. Create a new conda environment:
37
+
38
+ ```bash
39
+ conda create -n open_manus python=3.12
40
+ conda activate open_manus
41
+ ```
42
+
43
+ 2. Clone the repository:
44
+
45
+ ```bash
46
+ git clone https://github.com/mannaandpoem/OpenManus.git
47
+ cd OpenManus
48
+ ```
49
+
50
+ 3. Install dependencies:
51
+
52
+ ```bash
53
+ pip install -r requirements.txt
54
+ ```
55
+
56
+ ### Method 2: Using uv (Recommended)
57
+
58
+ 1. Install uv (A fast Python package installer and resolver):
59
+
60
+ ```bash
61
+ curl -LsSf https://astral.sh/uv/install.sh | sh
62
+ ```
63
+
64
+ 2. Clone the repository:
65
+
66
+ ```bash
67
+ git clone https://github.com/mannaandpoem/OpenManus.git
68
+ cd OpenManus
69
+ ```
70
+
71
+ 3. Create a new virtual environment and activate it:
72
+
73
+ ```bash
74
+ uv venv --python 3.12
75
+ source .venv/bin/activate # On Unix/macOS
76
+ # Or on Windows:
77
+ # .venv\Scripts\activate
78
+ ```
79
+
80
+ 4. Install dependencies:
81
+
82
+ ```bash
83
+ uv pip install -r requirements.txt
84
+ ```
85
+
86
+ ### Browser Automation Tool (Optional)
87
+ ```bash
88
+ playwright install
89
+ ```
90
+
91
+ ## Configuration
92
+
93
+ OpenManus requires configuration for the LLM APIs it uses. Follow these steps to set up your configuration:
94
+
95
+ 1. Create a `config.toml` file in the `config` directory (you can copy from the example):
96
+
97
+ ```bash
98
+ cp config/config.example.toml config/config.toml
99
+ ```
100
+
101
+ 2. Edit `config/config.toml` to add your API keys and customize settings:
102
+
103
+ ```toml
104
+ # Global LLM configuration
105
+ [llm]
106
+ model = "gpt-4o"
107
+ base_url = "https://api.openai.com/v1"
108
+ api_key = "sk-..." # Replace with your actual API key
109
+ max_tokens = 4096
110
+ temperature = 0.0
111
+
112
+ # Optional configuration for specific LLM models
113
+ [llm.vision]
114
+ model = "gpt-4o"
115
+ base_url = "https://api.openai.com/v1"
116
+ api_key = "sk-..." # Replace with your actual API key
117
+ ```
118
+
119
+ ## Quick Start
120
+
121
+ One line to run OpenManus:
122
+
123
+ ```bash
124
+ python main.py
125
+ ```
126
+
127
+ Then input your idea via terminal!
128
+
129
+ For the MCP tool version, you can run:
130
+ ```bash
131
+ python run_mcp.py
132
+ ```
133
+
134
+ For the unstable multi-agent version, you can also run:
135
+
136
+ ```bash
137
+ python run_flow.py
138
+ ```
139
+
140
+ ## How to contribute
141
+
142
+ We welcome any friendly suggestions and helpful contributions! Just create issues or submit pull requests.
143
+
144
+ Or contact @mannaandpoem via 📧email: mannaandpoem@gmail.com
145
+
146
+ **Note**: Before submitting a pull request, please use the pre-commit tool to check your changes. Run `pre-commit run --all-files` to execute the checks.
147
+
148
+ ## Community Group
149
+ Join our networking group on Feishu and share your experience with other developers!
150
+
151
+ <div align="center" style="display: flex; gap: 20px;">
152
+ <img src="assets/community_group.jpg" alt="OpenManus 交流群" width="300" />
153
+ </div>
154
+
155
+ ## Star History
156
+
157
+ [![Star History Chart](https://api.star-history.com/svg?repos=mannaandpoem/OpenManus&type=Date)](https://star-history.com/#mannaandpoem/OpenManus&Date)
158
+
159
+ ## Sponsors
160
+ Thanks to [PPIO](https://ppinfra.com/user/register?invited_by=OCPKCN&utm_source=github_openmanus&utm_medium=github_readme&utm_campaign=link) for computing resource support.
161
+ > PPIO: The most affordable and easily-integrated MaaS and GPU cloud solution.
162
+
163
+
164
+ ## Acknowledgement
165
+
166
+ Thanks to [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
167
+ and [browser-use](https://github.com/browser-use/browser-use) for providing basic support for this project!
168
+
169
+ Additionally, we are grateful to [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands) and [SWE-agent](https://github.com/SWE-agent/SWE-agent).
170
+
171
+ We also thank stepfun(阶跃星辰) for supporting our Hugging Face demo space.
172
+
173
+ OpenManus is built by contributors from MetaGPT. Huge thanks to this agent community!
174
+
175
+ ## Cite
176
+ ```bibtex
177
+ @misc{openmanus2025,
178
+ author = {Xinbin Liang and Jinyu Xiang and Zhaoyang Yu and Jiayi Zhang and Sirui Hong and Sheng Fan and Xiao Tang},
179
+ title = {OpenManus: An open-source framework for building general AI agents},
180
+ year = {2025},
181
+ publisher = {Zenodo},
182
+ doi = {10.5281/zenodo.15186407},
183
+ url = {https://doi.org/10.5281/zenodo.15186407},
184
+ }
185
+ ```
OpenManus/README_ja.md ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <img src="assets/logo.jpg" width="200"/>
3
+ </p>
4
+
5
+ [English](README.md) | [中文](README_zh.md) | [한국어](README_ko.md) | 日本語
6
+
7
+ [![GitHub stars](https://img.shields.io/github/stars/mannaandpoem/OpenManus?style=social)](https://github.com/mannaandpoem/OpenManus/stargazers)
8
+ &ensp;
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) &ensp;
10
+ [![Discord Follow](https://dcbadge.vercel.app/api/server/DYn29wFk9z?style=flat)](https://discord.gg/DYn29wFk9z)
11
+ [![Demo](https://img.shields.io/badge/Demo-Hugging%20Face-yellow)](https://huggingface.co/spaces/lyh-917/OpenManusDemo)
12
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15186407.svg)](https://doi.org/10.5281/zenodo.15186407)
13
+
14
+ # 👋 OpenManus
15
+
16
+ Manusは素晴らしいですが、OpenManusは*招待コード*なしでどんなアイデアも実現できます!🛫
17
+
18
+ 私たちのチームメンバー [@Xinbin Liang](https://github.com/mannaandpoem) と [@Jinyu Xiang](https://github.com/XiangJinyu)(主要開発者)、そして [@Zhaoyang Yu](https://github.com/MoshiQAQ)、[@Jiayi Zhang](https://github.com/didiforgithub)、[@Sirui Hong](https://github.com/stellaHSR) は [@MetaGPT](https://github.com/geekan/MetaGPT) から来ました。プロトタイプは3時間以内に立ち上げられ、継続的に開発を進めています!
19
+
20
+ これはシンプルな実装ですので、どんな提案、貢献、フィードバックも歓迎します!
21
+
22
+ OpenManusで自分だけのエージェントを楽しみましょう!
23
+
24
+ また、UIUCとOpenManusの研究者が共同開発した[OpenManus-RL](https://github.com/OpenManus/OpenManus-RL)をご紹介できることを嬉しく思います。これは強化学習(RL)ベース(GRPOなど)のLLMエージェントチューニング手法に特化したオープンソースプロジェクトです。
25
+
26
+ ## プロジェクトデモ
27
+
28
+ <video src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" data-canonical-src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" controls="controls" muted="muted" class="d-block rounded-bottom-2 border-top width-fit" style="max-height:640px; min-height: 200px"></video>
29
+
30
+ ## インストール方法
31
+
32
+ インストール方法は2つ提供しています。方法2(uvを使用)は、より高速なインストールと優れた依存関係管理のため推奨されています。
33
+
34
+ ### 方法1:condaを使用
35
+
36
+ 1. 新しいconda環境を作成します:
37
+
38
+ ```bash
39
+ conda create -n open_manus python=3.12
40
+ conda activate open_manus
41
+ ```
42
+
43
+ 2. リポジトリをクローンします:
44
+
45
+ ```bash
46
+ git clone https://github.com/mannaandpoem/OpenManus.git
47
+ cd OpenManus
48
+ ```
49
+
50
+ 3. 依存関係をインストールします:
51
+
52
+ ```bash
53
+ pip install -r requirements.txt
54
+ ```
55
+
56
+ ### 方法2:uvを使用(推奨)
57
+
58
+ 1. uv(高速なPythonパッケージインストーラーと管理機能)をインストールします:
59
+
60
+ ```bash
61
+ curl -LsSf https://astral.sh/uv/install.sh | sh
62
+ ```
63
+
64
+ 2. リポジトリをクローンします:
65
+
66
+ ```bash
67
+ git clone https://github.com/mannaandpoem/OpenManus.git
68
+ cd OpenManus
69
+ ```
70
+
71
+ 3. 新しい仮想環境を作成してアクティベートします:
72
+
73
+ ```bash
74
+ uv venv --python 3.12
75
+ source .venv/bin/activate # Unix/macOSの場合
76
+ # Windowsの場合:
77
+ # .venv\Scripts\activate
78
+ ```
79
+
80
+ 4. 依存関係をインストールします:
81
+
82
+ ```bash
83
+ uv pip install -r requirements.txt
84
+ ```
85
+
86
+ ### ブラウザ自動化ツール(オプション)
87
+ ```bash
88
+ playwright install
89
+ ```
90
+
91
+ ## 設定
92
+
93
+ OpenManusを使用するには、LLM APIの設定が必要です。以下の手順に従って設定してください:
94
+
95
+ 1. `config`ディレクトリに`config.toml`ファイルを作成します(サンプルからコピーできます):
96
+
97
+ ```bash
98
+ cp config/config.example.toml config/config.toml
99
+ ```
100
+
101
+ 2. `config/config.toml`を編集してAPIキーを追加し、設定をカスタマイズします:
102
+
103
+ ```toml
104
+ # グローバルLLM設定
105
+ [llm]
106
+ model = "gpt-4o"
107
+ base_url = "https://api.openai.com/v1"
108
+ api_key = "sk-..." # 実際のAPIキーに置き換えてください
109
+ max_tokens = 4096
110
+ temperature = 0.0
111
+
112
+ # 特定のLLMモデル用のオプション設定
113
+ [llm.vision]
114
+ model = "gpt-4o"
115
+ base_url = "https://api.openai.com/v1"
116
+ api_key = "sk-..." # 実際のAPIキーに置き換えてください
117
+ ```
118
+
119
+ ## クイックスタート
120
+
121
+ OpenManusを実行する一行コマンド:
122
+
123
+ ```bash
124
+ python main.py
125
+ ```
126
+
127
+ その後、ターミナルからプロンプトを入力してください!
128
+
129
+ MCP ツールバージョンを使用する場合は、以下を実行します:
130
+ ```bash
131
+ python run_mcp.py
132
+ ```
133
+
134
+ 開発中のマルチエージェントバージョンを試すには、以下を実行します:
135
+
136
+ ```bash
137
+ python run_flow.py
138
+ ```
139
+
140
+ ## 貢献方法
141
+
142
+ 我々は建設的な意見や有益な貢献を歓迎します!issueを作成するか、プルリクエストを提出してください。
143
+
144
+ または @mannaandpoem に📧メールでご連絡ください:mannaandpoem@gmail.com
145
+
146
+ **注意**: プルリクエストを送信する前に、pre-commitツールを使用して変更を確認してください。`pre-commit run --all-files`を実行してチェックを実行します。
147
+
148
+ ## コミュニティグループ
149
+ Feishuのネットワーキンググループに参加して、他の開発者と経験を共有しましょう!
150
+
151
+ <div align="center" style="display: flex; gap: 20px;">
152
+ <img src="assets/community_group.jpg" alt="OpenManus 交流群" width="300" />
153
+ </div>
154
+
155
+ ## スター履歴
156
+
157
+ [![Star History Chart](https://api.star-history.com/svg?repos=mannaandpoem/OpenManus&type=Date)](https://star-history.com/#mannaandpoem/OpenManus&Date)
158
+
159
+ ## 謝辞
160
+
161
+ このプロジェクトの基本的なサポートを提供してくれた[anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
162
+ と[browser-use](https://github.com/browser-use/browser-use)に感謝します!
163
+
164
+ さらに、[AAAJ](https://github.com/metauto-ai/agent-as-a-judge)、[MetaGPT](https://github.com/geekan/MetaGPT)、[OpenHands](https://github.com/All-Hands-AI/OpenHands)、[SWE-agent](https://github.com/SWE-agent/SWE-agent)にも感謝します。
165
+
166
+ また、Hugging Face デモスペースをサポートしてくださった阶跃星辰 (stepfun)にも感謝いたします。
167
+
168
+ OpenManusはMetaGPTのコントリビューターによって構築されました。このエージェントコミュニティに大きな感謝を!
169
+
170
+ ## 引用
171
+ ```bibtex
172
+ @misc{openmanus2025,
173
+ author = {Xinbin Liang and Jinyu Xiang and Zhaoyang Yu and Jiayi Zhang and Sirui Hong and Sheng Fan and Xiao Tang},
174
+ title = {OpenManus: An open-source framework for building general AI agents},
175
+ year = {2025},
176
+ publisher = {Zenodo},
177
+ doi = {10.5281/zenodo.15186407},
178
+ url = {https://doi.org/10.5281/zenodo.15186407},
179
+ }
180
+ ```
OpenManus/README_ko.md ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <img src="assets/logo.jpg" width="200"/>
3
+ </p>
4
+
5
+ [English](README.md) | [中文](README_zh.md) | 한국어 | [日本語](README_ja.md)
6
+
7
+ [![GitHub stars](https://img.shields.io/github/stars/mannaandpoem/OpenManus?style=social)](https://github.com/mannaandpoem/OpenManus/stargazers)
8
+ &ensp;
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) &ensp;
10
+ [![Discord Follow](https://dcbadge.vercel.app/api/server/DYn29wFk9z?style=flat)](https://discord.gg/DYn29wFk9z)
11
+ [![Demo](https://img.shields.io/badge/Demo-Hugging%20Face-yellow)](https://huggingface.co/spaces/lyh-917/OpenManusDemo)
12
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15186407.svg)](https://doi.org/10.5281/zenodo.15186407)
13
+
14
+ # 👋 OpenManus
15
+
16
+ Manus는 놀라운 도구지만, OpenManus는 *초대 코드* 없이도 모든 아이디어를 실현할 수 있습니다! 🛫
17
+
18
+ 우리 팀의 멤버인 [@Xinbin Liang](https://github.com/mannaandpoem)와 [@Jinyu Xiang](https://github.com/XiangJinyu) (핵심 작성자), 그리고 [@Zhaoyang Yu](https://github.com/MoshiQAQ), [@Jiayi Zhang](https://github.com/didiforgithub), [@Sirui Hong](https://github.com/stellaHSR)이 함께 했습니다. 우리는 [@MetaGPT](https://github.com/geekan/MetaGPT)로부터 왔습니다. 프로토타입은 단 3시간 만에 출시되었으며, 계속해서 발전하고 있습니다!
19
+
20
+ 이 프로젝트는 간단한 구현에서 시작되었으며, 여러분의 제안, 기여 및 피드백을 환영합니다!
21
+
22
+ OpenManus를 통해 여러분만의 에이전트를 즐겨보세요!
23
+
24
+ 또한 [OpenManus-RL](https://github.com/OpenManus/OpenManus-RL)을 소개하게 되어 기쁩니다. OpenManus와 UIUC 연구자들이 공동 개발한 이 오픈소스 프로젝트는 LLM 에이전트에 대해 강화 학습(RL) 기반 (예: GRPO) 튜닝 방법을 제공합니다.
25
+
26
+ ## 프로젝트 데모
27
+
28
+ <video src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" data-canonical-src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" controls="controls" muted="muted" class="d-block rounded-bottom-2 border-top width-fit" style="max-height:640px; min-height: 200px"></video>
29
+
30
+ ## 설치 방법
31
+
32
+ 두 가지 설치 방법을 제공합니다. **방법 2 (uv 사용)** 이 더 빠른 설치와 효율적인 종속성 관리를 위해 권장됩니다.
33
+
34
+ ### 방법 1: conda 사용
35
+
36
+ 1. 새로운 conda 환경을 생성합니다:
37
+
38
+ ```bash
39
+ conda create -n open_manus python=3.12
40
+ conda activate open_manus
41
+ ```
42
+
43
+ 2. 저장소를 클론합니다:
44
+
45
+ ```bash
46
+ git clone https://github.com/mannaandpoem/OpenManus.git
47
+ cd OpenManus
48
+ ```
49
+
50
+ 3. 종속성을 설치합니다:
51
+
52
+ ```bash
53
+ pip install -r requirements.txt
54
+ ```
55
+
56
+ ### 방법 2: uv 사용 (권장)
57
+
58
+ 1. uv를 설치합니다. (빠른 Python 패키지 설치 및 종속성 관리 도구):
59
+
60
+ ```bash
61
+ curl -LsSf https://astral.sh/uv/install.sh | sh
62
+ ```
63
+
64
+ 2. 저장소를 클론합니다:
65
+
66
+ ```bash
67
+ git clone https://github.com/mannaandpoem/OpenManus.git
68
+ cd OpenManus
69
+ ```
70
+
71
+ 3. 새로운 가상 환경을 생성하고 활성화합니다:
72
+
73
+ ```bash
74
+ uv venv --python 3.12
75
+ source .venv/bin/activate # Unix/macOS의 경우
76
+ # Windows의 경우:
77
+ # .venv\Scripts\activate
78
+ ```
79
+
80
+ 4. 종속성을 설치합니다:
81
+
82
+ ```bash
83
+ uv pip install -r requirements.txt
84
+ ```
85
+
86
+ ### 브라우저 자동화 도구 (선택사항)
87
+ ```bash
88
+ playwright install
89
+ ```
90
+
91
+ ## 설정 방법
92
+
93
+ OpenManus를 사용하려면 사용하는 LLM API에 대한 설정이 필요합니다. 아래 단계를 따라 설정을 완료하세요:
94
+
95
+ 1. `config` 디렉토리에 `config.toml` 파일을 생성하세요 (예제 파일을 복사하여 사용할 수 있습니다):
96
+
97
+ ```bash
98
+ cp config/config.example.toml config/config.toml
99
+ ```
100
+
101
+ 2. `config/config.toml` 파일을 편집하여 API 키를 추가하고 설정을 커스터마이징하세요:
102
+
103
+ ```toml
104
+ # 전역 LLM 설정
105
+ [llm]
106
+ model = "gpt-4o"
107
+ base_url = "https://api.openai.com/v1"
108
+ api_key = "sk-..." # 실제 API 키로 변경하세요
109
+ max_tokens = 4096
110
+ temperature = 0.0
111
+
112
+ # 특정 LLM 모델에 대한 선택적 설정
113
+ [llm.vision]
114
+ model = "gpt-4o"
115
+ base_url = "https://api.openai.com/v1"
116
+ api_key = "sk-..." # 실제 API 키로 변경하세요
117
+ ```
118
+
119
+ ## 빠른 시작
120
+
121
+ OpenManus를 실행하는 한 줄 명령어:
122
+
123
+ ```bash
124
+ python main.py
125
+ ```
126
+
127
+ 이후 터미널에서 아이디어를 작성하세요!
128
+
129
+ MCP 도구 버전을 사용하려면 다음을 실행하세요:
130
+ ```bash
131
+ python run_mcp.py
132
+ ```
133
+
134
+ 불안정한 멀티 에이전트 버전을 실행하려면 다음을 실행할 수 있습니다:
135
+
136
+ ```bash
137
+ python run_flow.py
138
+ ```
139
+
140
+ ## 기여 방법
141
+
142
+ 모든 친절한 제안과 유용한 기여를 환영합니다! 이슈를 생성하거나 풀 리퀘스트를 제출해 주세요.
143
+
144
+ 또는 📧 메일로 연락주세요. @mannaandpoem : mannaandpoem@gmail.com
145
+
146
+ **참고**: pull request를 제출하기 전에 pre-commit 도구를 사용하여 변경 사항을 확인하십시오. `pre-commit run --all-files`를 실행하여 검사를 실행합니다.
147
+
148
+ ## 커뮤니티 그룹
149
+ Feishu 네트워킹 그룹에 참여하여 다른 개발자들과 경험을 공유하세요!
150
+
151
+ <div align="center" style="display: flex; gap: 20px;">
152
+ <img src="assets/community_group.jpg" alt="OpenManus 交流群" width="300" />
153
+ </div>
154
+
155
+ ## Star History
156
+
157
+ [![Star History Chart](https://api.star-history.com/svg?repos=mannaandpoem/OpenManus&type=Date)](https://star-history.com/#mannaandpoem/OpenManus&Date)
158
+
159
+ ## 감사의 글
160
+
161
+ 이 프로젝트에 기본적인 지원을 제공해 주신 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)와
162
+ [browser-use](https://github.com/browser-use/browser-use)에게 감사드립니다!
163
+
164
+ 또한, [AAAJ](https://github.com/metauto-ai/agent-as-a-judge), [MetaGPT](https://github.com/geekan/MetaGPT), [OpenHands](https://github.com/All-Hands-AI/OpenHands), [SWE-agent](https://github.com/SWE-agent/SWE-agent)에 깊은 감사를 드립니다.
165
+
166
+ 또한 Hugging Face 데모 공간을 지원해 주신 阶跃星辰 (stepfun)에게 감사드립니다.
167
+
168
+ OpenManus는 MetaGPT 기여자들에 의해 개발되었습니다. 이 에이전트 커뮤니티에 깊은 감사를 전합니다!
169
+
170
+ ## 인용
171
+ ```bibtex
172
+ @misc{openmanus2025,
173
+ author = {Xinbin Liang and Jinyu Xiang and Zhaoyang Yu and Jiayi Zhang and Sirui Hong and Sheng Fan and Xiao Tang},
174
+ title = {OpenManus: An open-source framework for building general AI agents},
175
+ year = {2025},
176
+ publisher = {Zenodo},
177
+ doi = {10.5281/zenodo.15186407},
178
+ url = {https://doi.org/10.5281/zenodo.15186407},
179
+ }
180
+ ```
OpenManus/README_zh.md ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <img src="assets/logo.jpg" width="200"/>
3
+ </p>
4
+
5
+ [English](README.md) | 中文 | [한국어](README_ko.md) | [日本語](README_ja.md)
6
+
7
+ [![GitHub stars](https://img.shields.io/github/stars/mannaandpoem/OpenManus?style=social)](https://github.com/mannaandpoem/OpenManus/stargazers)
8
+ &ensp;
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) &ensp;
10
+ [![Discord Follow](https://dcbadge.vercel.app/api/server/DYn29wFk9z?style=flat)](https://discord.gg/DYn29wFk9z)
11
+ [![Demo](https://img.shields.io/badge/Demo-Hugging%20Face-yellow)](https://huggingface.co/spaces/lyh-917/OpenManusDemo)
12
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15186407.svg)](https://doi.org/10.5281/zenodo.15186407)
13
+
14
+ # 👋 OpenManus
15
+
16
+ Manus 非常棒,但 OpenManus 无需邀请码即可实现任何创意 🛫!
17
+
18
+ 我们的团队成员 [@Xinbin Liang](https://github.com/mannaandpoem) 和 [@Jinyu Xiang](https://github.com/XiangJinyu)(核心作者),以及 [@Zhaoyang Yu](https://github.com/MoshiQAQ)、[@Jiayi Zhang](https://github.com/didiforgithub) 和 [@Sirui Hong](https://github.com/stellaHSR),来自 [@MetaGPT](https://github.com/geekan/MetaGPT)团队。我们在 3
19
+ 小时内完成了开发并持续迭代中!
20
+
21
+ 这是一个简洁的实现方案,欢迎任何建议、贡献和反馈!
22
+
23
+ 用 OpenManus 开启你的智能体之旅吧!
24
+
25
+ 我们也非常高兴地向大家介绍 [OpenManus-RL](https://github.com/OpenManus/OpenManus-RL),这是一个专注于基于强化学习(RL,例如 GRPO)的方法来优化大语言模型(LLM)智能体的开源项目,由来自UIUC 和 OpenManus 的研究人员合作开发。
26
+
27
+ ## 项目演示
28
+
29
+ <video src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" data-canonical-src="https://private-user-images.githubusercontent.com/61239030/420168772-6dcfd0d2-9142-45d9-b74e-d10aa75073c6.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NDEzMTgwNTksIm5iZiI6MTc0MTMxNzc1OSwicGF0aCI6Ii82MTIzOTAzMC80MjAxNjg3NzItNmRjZmQwZDItOTE0Mi00NWQ5LWI3NGUtZDEwYWE3NTA3M2M2Lm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAzMDclMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMzA3VDAzMjIzOVomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTdiZjFkNjlmYWNjMmEzOTliM2Y3M2VlYjgyNDRlZDJmOWE3NWZhZjE1MzhiZWY4YmQ3NjdkNTYwYTU5ZDA2MzYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.UuHQCgWYkh0OQq9qsUWqGsUbhG3i9jcZDAMeHjLt5T4" controls="controls" muted="muted" class="d-block rounded-bottom-2 border-top width-fit" style="max-height:640px; min-height: 200px"></video>
30
+
31
+ ## 安装指南
32
+
33
+ 我们提供两种安装方式。推荐使用方式二(uv),因为它能提供更快的安装速度和更好的依赖管理。
34
+
35
+ ### 方式一:使用 conda
36
+
37
+ 1. 创建新的 conda 环境:
38
+
39
+ ```bash
40
+ conda create -n open_manus python=3.12
41
+ conda activate open_manus
42
+ ```
43
+
44
+ 2. 克隆仓库:
45
+
46
+ ```bash
47
+ git clone https://github.com/mannaandpoem/OpenManus.git
48
+ cd OpenManus
49
+ ```
50
+
51
+ 3. 安装依赖:
52
+
53
+ ```bash
54
+ pip install -r requirements.txt
55
+ ```
56
+
57
+ ### 方式二:使用 uv(推荐)
58
+
59
+ 1. 安装 uv(一个快速的 Python 包管理器):
60
+
61
+ ```bash
62
+ curl -LsSf https://astral.sh/uv/install.sh | sh
63
+ ```
64
+
65
+ 2. 克隆仓库:
66
+
67
+ ```bash
68
+ git clone https://github.com/mannaandpoem/OpenManus.git
69
+ cd OpenManus
70
+ ```
71
+
72
+ 3. 创建并激活虚拟环境:
73
+
74
+ ```bash
75
+ uv venv --python 3.12
76
+ source .venv/bin/activate # Unix/macOS 系统
77
+ # Windows 系统使用:
78
+ # .venv\Scripts\activate
79
+ ```
80
+
81
+ 4. 安装依赖:
82
+
83
+ ```bash
84
+ uv pip install -r requirements.txt
85
+ ```
86
+
87
+ ### 浏览器自动化工具(可选)
88
+ ```bash
89
+ playwright install
90
+ ```
91
+
92
+ ## 配置说明
93
+
94
+ OpenManus 需要配置使用的 LLM API,请按以下步骤设置:
95
+
96
+ 1. 在 `config` 目录创建 `config.toml` 文件(可从示例复制):
97
+
98
+ ```bash
99
+ cp config/config.example.toml config/config.toml
100
+ ```
101
+
102
+ 2. 编辑 `config/config.toml` 添加 API 密钥和自定义设置:
103
+
104
+ ```toml
105
+ # 全局 LLM 配置
106
+ [llm]
107
+ model = "gpt-4o"
108
+ base_url = "https://api.openai.com/v1"
109
+ api_key = "sk-..." # 替换为真实 API 密钥
110
+ max_tokens = 4096
111
+ temperature = 0.0
112
+
113
+ # 可选特定 LLM 模型配置
114
+ [llm.vision]
115
+ model = "gpt-4o"
116
+ base_url = "https://api.openai.com/v1"
117
+ api_key = "sk-..." # 替换为真实 API 密钥
118
+ ```
119
+
120
+ ## 快速启动
121
+
122
+ 一行命令运行 OpenManus:
123
+
124
+ ```bash
125
+ python main.py
126
+ ```
127
+
128
+ 然后通过终端输入你的创意!
129
+
130
+ 如需使用 MCP 工具版本,可运行:
131
+ ```bash
132
+ python run_mcp.py
133
+ ```
134
+
135
+ 如需体验不稳定的多智能体版本,可运行:
136
+
137
+ ```bash
138
+ python run_flow.py
139
+ ```
140
+
141
+ ## 贡献指南
142
+
143
+ 我们欢迎任何友好的建议和有价值的贡献!可以直接创建 issue 或提交 pull request。
144
+
145
+ 或通过 📧 邮件联系 @mannaandpoem:mannaandpoem@gmail.com
146
+
147
+ **注意**: 在提交 pull request 之前,请使用 pre-commit 工具检查您的更改。运行 `pre-commit run --all-files` 来执行检查。
148
+
149
+ ## 交流群
150
+
151
+ 加入我们的飞书交流群,与其他开发者分享经验!
152
+
153
+ <div align="center" style="display: flex; gap: 20px;">
154
+ <img src="assets/community_group.jpg" alt="OpenManus 交流群" width="300" />
155
+ </div>
156
+
157
+ ## Star 数量
158
+
159
+ [![Star History Chart](https://api.star-history.com/svg?repos=mannaandpoem/OpenManus&type=Date)](https://star-history.com/#mannaandpoem/OpenManus&Date)
160
+
161
+
162
+ ## 赞助商
163
+ 感谢[PPIO](https://ppinfra.com/user/register?invited_by=OCPKCN&utm_source=github_openmanus&utm_medium=github_readme&utm_campaign=link) 提供的算力支持。
164
+ > PPIO派欧云:一键调用高性价比的开源模型API和GPU容器
165
+
166
+ ## 致谢
167
+
168
+ 特别感谢 [anthropic-computer-use](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo)
169
+ 和 [browser-use](https://github.com/browser-use/browser-use) 为本项目提供的基础支持!
170
+
171
+ 此外,我们感谢 [AAAJ](https://github.com/metauto-ai/agent-as-a-judge),[MetaGPT](https://github.com/geekan/MetaGPT),[OpenHands](https://github.com/All-Hands-AI/OpenHands) 和 [SWE-agent](https://github.com/SWE-agent/SWE-agent)。
172
+
173
+ 我们也感谢阶跃星辰 (stepfun) 提供的 Hugging Face 演示空间支持。
174
+
175
+ OpenManus 由 MetaGPT 社区的贡献者共同构建,感谢这个充满活力的智能体开发者社区!
176
+
177
+ ## 引用
178
+ ```bibtex
179
+ @misc{openmanus2025,
180
+ author = {Xinbin Liang and Jinyu Xiang and Zhaoyang Yu and Jiayi Zhang and Sirui Hong and Sheng Fan and Xiao Tang},
181
+ title = {OpenManus: An open-source framework for building general AI agents},
182
+ year = {2025},
183
+ publisher = {Zenodo},
184
+ doi = {10.5281/zenodo.15186407},
185
+ url = {https://doi.org/10.5281/zenodo.15186407},
186
+ }
187
+ ```
OpenManus/app/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python version check: 3.11-3.13
2
+ import sys
3
+
4
+
5
+ if sys.version_info < (3, 11) or sys.version_info > (3, 13):
6
+ print(
7
+ "Warning: Unsupported Python version {ver}, please use 3.11-3.13".format(
8
+ ver=".".join(map(str, sys.version_info))
9
+ )
10
+ )
OpenManus/app/agent/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.agent.base import BaseAgent
2
+ from app.agent.browser import BrowserAgent
3
+ from app.agent.mcp import MCPAgent
4
+ from app.agent.react import ReActAgent
5
+ from app.agent.swe import SWEAgent
6
+ from app.agent.toolcall import ToolCallAgent
7
+
8
+
9
+ __all__ = [
10
+ "BaseAgent",
11
+ "BrowserAgent",
12
+ "ReActAgent",
13
+ "SWEAgent",
14
+ "ToolCallAgent",
15
+ "MCPAgent",
16
+ ]
OpenManus/app/agent/base.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from contextlib import asynccontextmanager
3
+ from typing import List, Optional
4
+
5
+ from pydantic import BaseModel, Field, model_validator
6
+
7
+ from app.llm import LLM
8
+ from app.logger import logger
9
+ from app.sandbox.client import SANDBOX_CLIENT
10
+ from app.schema import ROLE_TYPE, AgentState, Memory, Message
11
+
12
+
13
class BaseAgent(BaseModel, ABC):
    """Abstract base class for managing agent state and execution.

    Provides foundational functionality for state transitions, memory management,
    and a step-based execution loop. Subclasses must implement the `step` method.
    """

    # Core attributes
    name: str = Field(..., description="Unique name of the agent")
    description: Optional[str] = Field(None, description="Optional agent description")

    # Prompts
    system_prompt: Optional[str] = Field(
        None, description="System-level instruction prompt"
    )
    next_step_prompt: Optional[str] = Field(
        None, description="Prompt for determining next action"
    )

    # Dependencies
    llm: LLM = Field(default_factory=LLM, description="Language model instance")
    memory: Memory = Field(default_factory=Memory, description="Agent's memory store")
    state: AgentState = Field(
        default=AgentState.IDLE, description="Current agent state"
    )

    # Execution control
    max_steps: int = Field(default=10, description="Maximum steps before termination")
    current_step: int = Field(default=0, description="Current step in execution")

    # Number of earlier identical assistant messages that marks the agent as stuck
    duplicate_threshold: int = 2

    class Config:
        arbitrary_types_allowed = True
        extra = "allow"  # Allow extra fields for flexibility in subclasses

    @model_validator(mode="after")
    def initialize_agent(self) -> "BaseAgent":
        """Initialize agent with default settings if not provided."""
        if self.llm is None or not isinstance(self.llm, LLM):
            self.llm = LLM(config_name=self.name.lower())
        if not isinstance(self.memory, Memory):
            self.memory = Memory()
        return self

    @asynccontextmanager
    async def state_context(self, new_state: AgentState):
        """Context manager for temporary agent state transitions.

        Args:
            new_state: The state to transition to during the context.

        Yields:
            None: Allows execution within the new state.

        Raises:
            ValueError: If the new_state is invalid.
        """
        if not isinstance(new_state, AgentState):
            raise ValueError(f"Invalid state: {new_state}")

        previous_state = self.state
        self.state = new_state
        try:
            yield
        except Exception:
            self.state = AgentState.ERROR  # Transition to ERROR on failure
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            # NOTE: this also runs on the failure path, so the ERROR state set
            # above is replaced by the previous state before callers can see it.
            self.state = previous_state  # Revert to previous state

    def update_memory(
        self,
        role: ROLE_TYPE,  # type: ignore
        content: str,
        base64_image: Optional[str] = None,
        **kwargs,
    ) -> None:
        """Add a message to the agent's memory.

        Args:
            role: The role of the message sender (user, system, assistant, tool).
            content: The message content.
            base64_image: Optional base64 encoded image.
            **kwargs: Additional arguments (e.g., tool_call_id for tool messages).

        Raises:
            ValueError: If the role is unsupported.
        """
        message_map = {
            "user": Message.user_message,
            "system": Message.system_message,
            "assistant": Message.assistant_message,
            "tool": lambda content, **kw: Message.tool_message(content, **kw),
        }

        if role not in message_map:
            raise ValueError(f"Unsupported message role: {role}")

        # Extra keyword arguments are only forwarded for tool messages
        kwargs = {"base64_image": base64_image, **(kwargs if role == "tool" else {})}
        self.memory.add_message(message_map[role](content, **kwargs))

    async def run(self, request: Optional[str] = None) -> str:
        """Execute the agent's main loop asynchronously.

        Args:
            request: Optional initial user request to process.

        Returns:
            A string summarizing the execution results.

        Raises:
            RuntimeError: If the agent is not in IDLE state at start.
        """
        if self.state != AgentState.IDLE:
            raise RuntimeError(f"Cannot run agent from state: {self.state}")

        if request:
            self.update_memory("user", request)

        results: List[str] = []
        finished = False
        async with self.state_context(AgentState.RUNNING):
            while (
                self.current_step < self.max_steps and self.state != AgentState.FINISHED
            ):
                self.current_step += 1
                logger.info(f"Executing step {self.current_step}/{self.max_steps}")
                step_result = await self.step()

                # Check for stuck state
                if self.is_stuck():
                    self.handle_stuck_state()

                results.append(f"Step {self.current_step}: {step_result}")

            # state_context restores the previous state on exit, so record here
            # whether the loop stopped because the agent actually finished.
            finished = self.state == AgentState.FINISHED

        if self.current_step >= self.max_steps:
            self.current_step = 0
            self.state = AgentState.IDLE
            # Only report a forced termination when the step budget ran out
            # without the agent finishing; finishing on the last allowed step
            # is a normal completion.
            if not finished:
                results.append(f"Terminated: Reached max steps ({self.max_steps})")
        await SANDBOX_CLIENT.cleanup()
        return "\n".join(results) if results else "No steps executed"

    @abstractmethod
    async def step(self) -> str:
        """Execute a single step in the agent's workflow.

        Must be implemented by subclasses to define specific behavior.
        """

    def handle_stuck_state(self):
        """Handle stuck state by adding a prompt to change strategy"""
        stuck_prompt = "Observed duplicate responses. Consider new strategies and avoid repeating ineffective paths already attempted."
        # next_step_prompt is Optional: treat None as empty instead of
        # rendering the literal text "None" into the prompt.
        current_prompt = self.next_step_prompt or ""
        # Skip the prepend when the hint is already in front, otherwise the
        # prompt grows by one copy on every consecutive stuck step.
        if not current_prompt.startswith(stuck_prompt):
            self.next_step_prompt = (
                f"{stuck_prompt}\n{current_prompt}" if current_prompt else stuck_prompt
            )
        logger.warning(f"Agent detected stuck state. Added prompt: {stuck_prompt}")

    def is_stuck(self) -> bool:
        """Check if the agent is stuck in a loop by detecting duplicate content"""
        if len(self.memory.messages) < 2:
            return False

        last_message = self.memory.messages[-1]
        if not last_message.content:
            return False

        # Count earlier assistant messages whose content equals the latest message
        duplicate_count = sum(
            1
            for msg in self.memory.messages[:-1]
            if msg.role == "assistant" and msg.content == last_message.content
        )

        return duplicate_count >= self.duplicate_threshold

    @property
    def messages(self) -> List[Message]:
        """Retrieve a list of messages from the agent's memory."""
        return self.memory.messages

    @messages.setter
    def messages(self, value: List[Message]):
        """Set the list of messages in the agent's memory."""
        self.memory.messages = value
OpenManus/app/agent/browser.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import TYPE_CHECKING, Optional
3
+
4
+ from pydantic import Field, model_validator
5
+
6
+ from app.agent.toolcall import ToolCallAgent
7
+ from app.logger import logger
8
+ from app.prompt.browser import NEXT_STEP_PROMPT, SYSTEM_PROMPT
9
+ from app.schema import Message, ToolChoice
10
+ from app.tool import BrowserUseTool, Terminate, ToolCollection
11
+
12
+
13
+ # Avoid circular import if BrowserAgent needs BrowserContextHelper
14
+ if TYPE_CHECKING:
15
+ from app.agent.base import BaseAgent # Or wherever memory is defined
16
+
17
+
18
class BrowserContextHelper:
    """Helper that reads browser state for an agent and builds its next-step prompt."""

    def __init__(self, agent: "BaseAgent"):
        self.agent = agent
        # Screenshot from the latest state query; cleared once added to memory
        self._current_base64_image: Optional[str] = None

    async def get_browser_state(self) -> Optional[dict]:
        """Query the agent's browser tool and return its state parsed from JSON.

        Returns None when the tool is missing, reports an error, or the query fails.
        """
        tool = self.agent.available_tools.get_tool(BrowserUseTool().name)
        if not (tool and hasattr(tool, "get_current_state")):
            logger.warning("BrowserUseTool not found or doesn't have get_current_state")
            return None
        try:
            state_result = await tool.get_current_state()
            if state_result.error:
                logger.debug(f"Browser state error: {state_result.error}")
                return None
            # Keep the screenshot when present and non-empty, otherwise reset it
            self._current_base64_image = (
                getattr(state_result, "base64_image", None) or None
            )
            return json.loads(state_result.output)
        except Exception as e:
            logger.debug(f"Failed to get browser state: {str(e)}")
            return None

    async def format_next_step_prompt(self) -> str:
        """Gets browser state and formats the browser prompt."""
        state = await self.get_browser_state()
        # Every placeholder defaults to empty text; results stay empty here
        placeholders = {
            "url_placeholder": "",
            "tabs_placeholder": "",
            "content_above_placeholder": "",
            "content_below_placeholder": "",
            "results_placeholder": "",
        }

        if state and not state.get("error"):
            placeholders["url_placeholder"] = (
                f"\n URL: {state.get('url', 'N/A')}\n Title: {state.get('title', 'N/A')}"
            )
            tabs = state.get("tabs", [])
            if tabs:
                placeholders["tabs_placeholder"] = f"\n {len(tabs)} tab(s) available"
            pixels_above = state.get("pixels_above", 0)
            pixels_below = state.get("pixels_below", 0)
            if pixels_above > 0:
                placeholders["content_above_placeholder"] = f" ({pixels_above} pixels)"
            if pixels_below > 0:
                placeholders["content_below_placeholder"] = f" ({pixels_below} pixels)"

        screenshot = self._current_base64_image
        if screenshot:
            self.agent.memory.add_message(
                Message.user_message(
                    content="Current browser screenshot:",
                    base64_image=screenshot,
                )
            )
            self._current_base64_image = None  # Consume the image after adding

        return NEXT_STEP_PROMPT.format(**placeholders)

    async def cleanup_browser(self):
        """Call the browser tool's cleanup coroutine when the tool exposes one."""
        tool = self.agent.available_tools.get_tool(BrowserUseTool().name)
        if tool and hasattr(tool, "cleanup"):
            await tool.cleanup()
80
+
81
+
82
class BrowserAgent(ToolCallAgent):
    """
    A browser agent that uses the browser_use library to control a browser.

    This agent can navigate web pages, interact with elements, fill forms,
    extract content, and perform other browser-based actions to accomplish tasks.
    """

    name: str = "browser"
    description: str = "A browser agent that can control a browser to accomplish tasks"

    system_prompt: str = SYSTEM_PROMPT
    next_step_prompt: str = NEXT_STEP_PROMPT

    max_observe: int = 10000
    max_steps: int = 20

    # Tools offered to the model: the browser itself plus a way to terminate
    available_tools: ToolCollection = Field(
        default_factory=lambda: ToolCollection(BrowserUseTool(), Terminate())
    )

    # Use Auto for tool choice to allow both tool usage and free-form responses
    tool_choices: ToolChoice = ToolChoice.AUTO
    special_tool_names: list[str] = Field(default_factory=lambda: [Terminate().name])

    # Created by the validator below once the model instance exists
    browser_context_helper: Optional[BrowserContextHelper] = None

    @model_validator(mode="after")
    def initialize_helper(self) -> "BrowserAgent":
        """Attach a BrowserContextHelper bound to this agent."""
        self.browser_context_helper = BrowserContextHelper(self)
        return self

    async def think(self) -> bool:
        """Process current state and decide next actions using tools, with browser state info added"""
        helper = self.browser_context_helper
        self.next_step_prompt = await helper.format_next_step_prompt()
        return await super().think()

    async def cleanup(self):
        """Clean up browser agent resources through the browser context helper."""
        helper = self.browser_context_helper
        await helper.cleanup_browser()
OpenManus/app/agent/manus.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Optional
2
+
3
+ from pydantic import Field, model_validator
4
+
5
+ from app.agent.browser import BrowserContextHelper
6
+ from app.agent.toolcall import ToolCallAgent
7
+ from app.config import config
8
+ from app.logger import logger
9
+ from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
10
+ from app.tool import Terminate, ToolCollection
11
+ from app.tool.ask_human import AskHuman
12
+ from app.tool.browser_use_tool import BrowserUseTool
13
+ from app.tool.mcp import MCPClients, MCPClientTool
14
+ from app.tool.python_execute import PythonExecute
15
+ from app.tool.str_replace_editor import StrReplaceEditor
16
+
17
+
18
class Manus(ToolCallAgent):
    """A versatile general-purpose agent with support for both local and MCP tools."""

    name: str = "Manus"
    description: str = "A versatile agent that can solve various tasks using multiple tools including MCP-based tools"

    system_prompt: str = SYSTEM_PROMPT.format(directory=config.workspace_root)
    next_step_prompt: str = NEXT_STEP_PROMPT

    max_observe: int = 10000
    max_steps: int = 20

    # MCP clients for remote tool access
    mcp_clients: MCPClients = Field(default_factory=MCPClients)

    # Add general-purpose tools to the tool collection
    available_tools: ToolCollection = Field(
        default_factory=lambda: ToolCollection(
            PythonExecute(),
            BrowserUseTool(),
            StrReplaceEditor(),
            AskHuman(),
            Terminate(),
        )
    )

    special_tool_names: list[str] = Field(default_factory=lambda: [Terminate().name])
    browser_context_helper: Optional[BrowserContextHelper] = None

    # Track connected MCP servers
    connected_servers: Dict[str, str] = Field(
        default_factory=dict
    )  # server_id -> url/command
    _initialized: bool = False

    @model_validator(mode="after")
    def initialize_helper(self) -> "Manus":
        """Initialize basic components synchronously."""
        self.browser_context_helper = BrowserContextHelper(self)
        return self

    @classmethod
    async def create(cls, **kwargs) -> "Manus":
        """Factory method to create and properly initialize a Manus instance."""
        instance = cls(**kwargs)
        await instance.initialize_mcp_servers()
        instance._initialized = True
        return instance

    async def initialize_mcp_servers(self) -> None:
        """Initialize connections to configured MCP servers.

        A failure to connect to one server is logged and does not prevent the
        remaining servers from being tried.
        """
        for server_id, server_config in config.mcp_config.servers.items():
            try:
                if server_config.type == "sse":
                    if server_config.url:
                        await self.connect_mcp_server(server_config.url, server_id)
                        logger.info(
                            f"Connected to MCP server {server_id} at {server_config.url}"
                        )
                elif server_config.type == "stdio":
                    if server_config.command:
                        await self.connect_mcp_server(
                            server_config.command,
                            server_id,
                            use_stdio=True,
                            stdio_args=server_config.args,
                        )
                        logger.info(
                            f"Connected to MCP server {server_id} using command {server_config.command}"
                        )
            except Exception as e:
                logger.error(f"Failed to connect to MCP server {server_id}: {e}")

    async def connect_mcp_server(
        self,
        server_url: str,
        server_id: str = "",
        use_stdio: bool = False,
        stdio_args: Optional[List[str]] = None,
    ) -> None:
        """Connect to an MCP server and add its tools.

        Args:
            server_url: SSE URL, or the command to launch when ``use_stdio`` is set.
            server_id: Identifier used to track the server and select its tools.
            use_stdio: Connect over stdio instead of SSE.
            stdio_args: Arguments for the stdio command; None means no arguments.
        """
        if use_stdio:
            await self.mcp_clients.connect_stdio(
                server_url, stdio_args or [], server_id
            )
        else:
            await self.mcp_clients.connect_sse(server_url, server_id)
        # Bookkeeping is the same for both transports
        self.connected_servers[server_id or server_url] = server_url

        # Update available tools with only the new tools from this server
        new_tools = [
            tool for tool in self.mcp_clients.tools if tool.server_id == server_id
        ]
        self.available_tools.add_tools(*new_tools)

    async def disconnect_mcp_server(self, server_id: str = "") -> None:
        """Disconnect from an MCP server and remove its tools.

        An empty ``server_id`` clears the record of every connected server.
        """
        await self.mcp_clients.disconnect(server_id)
        if server_id:
            self.connected_servers.pop(server_id, None)
        else:
            self.connected_servers.clear()

        # Rebuild available tools without the disconnected server's tools
        base_tools = [
            tool
            for tool in self.available_tools.tools
            if not isinstance(tool, MCPClientTool)
        ]
        self.available_tools = ToolCollection(*base_tools)
        self.available_tools.add_tools(*self.mcp_clients.tools)

    async def cleanup(self):
        """Clean up Manus agent resources."""
        if self.browser_context_helper:
            await self.browser_context_helper.cleanup_browser()
        # Disconnect from all MCP servers only if we were initialized
        if self._initialized:
            await self.disconnect_mcp_server()
            self._initialized = False

    async def think(self) -> bool:
        """Process current state and decide next actions with appropriate context."""
        if not self._initialized:
            await self.initialize_mcp_servers()
            self._initialized = True

        original_prompt = self.next_step_prompt
        recent_messages = self.memory.messages[-3:] if self.memory.messages else []
        # True when any of the last three messages called the browser tool
        browser_in_use = any(
            tc.function.name == BrowserUseTool().name
            for msg in recent_messages
            if msg.tool_calls
            for tc in msg.tool_calls
        )

        try:
            if browser_in_use:
                self.next_step_prompt = (
                    await self.browser_context_helper.format_next_step_prompt()
                )

            return await super().think()
        finally:
            # Restore the original prompt even when thinking raises, so a
            # one-off browser prompt never replaces the base prompt for good.
            self.next_step_prompt = original_prompt
OpenManus/app/agent/mcp.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, List, Optional, Tuple
2
+
3
+ from pydantic import Field
4
+
5
+ from app.agent.toolcall import ToolCallAgent
6
+ from app.logger import logger
7
+ from app.prompt.mcp import MULTIMEDIA_RESPONSE_PROMPT, NEXT_STEP_PROMPT, SYSTEM_PROMPT
8
+ from app.schema import AgentState, Message
9
+ from app.tool.base import ToolResult
10
+ from app.tool.mcp import MCPClients
11
+
12
+
13
class MCPAgent(ToolCallAgent):
    """Agent for interacting with MCP (Model Context Protocol) servers.

    This agent connects to an MCP server using either SSE or stdio transport
    and makes the server's tools available through the agent's tool interface.
    """

    name: str = "mcp_agent"
    description: str = "An agent that connects to an MCP server and uses its tools."

    system_prompt: str = SYSTEM_PROMPT
    next_step_prompt: str = NEXT_STEP_PROMPT

    # Initialize MCP tool collection
    mcp_clients: MCPClients = Field(default_factory=MCPClients)
    available_tools: MCPClients = None  # Will be set in initialize()

    max_steps: int = 20
    connection_type: str = "stdio"  # "stdio" or "sse"

    # Track tool schemas to detect changes
    tool_schemas: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
    _refresh_tools_interval: int = 5  # Refresh tools every N steps

    # Special tool names that should trigger termination
    special_tool_names: List[str] = Field(default_factory=lambda: ["terminate"])

    async def initialize(
        self,
        connection_type: Optional[str] = None,
        server_url: Optional[str] = None,
        command: Optional[str] = None,
        args: Optional[List[str]] = None,
    ) -> None:
        """Initialize the MCP connection.

        Args:
            connection_type: Type of connection to use ("stdio" or "sse")
            server_url: URL of the MCP server (for SSE connection)
            command: Command to run (for stdio connection)
            args: Arguments for the command (for stdio connection)

        Raises:
            ValueError: If the connection type is unsupported, or the URL/command
                required by the selected transport is missing.
        """
        if connection_type:
            self.connection_type = connection_type

        # Connect to the MCP server based on connection type
        transport = self.connection_type
        if transport == "sse":
            if not server_url:
                raise ValueError("Server URL is required for SSE connection")
            await self.mcp_clients.connect_sse(server_url=server_url)
        elif transport == "stdio":
            if not command:
                raise ValueError("Command is required for stdio connection")
            await self.mcp_clients.connect_stdio(command=command, args=args or [])
        else:
            raise ValueError(f"Unsupported connection type: {self.connection_type}")

        # The MCP client collection doubles as the agent's tool collection
        self.available_tools = self.mcp_clients

        # Store initial tool schemas
        await self._refresh_tools()

        # Record the system prompt together with the names of the available tools
        tools_info = ", ".join(list(self.mcp_clients.tool_map.keys()))
        intro = Message.system_message(
            f"{self.system_prompt}\n\nAvailable MCP tools: {tools_info}"
        )
        self.memory.add_message(intro)

    async def _refresh_tools(self) -> Tuple[List[str], List[str]]:
        """Refresh the list of available tools from the MCP server.

        Returns:
            A tuple of (added_tools, removed_tools)
        """
        session = self.mcp_clients.session
        if not session:
            return [], []

        # Get current tool schemas directly from the server
        listing = await session.list_tools()
        latest_schemas = {tool.name: tool.inputSchema for tool in listing.tools}

        # Diff the server's tool names against the previously stored ones
        latest_names = set(latest_schemas)
        known_names = set(self.tool_schemas)
        added_tools = list(latest_names - known_names)
        removed_tools = list(known_names - latest_names)

        # Tools present before and now whose schema differs
        changed_tools = [
            tool_name
            for tool_name in latest_names & known_names
            if latest_schemas[tool_name] != self.tool_schemas.get(tool_name)
        ]

        # Update stored schemas
        self.tool_schemas = latest_schemas

        # Log and notify about changes
        if added_tools:
            logger.info(f"Added MCP tools: {added_tools}")
            self.memory.add_message(
                Message.system_message(f"New tools available: {', '.join(added_tools)}")
            )
        if removed_tools:
            logger.info(f"Removed MCP tools: {removed_tools}")
            self.memory.add_message(
                Message.system_message(
                    f"Tools no longer available: {', '.join(removed_tools)}"
                )
            )
        if changed_tools:
            logger.info(f"Changed MCP tools: {changed_tools}")

        return added_tools, removed_tools

    async def think(self) -> bool:
        """Process current state and decide next action."""
        # Stop when there is no session or no tools to work with
        if not self.mcp_clients.session or not self.mcp_clients.tool_map:
            logger.info("MCP service is no longer available, ending interaction")
            self.state = AgentState.FINISHED
            return False

        # Refresh tools periodically
        refresh_due = self.current_step % self._refresh_tools_interval == 0
        if refresh_due:
            await self._refresh_tools()
            # All tools removed indicates shutdown
            if not self.mcp_clients.tool_map:
                logger.info("MCP service has shut down, ending interaction")
                self.state = AgentState.FINISHED
                return False

        # Use the parent class's think method
        return await super().think()

    async def _handle_special_tool(self, name: str, result: Any, **kwargs) -> None:
        """Handle special tool execution and state changes"""
        # First process with parent handler
        await super()._handle_special_tool(name, result, **kwargs)

        # Add a follow-up system message when the result carries an image
        has_image = isinstance(result, ToolResult) and result.base64_image
        if has_image:
            note = Message.system_message(
                MULTIMEDIA_RESPONSE_PROMPT.format(tool_name=name)
            )
            self.memory.add_message(note)

    def _should_finish_execution(self, name: str, **kwargs) -> bool:
        """Determine if tool execution should finish the agent"""
        # Only the 'terminate' tool (case-insensitive) ends execution
        return name.lower() == "terminate"

    async def cleanup(self) -> None:
        """Clean up MCP connection when done."""
        if not self.mcp_clients.session:
            return
        await self.mcp_clients.disconnect()
        logger.info("MCP connection closed")

    async def run(self, request: Optional[str] = None) -> str:
        """Run the agent with cleanup when done."""
        try:
            return await super().run(request)
        finally:
            # Ensure cleanup happens even if there's an error
            await self.cleanup()
OpenManus/app/agent/react.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+ from pydantic import Field
5
+
6
+ from app.agent.base import BaseAgent
7
+ from app.llm import LLM
8
+ from app.schema import AgentState, Memory
9
+
10
+
11
class ReActAgent(BaseAgent, ABC):
    """Agent whose step is a think phase followed by an optional act phase."""

    name: str
    description: Optional[str] = None

    # Prompts
    system_prompt: Optional[str] = None
    next_step_prompt: Optional[str] = None

    # Dependencies and state
    llm: Optional[LLM] = Field(default_factory=LLM)
    memory: Memory = Field(default_factory=Memory)
    state: AgentState = AgentState.IDLE

    # Execution control
    max_steps: int = 10
    current_step: int = 0

    @abstractmethod
    async def think(self) -> bool:
        """Process current state and decide next action"""

    @abstractmethod
    async def act(self) -> str:
        """Execute decided actions"""

    async def step(self) -> str:
        """Execute a single step: think and act."""
        # Act only when the think phase asks for it
        if await self.think():
            return await self.act()
        return "Thinking complete - no action needed"
OpenManus/app/agent/swe.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from pydantic import Field
4
+
5
+ from app.agent.toolcall import ToolCallAgent
6
+ from app.prompt.swe import SYSTEM_PROMPT
7
+ from app.tool import Bash, StrReplaceEditor, Terminate, ToolCollection
8
+
9
+
10
class SWEAgent(ToolCallAgent):
    """An agent that implements the SWEAgent paradigm for executing code and natural conversations."""

    name: str = "swe"
    description: str = "an autonomous AI programmer that interacts directly with the computer to solve tasks."

    # Prompts: no per-step prompt is used for this agent
    system_prompt: str = SYSTEM_PROMPT
    next_step_prompt: str = ""

    # Tools: shell access, file editing, and termination
    available_tools: ToolCollection = ToolCollection(
        Bash(),
        StrReplaceEditor(),
        Terminate(),
    )
    special_tool_names: List[str] = Field(
        default_factory=lambda: [Terminate().name]
    )

    max_steps: int = 20
OpenManus/app/agent/toolcall.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ from typing import Any, List, Optional, Union
4
+
5
+ from pydantic import Field
6
+
7
+ from app.agent.react import ReActAgent
8
+ from app.exceptions import TokenLimitExceeded
9
+ from app.logger import logger
10
+ from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT
11
+ from app.schema import TOOL_CHOICE_TYPE, AgentState, Message, ToolCall, ToolChoice
12
+ from app.tool import CreateChatCompletion, Terminate, ToolCollection
13
+
14
+
15
+ TOOL_CALL_REQUIRED = "Tool calls required but none provided"
16
+
17
+
18
+ class ToolCallAgent(ReActAgent):
19
+ """Base agent class for handling tool/function calls with enhanced abstraction"""
20
+
21
+ name: str = "toolcall"
22
+ description: str = "an agent that can execute tool calls."
23
+
24
+ system_prompt: str = SYSTEM_PROMPT
25
+ next_step_prompt: str = NEXT_STEP_PROMPT
26
+
27
+ available_tools: ToolCollection = ToolCollection(
28
+ CreateChatCompletion(), Terminate()
29
+ )
30
+ tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO # type: ignore
31
+ special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
32
+
33
+ tool_calls: List[ToolCall] = Field(default_factory=list)
34
+ _current_base64_image: Optional[str] = None
35
+
36
+ max_steps: int = 30
37
+ max_observe: Optional[Union[int, bool]] = None
38
+
39
async def think(self) -> bool:
    """Ask the LLM for the next move and record its reply in memory.

    Returns True when there is something for ``act`` to handle (tool calls,
    or plain content in the modes that allow it). Returns False when nothing
    is actionable, when the token limit was hit, or when processing the
    reply failed.
    """
    if self.next_step_prompt:
        self.messages += [Message.user_message(self.next_step_prompt)]

    try:
        # Query the model, advertising every available tool
        response = await self.llm.ask_tool(
            messages=self.messages,
            system_msgs=(
                None
                if not self.system_prompt
                else [Message.system_message(self.system_prompt)]
            ),
            tools=self.available_tools.to_params(),
            tool_choice=self.tool_choices,
        )
    except ValueError:
        raise
    except Exception as exc:
        # A retry wrapper may carry TokenLimitExceeded as its __cause__
        cause = getattr(exc, "__cause__", None)
        if not isinstance(cause, TokenLimitExceeded):
            raise
        logger.error(f"🚨 Token limit error (from RetryError): {cause}")
        self.memory.add_message(
            Message.assistant_message(
                f"Maximum token limit reached, cannot continue execution: {str(cause)}"
            )
        )
        self.state = AgentState.FINISHED
        return False

    calls = response.tool_calls if response and response.tool_calls else []
    self.tool_calls = calls
    content = response.content if response and response.content else ""

    # Report what the model decided
    logger.info(f"✨ {self.name}'s thoughts: {content}")
    selected = len(calls) if calls else 0
    logger.info(f"🛠️ {self.name} selected {selected} tools to use")
    if calls:
        prepared = [call.function.name for call in calls]
        logger.info(f"🧰 Tools being prepared: {prepared}")
        logger.info(f"🔧 Tool arguments: {calls[0].function.arguments}")

    try:
        if response is None:
            raise RuntimeError("No response received from the LLM")

        mode = self.tool_choices

        # Tools disabled: only plain content can be kept
        if mode == ToolChoice.NONE:
            if calls:
                logger.warning(
                    f"🤔 Hmm, {self.name} tried to use tools when they weren't available!"
                )
            if not content:
                return False
            self.memory.add_message(Message.assistant_message(content))
            return True

        # Store the assistant turn, with tool calls attached when present
        if self.tool_calls:
            reply = Message.from_tool_calls(
                content=content, tool_calls=self.tool_calls
            )
        else:
            reply = Message.assistant_message(content)
        self.memory.add_message(reply)

        if not self.tool_calls:
            if mode == ToolChoice.REQUIRED:
                return True  # act() raises for the missing tool call
            if mode == ToolChoice.AUTO:
                # Content alone is enough to continue in 'auto' mode
                return bool(content)

        return bool(self.tool_calls)
    except Exception as exc:
        logger.error(f"🚨 Oops! The {self.name}'s thinking process hit a snag: {exc}")
        self.memory.add_message(
            Message.assistant_message(
                f"Error encountered while processing: {str(exc)}"
            )
        )
        return False
130
+
131
async def act(self) -> str:
    """Run every pending tool call and store each result in memory.

    Returns the tool observations joined by blank lines. When there are no
    tool calls, returns the content of the last message (or a placeholder).

    Raises:
        ValueError: no tool calls are pending while the tool choice is REQUIRED.
    """
    pending = self.tool_calls
    if not pending:
        if self.tool_choices == ToolChoice.REQUIRED:
            raise ValueError(TOOL_CALL_REQUIRED)
        # Nothing to execute: fall back to the latest message text
        return self.messages[-1].content or "No content or commands to execute"

    observations = []
    for call in pending:
        # Start each call without an image left over from the previous one
        self._current_base64_image = None

        output = await self.execute_tool(call)
        if self.max_observe:
            # Truncate the observation to the configured length
            output = output[: self.max_observe]

        logger.info(
            f"🎯 Tool '{call.function.name}' completed its mission! Result: {output}"
        )

        # Record the tool response so the next think() step can see it
        self.memory.add_message(
            Message.tool_message(
                content=output,
                tool_call_id=call.id,
                name=call.function.name,
                base64_image=self._current_base64_image,
            )
        )
        observations.append(output)

    return "\n\n".join(observations)
165
+
166
async def execute_tool(self, command: ToolCall) -> str:
    """Execute one tool call and return its observation text.

    Failures are reported in the returned string (prefixed with "Error:")
    instead of being raised.
    """
    if not command or not command.function or not command.function.name:
        return "Error: Invalid command format"

    name = command.function.name
    if name not in self.available_tools.tool_map:
        return f"Error: Unknown tool '{name}'"

    try:
        # The arguments arrive as a JSON string; empty means no arguments
        tool_input = json.loads(command.function.arguments or "{}")

        logger.info(f"🔧 Activating tool: '{name}'...")
        result = await self.available_tools.execute(name=name, tool_input=tool_input)

        # Special tools may change the agent state (e.g. finish the run)
        await self._handle_special_tool(name=name, result=result)

        # Remember an attached image so act() can add it to the tool message
        if hasattr(result, "base64_image") and result.base64_image:
            self._current_base64_image = result.base64_image

        if result:
            return f"Observed output of cmd `{name}` executed:\n{str(result)}"
        return f"Cmd `{name}` completed with no output"
    except json.JSONDecodeError:
        problem = f"Error parsing arguments for {name}: Invalid JSON format"
        logger.error(
            f"📝 Oops! The arguments for '{name}' don't make sense - invalid JSON, arguments:{command.function.arguments}"
        )
        return f"Error: {problem}"
    except Exception as exc:
        problem = f"⚠️ Tool '{name}' encountered a problem: {str(exc)}"
        logger.exception(problem)
        return f"Error: {problem}"
217
+
218
+ async def _handle_special_tool(self, name: str, result: Any, **kwargs):
219
+ """Handle special tool execution and state changes"""
220
+ if not self._is_special_tool(name):
221
+ return
222
+
223
+ if self._should_finish_execution(name=name, result=result, **kwargs):
224
+ # Set agent state to finished
225
+ logger.info(f"🏁 Special tool '{name}' has completed the task!")
226
+ self.state = AgentState.FINISHED
227
+
228
+ @staticmethod
229
+ def _should_finish_execution(**kwargs) -> bool:
230
+ """Determine if tool execution should finish the agent"""
231
+ return True
232
+
233
+ def _is_special_tool(self, name: str) -> bool:
234
+ """Check if tool name is in special tools list"""
235
+ return name.lower() in [n.lower() for n in self.special_tool_names]
236
+
237
async def cleanup(self):
    """Await the async ``cleanup`` of every tool that has one.

    A failure in one tool is logged and does not stop the remaining tools
    from being cleaned up.
    """
    logger.info(f"🧹 Cleaning up resources for agent '{self.name}'...")
    for tool_name, tool in self.available_tools.tool_map.items():
        release = getattr(tool, "cleanup", None)
        # Only coroutine-function cleanups are awaited; anything else is skipped
        if release is None or not asyncio.iscoroutinefunction(release):
            continue
        try:
            logger.debug(f"🧼 Cleaning up tool: {tool_name}")
            await release()
        except Exception as exc:
            logger.error(
                f"🚨 Error cleaning up tool '{tool_name}': {exc}", exc_info=True
            )
    logger.info(f"✨ Cleanup complete for agent '{self.name}'.")
252
+
253
async def run(self, request: Optional[str] = None) -> str:
    """Delegate to the parent ``run`` and always clean up afterwards."""
    try:
        outcome = await super().run(request)
    finally:
        # Runs on success and on failure alike
        await self.cleanup()
    return outcome
OpenManus/app/bedrock.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import sys
3
+ import time
4
+ import uuid
5
+ from datetime import datetime
6
+ from typing import Dict, List, Literal, Optional
7
+
8
+ import boto3
9
+
10
+
11
+ # Global variables to track the current tool use ID across function calls
12
+ # Tmp solution
13
+ CURRENT_TOOLUSE_ID = None
14
+
15
+
16
+ # Class to handle OpenAI-style response formatting
17
class OpenAIResponse:
    """Attribute-style view over an OpenAI-format response dictionary.

    Nested dictionaries (including those inside lists) are wrapped
    recursively, so ``response.choices[0].message.content`` works.
    """

    def __init__(self, data):
        """Copy every key of ``data`` onto the instance as an attribute."""
        for key, value in data.items():
            if isinstance(value, dict):
                value = OpenAIResponse(value)
            elif isinstance(value, list):
                value = [
                    OpenAIResponse(item) if isinstance(item, dict) else item
                    for item in value
                ]
            setattr(self, key, value)

    def model_dump(self, *args, **kwargs):
        """Return the top-level attributes as a dict plus a ``created_at`` stamp.

        Positional and keyword arguments are accepted and ignored. Nested
        values are returned as stored (``OpenAIResponse`` objects), not
        converted back to dicts.
        """
        # Work on a copy: returning self.__dict__ itself would add a
        # "created_at" attribute to the instance and let callers mutate it.
        data = dict(self.__dict__)
        data["created_at"] = datetime.now().isoformat()
        return data
35
+
36
+
37
+ # Main client class for interacting with Amazon Bedrock
38
class BedrockClient:
    """Entry point exposing an OpenAI-like ``chat.completions`` tree for Bedrock."""

    def __init__(self):
        """Create the boto3 "bedrock-runtime" client.

        AWS credentials/region must already be configured in the environment.
        If setup fails, the error is printed and the process exits with status 1.
        """
        try:
            runtime = boto3.client("bedrock-runtime")
            self.client = runtime
            self.chat = Chat(runtime)
        except Exception as err:
            print(f"Error initializing Bedrock client: {err}")
            sys.exit(1)
47
+
48
+
49
+ # Chat interface class
50
class Chat:
    """Namespace object holding the ``completions`` interface."""

    def __init__(self, client):
        # Hand the underlying client straight to the completions handler
        completions = ChatCompletions(client)
        self.completions = completions
53
+
54
+
55
+ # Core class handling chat completions functionality
56
class ChatCompletions:
    """OpenAI-style ``chat.completions`` facade over the Bedrock Converse API."""

    def __init__(self, client):
        # Object exposing ``converse`` and ``converse_stream``
        self.client = client

    def _convert_openai_tools_to_bedrock_format(self, tools):
        """Convert OpenAI function-tool definitions to Bedrock ``toolSpec`` entries.

        Entries whose ``type`` is not "function" are skipped.
        """
        bedrock_tools = []
        for tool in tools or []:
            if tool.get("type") != "function":
                continue
            function = tool.get("function") or {}
            parameters = function.get("parameters") or {}
            bedrock_tools.append(
                {
                    "toolSpec": {
                        "name": function.get("name", ""),
                        "description": function.get("description", ""),
                        "inputSchema": {
                            "json": {
                                "type": "object",
                                "properties": parameters.get("properties", {}),
                                "required": parameters.get("required", []),
                            }
                        },
                    }
                }
            )
        return bedrock_tools

    def _convert_openai_messages_to_bedrock_format(self, messages):
        """Convert OpenAI chat messages to Bedrock ``(system, messages)``.

        Returns:
            A tuple ``(system_prompt, bedrock_messages)``. ``system_prompt`` is
            a list with the last system message (empty when there is none).

        Raises:
            ValueError: a message has a role other than system, user,
                assistant or tool.
        """
        global CURRENT_TOOLUSE_ID
        bedrock_messages = []
        system_prompt = []
        for message in messages:
            role = message.get("role")
            if role == "system":
                system_prompt = [{"text": message.get("content")}]
            elif role == "user":
                bedrock_messages.append(
                    {"role": "user", "content": [{"text": message.get("content")}]}
                )
            elif role == "assistant":
                content = []
                text = message.get("content")
                if text:
                    # Skip None/empty text so no {"text": None} block is sent
                    content.append({"text": text})
                tool_calls = message.get("tool_calls") or []
                for tool_call in tool_calls:
                    function = tool_call["function"]
                    content.append(
                        {
                            "toolUse": {
                                "toolUseId": tool_call["id"],
                                "name": function["name"],
                                # Empty argument strings mean "no arguments"
                                "input": json.loads(function.get("arguments") or "{}"),
                            }
                        }
                    )
                if tool_calls:
                    CURRENT_TOOLUSE_ID = tool_calls[0]["id"]
                if not content:
                    # Same placeholder the response conversion uses for empty text
                    content.append({"text": "."})
                bedrock_messages.append({"role": "assistant", "content": content})
            elif role == "tool":
                # Prefer the id carried by the message; the module-level id is
                # only a fallback for messages that lack one.
                tool_use_id = message.get("tool_call_id") or CURRENT_TOOLUSE_ID
                result_block = {
                    "toolResult": {
                        "toolUseId": tool_use_id,
                        "content": [{"text": message.get("content")}],
                    }
                }
                previous = bedrock_messages[-1] if bedrock_messages else None
                if (
                    previous is not None
                    and previous["role"] == "user"
                    and all("toolResult" in block for block in previous["content"])
                ):
                    # Results for the same assistant turn share one user message
                    previous["content"].append(result_block)
                else:
                    bedrock_messages.append({"role": "user", "content": [result_block]})
            else:
                raise ValueError(f"Invalid role: {message.get('role')}")
        return system_prompt, bedrock_messages

    def _convert_bedrock_response_to_openai_format(self, bedrock_response):
        """Wrap a Converse-style response dict as an OpenAI-format response."""
        global CURRENT_TOOLUSE_ID
        message = bedrock_response.get("output", {}).get("message", {})
        content_blocks = message.get("content") or []

        content = "".join(item.get("text", "") for item in content_blocks)
        if content == "":
            # Placeholder so downstream code never sees empty content
            content = "."

        openai_tool_calls = []
        for content_item in content_blocks:
            bedrock_tool_use = content_item.get("toolUse")
            if not bedrock_tool_use:
                continue
            CURRENT_TOOLUSE_ID = bedrock_tool_use["toolUseId"]
            openai_tool_calls.append(
                {
                    "id": bedrock_tool_use["toolUseId"],
                    "type": "function",
                    "function": {
                        "name": bedrock_tool_use["name"],
                        "arguments": json.dumps(bedrock_tool_use.get("input", {})),
                    },
                }
            )

        usage = bedrock_response.get("usage", {})
        openai_format = {
            "id": f"chatcmpl-{uuid.uuid4()}",
            "created": int(time.time()),
            "object": "chat.completion",
            "system_fingerprint": None,
            "choices": [
                {
                    "finish_reason": bedrock_response.get("stopReason", "end_turn"),
                    "index": 0,
                    "message": {
                        "content": content,
                        "role": message.get("role") or "assistant",
                        "tool_calls": openai_tool_calls or None,
                        "function_call": None,
                    },
                }
            ],
            "usage": {
                "completion_tokens": usage.get("outputTokens", 0),
                "prompt_tokens": usage.get("inputTokens", 0),
                "total_tokens": usage.get("totalTokens", 0),
            },
        }
        return OpenAIResponse(openai_format)

    def _build_converse_request(self, model, messages, max_tokens, temperature, tools):
        """Assemble the keyword arguments shared by converse/converse_stream."""
        system_prompt, bedrock_messages = self._convert_openai_messages_to_bedrock_format(
            messages
        )
        request = {
            "modelId": model,
            "messages": bedrock_messages,
            "inferenceConfig": {"temperature": temperature, "maxTokens": max_tokens},
        }
        # Optional members are left out entirely rather than sent as None/empty
        if system_prompt:
            request["system"] = system_prompt
        if tools:
            request["toolConfig"] = {"tools": tools}
        return request

    async def _invoke_bedrock(
        self,
        model: str,
        messages: List[Dict[str, str]],
        max_tokens: int,
        temperature: float,
        tools: Optional[List[dict]] = None,
        tool_choice: Literal["none", "auto", "required"] = "auto",
        **kwargs,
    ) -> "OpenAIResponse":
        """Non-streaming invocation. ``tool_choice`` and ``kwargs`` are not forwarded."""
        request = self._build_converse_request(
            model, messages, max_tokens, temperature, tools
        )
        response = self.client.converse(**request)
        return self._convert_bedrock_response_to_openai_format(response)

    async def _invoke_bedrock_stream(
        self,
        model: str,
        messages: List[Dict[str, str]],
        max_tokens: int,
        temperature: float,
        tools: Optional[List[dict]] = None,
        tool_choice: Literal["none", "auto", "required"] = "auto",
        **kwargs,
    ) -> "OpenAIResponse":
        """Streaming invocation: echoes deltas to stdout and returns the full reply.

        ``tool_choice`` and ``kwargs`` are not forwarded.
        """
        global CURRENT_TOOLUSE_ID
        request = self._build_converse_request(
            model, messages, max_tokens, temperature, tools
        )
        response = self.client.converse_stream(**request)

        bedrock_response = {
            "output": {"message": {"role": "", "content": []}},
            "stopReason": "end_turn",
            "usage": {},
            "metrics": {},
        }
        text_parts = {}  # content block index -> list of text fragments
        tool_blocks = {}  # content block index -> tool use id/name/raw JSON input

        for event in response.get("stream") or []:
            role = event.get("messageStart", {}).get("role")
            if role:
                bedrock_response["output"]["message"]["role"] = role

            block_start = event.get("contentBlockStart", {})
            tool_start = block_start.get("start", {}).get("toolUse")
            if tool_start:
                tool_blocks[block_start.get("contentBlockIndex", 0)] = {
                    "toolUseId": tool_start["toolUseId"],
                    "name": tool_start["name"],
                    "raw": "",
                }
                CURRENT_TOOLUSE_ID = tool_start["toolUseId"]

            block_delta = event.get("contentBlockDelta", {})
            delta = block_delta.get("delta", {})
            index = block_delta.get("contentBlockIndex", 0)
            if delta.get("text"):
                text_parts.setdefault(index, []).append(delta["text"])
                print(delta["text"], end="", flush=True)
            if delta.get("toolUse"):
                fragment = delta["toolUse"].get("input", "")
                if index in tool_blocks:
                    tool_blocks[index]["raw"] += fragment
                print(fragment, end="", flush=True)

            stop_reason = event.get("messageStop", {}).get("stopReason")
            if stop_reason:
                bedrock_response["stopReason"] = stop_reason

            metadata = event.get("metadata", {})
            if metadata.get("usage"):
                bedrock_response["usage"] = metadata["usage"]
            if metadata.get("metrics"):
                bedrock_response["metrics"] = metadata["metrics"]
        print()

        # Rebuild the content list in block order, whatever mix of text and
        # tool-use blocks the model produced.
        content = bedrock_response["output"]["message"]["content"]
        for index in sorted(set(text_parts) | set(tool_blocks)):
            if index in tool_blocks:
                block = tool_blocks[index]
                content.append(
                    {
                        "toolUse": {
                            "toolUseId": block["toolUseId"],
                            "name": block["name"],
                            # A tool called without arguments streams no input
                            "input": json.loads(block["raw"]) if block["raw"] else {},
                        }
                    }
                )
            else:
                content.append({"text": "".join(text_parts[index])})

        return self._convert_bedrock_response_to_openai_format(bedrock_response)

    def create(
        self,
        model: str,
        messages: List[Dict[str, str]],
        max_tokens: int,
        temperature: float,
        stream: Optional[bool] = True,
        tools: Optional[List[dict]] = None,
        tool_choice: Literal["none", "auto", "required"] = "auto",
        **kwargs,
    ) -> "OpenAIResponse":
        """Main entry point for chat completion.

        Returns the coroutine of the streaming or non-streaming invocation;
        the caller must await it to obtain the response.
        """
        bedrock_tools = []
        if tools is not None:
            bedrock_tools = self._convert_openai_tools_to_bedrock_format(tools)
        invoke = self._invoke_bedrock_stream if stream else self._invoke_bedrock
        return invoke(
            model,
            messages,
            max_tokens,
            temperature,
            bedrock_tools,
            tool_choice,
            **kwargs,
        )
OpenManus/app/config.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import threading
3
+ import tomllib
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
def get_project_root() -> Path:
    """Return the directory two levels above this file (the project root)."""
    this_file = Path(__file__).resolve()
    package_dir = this_file.parent
    return package_dir.parent


# Resolved once at import time
PROJECT_ROOT = get_project_root()
WORKSPACE_ROOT = PROJECT_ROOT / "workspace"
17
+
18
+
19
class LLMSettings(BaseModel):
    """Connection and sampling settings for one LLM profile."""

    # Required connection details
    model: str = Field(..., description="Model name")
    base_url: str = Field(..., description="API base URL")
    api_key: str = Field(..., description="API key")

    # Token budget and sampling
    max_tokens: int = Field(
        default=4096, description="Maximum number of tokens per request"
    )
    max_input_tokens: Optional[int] = Field(
        default=None,
        description="Maximum input tokens to use across all requests (None for unlimited)",
    )
    temperature: float = Field(default=1.0, description="Sampling temperature")

    # Provider selection
    api_type: str = Field(..., description="Azure, Openai, or Ollama")
    api_version: str = Field(..., description="Azure Openai version if AzureOpenai")
31
+
32
+
33
class ProxySettings(BaseModel):
    """Proxy server address and optional credentials."""

    # Declared Optional because the default is None: the previous ``str``
    # annotation contradicted its own default and rejected an explicit None.
    server: Optional[str] = Field(None, description="Proxy server address")
    username: Optional[str] = Field(None, description="Proxy username")
    password: Optional[str] = Field(None, description="Proxy password")
37
+
38
+
39
class SearchSettings(BaseModel):
    """Search engine selection, fallback and retry settings."""

    # Engine selection
    engine: str = Field(default="Google", description="Search engine the llm to use")
    fallback_engines: List[str] = Field(
        default_factory=lambda: ["DuckDuckGo", "Baidu", "Bing"],
        description="Fallback search engines to try if the primary engine fails",
    )

    # Retry policy once every engine has failed
    retry_delay: int = Field(
        default=60,
        description="Seconds to wait before retrying all engines again after they all fail",
    )
    max_retries: int = Field(
        default=3,
        description="Maximum number of times to retry all engines when all fail",
    )

    # Result localisation
    lang: str = Field(
        default="en",
        description="Language code for search results (e.g., en, zh, fr)",
    )
    country: str = Field(
        default="us",
        description="Country code for search results (e.g., us, cn, uk)",
    )
61
+
62
+
63
class BrowserSettings(BaseModel):
    """Browser launch, connection and proxy settings."""

    # Launch options
    headless: bool = Field(
        default=False, description="Whether to run browser in headless mode"
    )
    disable_security: bool = Field(
        default=True, description="Disable browser security features"
    )
    extra_chromium_args: List[str] = Field(
        default_factory=list, description="Extra arguments to pass to the browser"
    )

    # Ways to reach an existing browser
    chrome_instance_path: Optional[str] = Field(
        default=None, description="Path to a Chrome instance to use"
    )
    wss_url: Optional[str] = Field(
        default=None, description="Connect to a browser instance via WebSocket"
    )
    cdp_url: Optional[str] = Field(
        default=None, description="Connect to a browser instance via CDP"
    )

    # Networking and content limits
    proxy: Optional[ProxySettings] = Field(
        default=None, description="Proxy settings for the browser"
    )
    max_content_length: int = Field(
        default=2000, description="Maximum length for content retrieval operations"
    )
86
+
87
+
88
class SandboxSettings(BaseModel):
    """Settings for the execution sandbox container."""

    use_sandbox: bool = Field(default=False, description="Whether to use the sandbox")

    # Container image and working directory
    image: str = Field(default="python:3.12-slim", description="Base image")
    work_dir: str = Field(
        default="/workspace", description="Container working directory"
    )

    # Resource limits
    memory_limit: str = Field(default="512m", description="Memory limit")
    cpu_limit: float = Field(default=1.0, description="CPU limit")
    timeout: int = Field(default=300, description="Default command timeout (seconds)")
    network_enabled: bool = Field(
        default=False, description="Whether network access is allowed"
    )
100
+
101
+
102
class MCPServerConfig(BaseModel):
    """Connection details for one MCP server entry."""

    type: str = Field(..., description="Server connection type (sse or stdio)")

    # Used by SSE connections
    url: Optional[str] = Field(default=None, description="Server URL for SSE connections")

    # Used by stdio connections
    command: Optional[str] = Field(
        default=None, description="Command for stdio connections"
    )
    args: List[str] = Field(
        default_factory=list, description="Arguments for stdio command"
    )
111
+
112
+
113
class MCPSettings(BaseModel):
    """Configuration for MCP (Model Context Protocol)"""

    server_reference: str = Field(
        "app.mcp.server", description="Module reference for the MCP server"
    )
    servers: Dict[str, MCPServerConfig] = Field(
        default_factory=dict, description="MCP server configurations"
    )

    @classmethod
    def load_server_config(cls) -> Dict[str, MCPServerConfig]:
        """Load MCP server definitions from ``config/mcp.json`` under the project root.

        Returns:
            A mapping of server id to ``MCPServerConfig`` built from the
            ``mcpServers`` object of the file; empty when the file is absent.

        Raises:
            ValueError: the file exists but cannot be read, parsed or
                converted (the original error is chained as the cause).
        """
        config_path = PROJECT_ROOT / "config" / "mcp.json"

        try:
            if not config_path.exists():
                return {}

            # JSON is UTF-8 text; do not depend on the locale's default encoding
            with config_path.open(encoding="utf-8") as f:
                data = json.load(f)

            return {
                server_id: MCPServerConfig(
                    type=server_config["type"],
                    url=server_config.get("url"),
                    command=server_config.get("command"),
                    args=server_config.get("args", []),
                )
                for server_id, server_config in data.get("mcpServers", {}).items()
            }
        except Exception as e:
            raise ValueError(f"Failed to load MCP server config: {e}") from e
147
+
148
+
149
class AppConfig(BaseModel):
    """Top-level validated application configuration."""

    # Named LLM profiles
    llm: Dict[str, LLMSettings]

    # Optional sections
    sandbox: Optional[SandboxSettings] = Field(
        default=None, description="Sandbox configuration"
    )
    browser_config: Optional[BrowserSettings] = Field(
        default=None, description="Browser configuration"
    )
    search_config: Optional[SearchSettings] = Field(
        default=None, description="Search configuration"
    )
    mcp_config: Optional[MCPSettings] = Field(
        default=None, description="MCP configuration"
    )

    class Config:
        arbitrary_types_allowed = True
164
+
165
+
166
class Config:
    """Singleton that loads the TOML configuration once and exposes it."""

    _instance = None
    _lock = threading.Lock()
    _initialized = False

    def __new__(cls):
        # Check without the lock first, then re-check once it is held
        if cls._instance is not None:
            return cls._instance
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every Config() call; load only the first time
        if self._initialized:
            return
        with self._lock:
            if self._initialized:
                return
            self._config = None
            self._load_initial_config()
            self._initialized = True

    @staticmethod
    def _get_config_path() -> Path:
        """Return config.toml if present, else config.example.toml.

        Raises:
            FileNotFoundError: neither file exists in the config directory.
        """
        config_dir = PROJECT_ROOT / "config"
        for file_name in ("config.toml", "config.example.toml"):
            candidate = config_dir / file_name
            if candidate.exists():
                return candidate
        raise FileNotFoundError("No configuration file found in config directory")

    def _load_config(self) -> dict:
        """Parse the selected TOML file into a dictionary."""
        with self._get_config_path().open("rb") as handle:
            return tomllib.load(handle)

    def _load_initial_config(self):
        """Build the validated AppConfig from the raw TOML data."""
        raw = self._load_config()

        # Scalar keys of [llm] form the defaults; table values are named overrides
        llm_section = raw.get("llm", {})
        overrides = {
            name: value
            for name, value in raw.get("llm", {}).items()
            if isinstance(value, dict)
        }
        defaults = {
            "model": llm_section.get("model"),
            "base_url": llm_section.get("base_url"),
            "api_key": llm_section.get("api_key"),
            "max_tokens": llm_section.get("max_tokens", 4096),
            "max_input_tokens": llm_section.get("max_input_tokens"),
            "temperature": llm_section.get("temperature", 1.0),
            "api_type": llm_section.get("api_type", ""),
            "api_version": llm_section.get("api_version", ""),
        }

        # Browser section, including its optional proxy table
        browser_settings = None
        browser_section = raw.get("browser", {})
        if browser_section:
            proxy_settings = None
            proxy_section = browser_section.get("proxy", {})
            if proxy_section and proxy_section.get("server"):
                proxy_keys = ["server", "username", "password"]
                proxy_settings = ProxySettings(
                    **{
                        key: value
                        for key, value in proxy_section.items()
                        if key in proxy_keys and value
                    }
                )

            # Keep only keys BrowserSettings declares, dropping None values
            browser_params = {
                key: value
                for key, value in browser_section.items()
                if key in BrowserSettings.__annotations__ and value is not None
            }
            if proxy_settings:
                browser_params["proxy"] = proxy_settings
            if browser_params:
                browser_settings = BrowserSettings(**browser_params)

        # Search stays None when the section is missing or empty
        search_section = raw.get("search", {})
        search_settings = SearchSettings(**search_section) if search_section else None

        # Sandbox always exists; an empty section yields the defaults
        sandbox_settings = SandboxSettings(**raw.get("sandbox", {}))

        # Server definitions always come from the JSON file
        mcp_section = dict(raw.get("mcp", {}))
        mcp_section["servers"] = MCPSettings.load_server_config()
        mcp_settings = MCPSettings(**mcp_section)

        llm_profiles = {"default": defaults}
        for name, override in overrides.items():
            llm_profiles[name] = {**defaults, **override}

        self._config = AppConfig(
            **{
                "llm": llm_profiles,
                "sandbox": sandbox_settings,
                "browser_config": browser_settings,
                "search_config": search_settings,
                "mcp_config": mcp_settings,
            }
        )

    @property
    def llm(self) -> Dict[str, LLMSettings]:
        """LLM settings keyed by profile name."""
        return self._config.llm

    @property
    def sandbox(self) -> SandboxSettings:
        """Sandbox settings."""
        return self._config.sandbox

    @property
    def browser_config(self) -> Optional[BrowserSettings]:
        """Browser settings, or None when not configured."""
        return self._config.browser_config

    @property
    def search_config(self) -> Optional[SearchSettings]:
        """Search settings, or None when not configured."""
        return self._config.search_config

    @property
    def mcp_config(self) -> MCPSettings:
        """MCP settings."""
        return self._config.mcp_config

    @property
    def workspace_root(self) -> Path:
        """Workspace root directory."""
        return WORKSPACE_ROOT

    @property
    def root_path(self) -> Path:
        """Root path of the application."""
        return PROJECT_ROOT


# Shared module-level instance
config = Config()
OpenManus/app/exceptions.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class ToolError(Exception):
    """Error raised by a tool; the text is kept on ``message``."""

    def __init__(self, message):
        super().__init__(message)
        self.message = message
6
+
7
+
8
class OpenManusError(Exception):
    """Root of the OpenManus exception hierarchy."""

    pass


class TokenLimitExceeded(OpenManusError):
    """Signals that the token limit has been exceeded."""

    pass
OpenManus/app/flow/__init__.py ADDED
File without changes
OpenManus/app/flow/base.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, List, Optional, Union
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from app.agent.base import BaseAgent
7
+
8
+
9
class BaseFlow(BaseModel, ABC):
    """Abstract execution flow that coordinates one or more agents."""

    agents: Dict[str, BaseAgent]
    tools: Optional[List] = None
    primary_agent_key: Optional[str] = None

    class Config:
        arbitrary_types_allowed = True

    def __init__(
        self, agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]], **data
    ):
        """Normalise ``agents`` to a dict and pick a primary agent.

        A single agent is stored under "default", a list under "agent_<i>",
        and a mapping is used as given. When no ``primary_agent_key`` is
        supplied, the first key of the mapping becomes the primary one.
        """
        if isinstance(agents, BaseAgent):
            by_key = {"default": agents}
        elif isinstance(agents, list):
            by_key = {}
            for position, member in enumerate(agents):
                by_key[f"agent_{position}"] = member
        else:
            by_key = agents

        # Default the primary agent to the first entry
        if by_key and not data.get("primary_agent_key"):
            data["primary_agent_key"] = next(iter(by_key))

        data["agents"] = by_key

        # Let pydantic validate and assign the fields
        super().__init__(**data)

    @property
    def primary_agent(self) -> Optional[BaseAgent]:
        """The agent stored under ``primary_agent_key``, or None."""
        key = self.primary_agent_key
        return self.agents.get(key)

    def get_agent(self, key: str) -> Optional[BaseAgent]:
        """Look up an agent by key; None when the key is absent."""
        return self.agents.get(key)

    def add_agent(self, key: str, agent: BaseAgent) -> None:
        """Register ``agent`` under ``key``, replacing any existing entry."""
        self.agents[key] = agent

    @abstractmethod
    async def execute(self, input_text: str) -> str:
        """Run the flow on ``input_text`` and return its textual result."""
OpenManus/app/flow/flow_factory.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+ from typing import Dict, List, Union
3
+
4
+ from app.agent.base import BaseAgent
5
+ from app.flow.base import BaseFlow
6
+ from app.flow.planning import PlanningFlow
7
+
8
+
9
class FlowType(str, Enum):
    """Identifiers of the flow kinds the factory can build."""

    PLANNING = "planning"
11
+
12
+
13
class FlowFactory:
    """Builds flow instances for a given flow type and set of agents."""

    @staticmethod
    def create_flow(
        flow_type: FlowType,
        agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]],
        **kwargs,
    ) -> BaseFlow:
        """Instantiate the flow class registered for ``flow_type``.

        Raises:
            ValueError: no flow class is registered for ``flow_type``.
        """
        registry = {FlowType.PLANNING: PlanningFlow}

        selected = registry.get(flow_type)
        if selected:
            return selected(agents, **kwargs)
        raise ValueError(f"Unknown flow type: {flow_type}")
OpenManus/app/flow/planning.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+ from enum import Enum
4
+ from typing import Dict, List, Optional, Union
5
+
6
+ from pydantic import Field
7
+
8
+ from app.agent.base import BaseAgent
9
+ from app.flow.base import BaseFlow
10
+ from app.llm import LLM
11
+ from app.logger import logger
12
+ from app.schema import AgentState, Message, ToolChoice
13
+ from app.tool import PlanningTool
14
+
15
+
16
class PlanStepStatus(str, Enum):
    """Statuses a single plan step can be in."""

    NOT_STARTED = "not_started"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    BLOCKED = "blocked"

    @classmethod
    def get_all_statuses(cls) -> list[str]:
        """Return every status value, in declaration order."""
        values = []
        for member in cls:
            values.append(member.value)
        return values

    @classmethod
    def get_active_statuses(cls) -> list[str]:
        """Return the values of the statuses that still need work.

        These are ``not_started`` and ``in_progress``.
        """
        active_members = (cls.NOT_STARTED, cls.IN_PROGRESS)
        return [member.value for member in active_members]

    @classmethod
    def get_status_marks(cls) -> Dict[str, str]:
        """Return the status value -> display marker mapping used when rendering a plan."""
        pairs = (
            (cls.COMPLETED, "[✓]"),
            (cls.IN_PROGRESS, "[→]"),
            (cls.BLOCKED, "[!]"),
            (cls.NOT_STARTED, "[ ]"),
        )
        return {member.value: mark for member, mark in pairs}
43
+
44
+
45
class PlanningFlow(BaseFlow):
    """A flow that manages planning and execution of tasks using agents.

    The flow asks its LLM to create a plan through the planning tool, then
    repeatedly picks the first step that is still active (not started or in
    progress), hands it to an executor agent, and records the outcome in the
    planning tool. When no active step is left, it produces a summary.
    """

    llm: LLM = Field(default_factory=lambda: LLM())
    planning_tool: PlanningTool = Field(default_factory=PlanningTool)
    executor_keys: List[str] = Field(default_factory=list)
    active_plan_id: str = Field(default_factory=lambda: f"plan_{int(time.time())}")
    current_step_index: Optional[int] = None

    def __init__(
        self, agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]], **data
    ):
        """Build the flow.

        Args:
            agents: A single agent, a list of agents, or a key -> agent mapping.
            **data: Field values. Two aliases are accepted: ``executors`` (stored
                as ``executor_keys``) and ``plan_id`` (stored as ``active_plan_id``).
        """
        # Translate the alias keyword arguments before the parent validates fields
        if "executors" in data:
            data["executor_keys"] = data.pop("executors")

        if "plan_id" in data:
            data["active_plan_id"] = data.pop("plan_id")

        # Initialize the planning tool if not provided
        if "planning_tool" not in data:
            data["planning_tool"] = PlanningTool()

        # Call parent's init with the processed data
        super().__init__(agents, **data)

        # Default to every registered agent being eligible as an executor
        if not self.executor_keys:
            self.executor_keys = list(self.agents.keys())

    def get_executor(self, step_type: Optional[str] = None) -> BaseAgent:
        """Get an appropriate executor agent for the current step.

        Args:
            step_type: Optional step type; when it equals an agent key, that
                agent is chosen.

        Returns:
            The agent keyed by ``step_type`` if there is one, otherwise the first
            entry of ``executor_keys`` that is a registered agent, otherwise the
            primary agent (which may be ``None`` if none is registered).
        """
        # If step type is provided and matches an agent key, use that agent
        if step_type and step_type in self.agents:
            return self.agents[step_type]

        # Otherwise use the first available executor
        for key in self.executor_keys:
            if key in self.agents:
                return self.agents[key]

        # Fallback to primary agent
        return self.primary_agent

    async def execute(self, input_text: str) -> str:
        """Execute the planning flow with agents.

        Args:
            input_text: The request to plan for. When empty, no new plan is
                created and the currently active plan (if any) is executed.

        Returns:
            The concatenated step results followed by the final summary, or an
            ``"Execution failed: ..."`` / ``"Failed to create plan for: ..."``
            message. Exceptions are logged and reported in the return value
            rather than raised.
        """
        try:
            if not self.primary_agent:
                raise ValueError("No primary agent available")

            # Create initial plan if input provided
            if input_text:
                await self._create_initial_plan(input_text)

                # Verify plan was created successfully
                if self.active_plan_id not in self.planning_tool.plans:
                    logger.error(
                        f"Plan creation failed. Plan ID {self.active_plan_id} not found in planning tool."
                    )
                    return f"Failed to create plan for: {input_text}"

            result = ""
            while True:
                # Get current step to execute
                self.current_step_index, step_info = await self._get_current_step_info()

                # Exit if no more steps or plan completed
                if self.current_step_index is None:
                    result += await self._finalize_plan()
                    break

                # Execute current step with appropriate agent
                step_type = step_info.get("type") if step_info else None
                executor = self.get_executor(step_type)
                step_result = await self._execute_step(executor, step_info)
                result += step_result + "\n"

                # Check if agent wants to terminate
                if hasattr(executor, "state") and executor.state == AgentState.FINISHED:
                    break

            return result
        except Exception as e:
            logger.error(f"Error in PlanningFlow: {str(e)}")
            return f"Execution failed: {str(e)}"

    async def _create_initial_plan(self, request: str) -> None:
        """Create an initial plan based on the request using the flow's LLM and PlanningTool.

        The LLM is offered the planning tool. The first usable ``planning`` tool
        call is executed with ``plan_id`` forced to ``active_plan_id``. If the
        LLM produces no usable call, a generic three-step default plan is created.

        Args:
            request: The user's task description.
        """
        logger.info(f"Creating initial plan with ID: {self.active_plan_id}")

        # Create a system message for plan creation
        system_message = Message.system_message(
            "You are a planning assistant. Create a concise, actionable plan with clear steps. "
            "Focus on key milestones rather than detailed sub-steps. "
            "Optimize for clarity and efficiency."
        )

        # Create a user message with the request
        user_message = Message.user_message(
            f"Create a reasonable plan with clear steps to accomplish the task: {request}"
        )

        # Call LLM with PlanningTool
        response = await self.llm.ask_tool(
            messages=[user_message],
            system_msgs=[system_message],
            tools=[self.planning_tool.to_param()],
            tool_choice=ToolChoice.AUTO,
        )

        # Process tool calls if present; tolerate a missing response object
        tool_calls = getattr(response, "tool_calls", None) or []
        for tool_call in tool_calls:
            if tool_call.function.name != "planning":
                continue

            # Parse the arguments
            args = tool_call.function.arguments
            if isinstance(args, str):
                try:
                    args = json.loads(args)
                except json.JSONDecodeError:
                    logger.error(f"Failed to parse tool arguments: {args}")
                    continue

            # Valid JSON that is not an object cannot be used as keyword arguments
            if not isinstance(args, dict):
                logger.error(f"Failed to parse tool arguments: {args}")
                continue

            # Ensure plan_id is set correctly and execute the tool
            args["plan_id"] = self.active_plan_id

            # Execute the planning tool directly with the LLM-provided arguments
            result = await self.planning_tool.execute(**args)

            logger.info(f"Plan creation result: {str(result)}")
            return

        # If execution reached here, create a default plan
        logger.warning("Creating default plan")

        await self.planning_tool.execute(
            **{
                "command": "create",
                "plan_id": self.active_plan_id,
                "title": f"Plan for: {request[:50]}{'...' if len(request) > 50 else ''}",
                "steps": ["Analyze request", "Execute task", "Verify results"],
            }
        )

    async def _get_current_step_info(self) -> tuple[Optional[int], Optional[dict]]:
        """Find the first active step of the current plan and mark it in progress.

        Returns:
            ``(index, info)`` for the first step whose status is not started or
            in progress. ``info`` holds the step ``"text"`` and, when the text
            contains a tag such as ``[SEARCH]``, its lower-cased ``"type"``.
            Returns ``(None, None)`` if the plan is missing, no active step is
            left, or an error occurs.
        """
        import re  # local import, done once per call rather than once per step

        if (
            not self.active_plan_id
            or self.active_plan_id not in self.planning_tool.plans
        ):
            logger.error(f"Plan with ID {self.active_plan_id} not found")
            return None, None

        try:
            # Direct access to plan data from planning tool storage
            plan_data = self.planning_tool.plans[self.active_plan_id]
            steps = plan_data.get("steps", [])
            step_statuses = plan_data.get("step_statuses", [])

            # Find first non-completed step
            for i, step in enumerate(steps):
                # Steps without a recorded status count as not started
                if i >= len(step_statuses):
                    status = PlanStepStatus.NOT_STARTED.value
                else:
                    status = step_statuses[i]

                if status in PlanStepStatus.get_active_statuses():
                    step_info = {"text": step}

                    # Try to extract step type from the text (e.g., [SEARCH] or [CODE])
                    type_match = re.search(r"\[([A-Z_]+)\]", step)
                    if type_match:
                        step_info["type"] = type_match.group(1).lower()

                    # Mark current step as in_progress
                    try:
                        await self.planning_tool.execute(
                            command="mark_step",
                            plan_id=self.active_plan_id,
                            step_index=i,
                            step_status=PlanStepStatus.IN_PROGRESS.value,
                        )
                    except Exception as e:
                        logger.warning(f"Error marking step as in_progress: {e}")
                        # Tool failed: update the stored status list directly
                        if i < len(step_statuses):
                            step_statuses[i] = PlanStepStatus.IN_PROGRESS.value
                        else:
                            while len(step_statuses) < i:
                                step_statuses.append(PlanStepStatus.NOT_STARTED.value)
                            step_statuses.append(PlanStepStatus.IN_PROGRESS.value)

                        plan_data["step_statuses"] = step_statuses

                    return i, step_info

            return None, None  # No active step found

        except Exception as e:
            logger.warning(f"Error finding current step index: {e}")
            return None, None

    async def _execute_step(self, executor: BaseAgent, step_info: dict) -> str:
        """Execute the current step with the specified agent using agent.run().

        On success the step is marked completed. On failure the step is marked
        blocked: a failed step would otherwise stay "in_progress", be selected
        again by ``_get_current_step_info``, and make ``execute`` loop forever.

        Args:
            executor: Agent that runs the step.
            step_info: Step description as returned by ``_get_current_step_info``.

        Returns:
            The agent's result, or an error message if the agent raised.
        """
        # Prepare context for the agent with current plan status
        plan_status = await self._get_plan_text()
        step_text = step_info.get("text", f"Step {self.current_step_index}")

        # Create a prompt for the agent to execute the current step
        step_prompt = f"""
        CURRENT PLAN STATUS:
        {plan_status}

        YOUR CURRENT TASK:
        You are now working on step {self.current_step_index}: "{step_text}"

        Please execute this step using the appropriate tools. When you're done, provide a summary of what you accomplished.
        """

        # Use agent.run() to execute the step
        try:
            step_result = await executor.run(step_prompt)
        except Exception as e:
            logger.error(f"Error executing step {self.current_step_index}: {e}")
            # Take the failed step out of the active set so the flow can move on
            await self._mark_step_status(PlanStepStatus.BLOCKED.value)
            return f"Error executing step {self.current_step_index}: {str(e)}"

        # Mark the step as completed after successful execution
        await self._mark_step_completed()
        return step_result

    async def _mark_step_status(self, status: str) -> None:
        """Record ``status`` for the current step; never raises.

        Uses the planning tool's ``mark_step`` command and, if that fails,
        writes the status directly into the planning tool's plan storage.
        """
        if self.current_step_index is None:
            return

        try:
            await self.planning_tool.execute(
                command="mark_step",
                plan_id=self.active_plan_id,
                step_index=self.current_step_index,
                step_status=status,
            )
            logger.info(
                f"Marked step {self.current_step_index} as {status} in plan {self.active_plan_id}"
            )
        except Exception as e:
            logger.warning(f"Failed to update plan status: {e}")
            try:
                # Update step status directly in planning tool storage
                if self.active_plan_id in self.planning_tool.plans:
                    plan_data = self.planning_tool.plans[self.active_plan_id]
                    step_statuses = plan_data.get("step_statuses", [])

                    # Ensure the step_statuses list is long enough
                    while len(step_statuses) <= self.current_step_index:
                        step_statuses.append(PlanStepStatus.NOT_STARTED.value)

                    # Update the status
                    step_statuses[self.current_step_index] = status
                    plan_data["step_statuses"] = step_statuses
            except Exception as fallback_error:
                logger.warning(f"Failed to update plan status: {fallback_error}")

    async def _mark_step_completed(self) -> None:
        """Mark the current step as completed."""
        await self._mark_step_status(PlanStepStatus.COMPLETED.value)

    async def _get_plan_text(self) -> str:
        """Get the current plan as formatted text.

        Asks the planning tool first; if that raises, renders the plan from the
        tool's storage instead.
        """
        try:
            result = await self.planning_tool.execute(
                command="get", plan_id=self.active_plan_id
            )
            return result.output if hasattr(result, "output") else str(result)
        except Exception as e:
            logger.error(f"Error getting plan: {e}")
            return self._generate_plan_text_from_storage()

    def _generate_plan_text_from_storage(self) -> str:
        """Generate plan text directly from storage if the planning tool fails.

        Returns:
            A textual rendering of the plan (title, progress, per-step status),
            or an error message if the plan is missing or cannot be rendered.
        """
        try:
            if self.active_plan_id not in self.planning_tool.plans:
                return f"Error: Plan with ID {self.active_plan_id} not found"

            plan_data = self.planning_tool.plans[self.active_plan_id]
            title = plan_data.get("title", "Untitled Plan")
            steps = plan_data.get("steps", [])
            # Work on copies: rendering must not pad the stored lists
            step_statuses = list(plan_data.get("step_statuses", []))
            step_notes = list(plan_data.get("step_notes", []))

            # Ensure step_statuses and step_notes match the number of steps
            while len(step_statuses) < len(steps):
                step_statuses.append(PlanStepStatus.NOT_STARTED.value)
            while len(step_notes) < len(steps):
                step_notes.append("")

            # Count steps by status
            status_counts = {status: 0 for status in PlanStepStatus.get_all_statuses()}

            for status in step_statuses:
                if status in status_counts:
                    status_counts[status] += 1

            completed = status_counts[PlanStepStatus.COMPLETED.value]
            total = len(steps)
            progress = (completed / total) * 100 if total > 0 else 0

            plan_text = f"Plan: {title} (ID: {self.active_plan_id})\n"
            plan_text += "=" * len(plan_text) + "\n\n"

            plan_text += (
                f"Progress: {completed}/{total} steps completed ({progress:.1f}%)\n"
            )
            plan_text += f"Status: {status_counts[PlanStepStatus.COMPLETED.value]} completed, {status_counts[PlanStepStatus.IN_PROGRESS.value]} in progress, "
            plan_text += f"{status_counts[PlanStepStatus.BLOCKED.value]} blocked, {status_counts[PlanStepStatus.NOT_STARTED.value]} not started\n\n"
            plan_text += "Steps:\n"

            status_marks = PlanStepStatus.get_status_marks()

            for i, (step, status, notes) in enumerate(
                zip(steps, step_statuses, step_notes)
            ):
                # Unknown statuses are rendered with the "not started" marker
                status_mark = status_marks.get(
                    status, status_marks[PlanStepStatus.NOT_STARTED.value]
                )

                plan_text += f"{i}. {status_mark} {step}\n"
                if notes:
                    plan_text += f" Notes: {notes}\n"

            return plan_text
        except Exception as e:
            logger.error(f"Error generating plan text from storage: {e}")
            return f"Error: Unable to retrieve plan with ID {self.active_plan_id}"

    async def _finalize_plan(self) -> str:
        """Finalize the plan and provide a summary using the flow's LLM directly.

        Falls back to asking the primary agent for the summary if the LLM call
        fails, and to a fixed message if that fails as well.
        """
        plan_text = await self._get_plan_text()

        # Create a summary using the flow's LLM directly
        try:
            system_message = Message.system_message(
                "You are a planning assistant. Your task is to summarize the completed plan."
            )

            user_message = Message.user_message(
                f"The plan has been completed. Here is the final plan status:\n\n{plan_text}\n\nPlease provide a summary of what was accomplished and any final thoughts."
            )

            response = await self.llm.ask(
                messages=[user_message], system_msgs=[system_message]
            )

            return f"Plan completed:\n\n{response}"
        except Exception as e:
            logger.error(f"Error finalizing plan with LLM: {e}")

            # Fallback to using an agent for the summary
            try:
                agent = self.primary_agent
                summary_prompt = f"""
                The plan has been completed. Here is the final plan status:

                {plan_text}

                Please provide a summary of what was accomplished and any final thoughts.
                """
                summary = await agent.run(summary_prompt)
                return f"Plan completed:\n\n{summary}"
            except Exception as e2:
                logger.error(f"Error finalizing plan with agent: {e2}")
                return "Plan completed. Error generating summary."
OpenManus/app/llm.py ADDED
@@ -0,0 +1,773 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from typing import Dict, List, Optional, Union
3
+
4
+ import tiktoken
5
+ from openai import (
6
+ APIError,
7
+ AsyncAzureOpenAI,
8
+ AsyncOpenAI,
9
+ AuthenticationError,
10
+ OpenAIError,
11
+ RateLimitError,
12
+ )
13
+ from openai.types.chat import ChatCompletion, ChatCompletionMessage
14
+ from tenacity import (
15
+ retry,
16
+ retry_if_exception_type,
17
+ stop_after_attempt,
18
+ wait_random_exponential,
19
+ )
20
+
21
+ from app.bedrock import BedrockClient
22
+ from app.config import LLMSettings, config
23
+ from app.exceptions import TokenLimitExceeded
24
+ from app.logger import logger # Assuming a logger is set up in your app
25
+ from app.schema import (
26
+ ROLE_VALUES,
27
+ TOOL_CHOICE_TYPE,
28
+ TOOL_CHOICE_VALUES,
29
+ Message,
30
+ ToolChoice,
31
+ )
32
+
33
+
34
# Models for which requests send ``max_completion_tokens`` instead of
# ``max_tokens`` and ``temperature``.
REASONING_MODELS = [
    "o1",
    "o3-mini",
]

# Models for which base64 images attached to messages are forwarded as
# ``image_url`` content parts.
MULTIMODAL_MODELS = [
    "gpt-4-vision-preview",
    "gpt-4o",
    "gpt-4o-mini",
    "claude-3-opus-20240229",
    "claude-3-sonnet-20240229",
    "claude-3-haiku-20240307",
]
43
+
44
+
45
class TokenCounter:
    """Estimates the token cost of chat messages (text, images, tool calls).

    Text is counted with the tokenizer passed to the constructor; images are
    estimated from their detail level and, when available, their dimensions.
    """

    # Token constants
    BASE_MESSAGE_TOKENS = 4
    FORMAT_TOKENS = 2
    LOW_DETAIL_IMAGE_TOKENS = 85
    HIGH_DETAIL_TILE_TOKENS = 170

    # Image processing constants
    MAX_SIZE = 2048
    HIGH_DETAIL_TARGET_SHORT_SIDE = 768
    TILE_SIZE = 512

    def __init__(self, tokenizer):
        """Store the tokenizer; it must provide ``encode(text)`` returning a sized sequence."""
        self.tokenizer = tokenizer

    def count_text(self, text: str) -> int:
        """Calculate tokens for a text string (0 for empty or ``None``)."""
        return 0 if not text else len(self.tokenizer.encode(text))

    def count_image(self, image_item: dict) -> int:
        """
        Calculate tokens for an image based on detail level and dimensions

        For "low" detail: fixed 85 tokens
        For "high" detail:
        1. Scale to fit in 2048x2048 square
        2. Scale shortest side to 768px
        3. Count 512px tiles (170 tokens each)
        4. Add 85 tokens

        The detail level is read from ``image_item["detail"]`` or, if absent,
        from the nested ``image_item["image_url"]["detail"]``; it defaults to
        "medium". Without ``"dimensions"``, "high" is estimated as a 1024x1024
        image and every other level as a flat 1024 tokens.
        """
        # Content parts are shaped {"type": "image_url", "image_url": {...}},
        # so the detail level may live one level down.
        nested = image_item.get("image_url")
        if not isinstance(nested, dict):
            nested = {}
        detail = image_item.get("detail", nested.get("detail", "medium"))

        # For low detail, always return fixed token count
        if detail == "low":
            return self.LOW_DETAIL_IMAGE_TOKENS

        # Medium has no separate formula, so it shares the high detail
        # calculation whenever dimensions are known
        if detail in ("high", "medium") and "dimensions" in image_item:
            width, height = image_item["dimensions"]
            return self._calculate_high_detail_tokens(width, height)

        if detail == "high":
            # Default to a 1024x1024 image calculation for high detail
            return self._calculate_high_detail_tokens(1024, 1024)  # 765 tokens

        # Medium without dimensions, or an unknown detail level
        return 1024

    def _calculate_high_detail_tokens(self, width: int, height: int) -> int:
        """Calculate tokens for high detail images based on dimensions.

        A non-positive width or height yields only the fixed base cost instead
        of dividing by zero.
        """
        if width <= 0 or height <= 0:
            return self.LOW_DETAIL_IMAGE_TOKENS

        # Step 1: Scale to fit in MAX_SIZE x MAX_SIZE square
        if width > self.MAX_SIZE or height > self.MAX_SIZE:
            scale = self.MAX_SIZE / max(width, height)
            width = int(width * scale)
            height = int(height * scale)
            # Extreme aspect ratios can truncate the short side to zero
            if width <= 0 or height <= 0:
                return self.LOW_DETAIL_IMAGE_TOKENS

        # Step 2: Scale so shortest side is HIGH_DETAIL_TARGET_SHORT_SIDE
        scale = self.HIGH_DETAIL_TARGET_SHORT_SIDE / min(width, height)
        scaled_width = int(width * scale)
        scaled_height = int(height * scale)

        # Step 3: Count number of 512px tiles
        tiles_x = math.ceil(scaled_width / self.TILE_SIZE)
        tiles_y = math.ceil(scaled_height / self.TILE_SIZE)
        total_tiles = tiles_x * tiles_y

        # Step 4: Calculate final token count
        return (
            total_tiles * self.HIGH_DETAIL_TILE_TOKENS
        ) + self.LOW_DETAIL_IMAGE_TOKENS

    def count_content(self, content: Union[str, List[Union[str, dict]]]) -> int:
        """Calculate tokens for message content.

        ``content`` is either a plain string or a list whose items are strings,
        ``{"text": ...}`` parts or ``{"image_url": ...}`` parts; other items
        contribute nothing.
        """
        if not content:
            return 0

        if isinstance(content, str):
            return self.count_text(content)

        token_count = 0
        for item in content:
            if isinstance(item, str):
                token_count += self.count_text(item)
            elif isinstance(item, dict):
                if "text" in item:
                    token_count += self.count_text(item["text"])
                elif "image_url" in item:
                    token_count += self.count_image(item)
        return token_count

    def count_tool_calls(self, tool_calls: List[dict]) -> int:
        """Calculate tokens for tool calls (function name plus arguments).

        ``None`` or empty input counts as 0, and entries whose ``"function"``
        is missing or ``None`` are skipped.
        """
        token_count = 0
        for tool_call in tool_calls or []:
            function = tool_call.get("function")
            if not function:
                continue
            token_count += self.count_text(function.get("name", ""))
            token_count += self.count_text(function.get("arguments", ""))
        return token_count

    def count_message_tokens(self, messages: List[dict]) -> int:
        """Calculate the total number of tokens in a message list.

        Each message costs a fixed base amount plus its role, content, tool
        calls, name and tool_call_id; a fixed format overhead is added once.
        """
        total_tokens = self.FORMAT_TOKENS  # Base format tokens

        for message in messages:
            tokens = self.BASE_MESSAGE_TOKENS  # Base tokens per message

            # Add role tokens
            tokens += self.count_text(message.get("role", ""))

            # Add content tokens
            if "content" in message:
                tokens += self.count_content(message["content"])

            # Add tool calls tokens; the key may be present with a None value
            tool_calls = message.get("tool_calls")
            if tool_calls:
                tokens += self.count_tool_calls(tool_calls)

            # Add name and tool_call_id tokens
            tokens += self.count_text(message.get("name", ""))
            tokens += self.count_text(message.get("tool_call_id", ""))

            total_tokens += tokens

        return total_tokens
179
+
180
+
181
+ class LLM:
182
+ _instances: Dict[str, "LLM"] = {}
183
+
184
def __new__(
    cls, config_name: str = "default", llm_config: Optional[LLMSettings] = None
):
    """Return the shared instance for ``config_name``, creating it on first use.

    Instances are cached per configuration name in ``cls._instances``; a new
    instance is initialised explicitly before it is stored in the cache.
    """
    cached = cls._instances.get(config_name)
    if cached is None:
        cached = super().__new__(cls)
        cached.__init__(config_name, llm_config)
        cls._instances[config_name] = cached
    return cached
192
+
193
def __init__(
    self, config_name: str = "default", llm_config: Optional[LLMSettings] = None
):
    """Load settings for ``config_name`` and create the API client.

    Does nothing when the instance already has a ``client`` attribute, so
    repeated calls on a cached instance are harmless.

    Args:
        config_name: Name of the settings entry to use; falls back to the
            ``"default"`` entry when the name is not present.
        llm_config: Settings mapping to use instead of ``config.llm``.
    """
    if hasattr(self, "client"):
        # Already initialized
        return

    settings_map = llm_config or config.llm
    settings = settings_map.get(config_name, settings_map["default"])

    self.model = settings.model
    self.max_tokens = settings.max_tokens
    self.temperature = settings.temperature
    self.api_type = settings.api_type
    self.api_key = settings.api_key
    self.api_version = settings.api_version
    self.base_url = settings.base_url

    # Token accounting; max_input_tokens stays None when the settings lack it
    self.total_input_tokens = 0
    self.total_completion_tokens = 0
    self.max_input_tokens = getattr(settings, "max_input_tokens", None)

    # Pick the tokenizer for the model, defaulting to cl100k_base when
    # tiktoken has no preset for it
    try:
        self.tokenizer = tiktoken.encoding_for_model(self.model)
    except KeyError:
        self.tokenizer = tiktoken.get_encoding("cl100k_base")

    # Create the client that matches the configured API type
    if self.api_type == "azure":
        self.client = AsyncAzureOpenAI(
            base_url=self.base_url,
            api_key=self.api_key,
            api_version=self.api_version,
        )
    elif self.api_type == "aws":
        self.client = BedrockClient()
    else:
        self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)

    self.token_counter = TokenCounter(self.tokenizer)
235
+
236
def count_tokens(self, text: str) -> int:
    """Return how many tokens ``text`` encodes to (0 for empty or ``None``)."""
    return len(self.tokenizer.encode(text)) if text else 0
241
+
242
def count_message_tokens(self, messages: List[dict]) -> int:
    """Return the token estimate for ``messages``, delegating to ``self.token_counter``."""
    counter = self.token_counter
    return counter.count_message_tokens(messages)
244
+
245
def update_token_count(self, input_tokens: int, completion_tokens: int = 0) -> None:
    """Add one request's usage to the running totals and log the result.

    Args:
        input_tokens: Prompt tokens used by the request.
        completion_tokens: Completion tokens produced by the request.
    """
    # Totals are always accumulated, whether or not max_input_tokens is set
    self.total_input_tokens += input_tokens
    self.total_completion_tokens += completion_tokens

    request_total = input_tokens + completion_tokens
    cumulative_total = self.total_input_tokens + self.total_completion_tokens
    logger.info(
        f"Token usage: Input={input_tokens}, Completion={completion_tokens}, "
        f"Cumulative Input={self.total_input_tokens}, Cumulative Completion={self.total_completion_tokens}, "
        f"Total={request_total}, Cumulative Total={cumulative_total}"
    )
255
+
256
def check_token_limit(self, input_tokens: int) -> bool:
    """Tell whether a request of ``input_tokens`` still fits the input budget.

    Returns:
        ``True`` when no ``max_input_tokens`` limit is configured, or when the
        cumulative input total plus ``input_tokens`` does not exceed it.
    """
    limit = self.max_input_tokens
    if limit is None:
        # No limit configured: everything fits
        return True
    return (self.total_input_tokens + input_tokens) <= limit
262
+
263
def get_limit_error_message(self, input_tokens: int) -> str:
    """Build the error text for a request that exceeds the token limit.

    Returns:
        A detailed message when a limit is configured and the request would
        exceed it; otherwise the generic ``"Token limit exceeded"``.
    """
    limit = self.max_input_tokens
    over_limit = limit is not None and (self.total_input_tokens + input_tokens) > limit
    if not over_limit:
        return "Token limit exceeded"

    return f"Request may exceed input token limit (Current: {self.total_input_tokens}, Needed: {input_tokens}, Max: {self.max_input_tokens})"
272
+
273
@staticmethod
def format_messages(
    messages: List[Union[dict, Message]], supports_images: bool = False
) -> List[dict]:
    """
    Format messages for LLM by converting them to OpenAI message format.

    Dict messages are shallow-copied before being modified, so the caller's
    dicts are left untouched. Messages that have neither ``content`` nor
    ``tool_calls`` are dropped from the result.

    Args:
        messages: List of messages that can be either dict or Message objects
        supports_images: Flag indicating if the target model supports image inputs

    Returns:
        List[dict]: List of formatted messages in OpenAI format

    Raises:
        ValueError: If messages are invalid or missing required fields
        TypeError: If unsupported message types are provided

    Examples:
        >>> msgs = [
        ...     Message.system_message("You are a helpful assistant"),
        ...     {"role": "user", "content": "Hello"},
        ...     Message.user_message("How are you?")
        ... ]
        >>> formatted = LLM.format_messages(msgs)
    """
    formatted_messages = []

    for message in messages:
        # Convert Message objects to dictionaries
        if isinstance(message, Message):
            message = message.to_dict()

        if not isinstance(message, dict):
            raise TypeError(f"Unsupported message type: {type(message)}")

        # Work on a copy: the edits below must not leak into the caller's dict
        message = dict(message)

        if "role" not in message:
            raise ValueError("Message dict must contain 'role' field")

        if message.get("base64_image"):
            if supports_images:
                # Normalize content to a list of parts so the image can be appended
                content = message.get("content")
                if not content:
                    parts = []
                elif isinstance(content, str):
                    parts = [{"type": "text", "text": content}]
                elif isinstance(content, list):
                    # Convert string items to proper text objects
                    parts = [
                        {"type": "text", "text": item} if isinstance(item, str) else item
                        for item in content
                    ]
                else:
                    parts = content

                # Add the image to content
                parts.append(
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{message['base64_image']}"
                        },
                    }
                )
                message["content"] = parts

            # With or without image support, the raw field is not sent on
            del message["base64_image"]

        if "content" in message or "tool_calls" in message:
            formatted_messages.append(message)
        # else: do not include the message

    # Validate all messages have required fields
    for msg in formatted_messages:
        if msg["role"] not in ROLE_VALUES:
            raise ValueError(f"Invalid role: {msg['role']}")

    return formatted_messages
360
+
361
@retry(
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
    # NOTE(review): the intent is "don't retry TokenLimitExceeded", but the
    # tuple contains Exception, which matches every Exception subclass.
    # Confirm whether TokenLimitExceeded is really excluded before relying on it.
    retry=retry_if_exception_type((OpenAIError, Exception, ValueError)),
)
async def ask(
    self,
    messages: List[Union[dict, Message]],
    system_msgs: Optional[List[Union[dict, Message]]] = None,
    stream: bool = True,
    temperature: Optional[float] = None,
) -> str:
    """
    Send a prompt to the LLM and return the generated text.

    Args:
        messages: List of conversation messages
        system_msgs: Optional system messages to prepend
        stream (bool): Whether to stream the response (chunks are echoed to stdout)
        temperature (float): Sampling temperature; falls back to self.temperature

    Returns:
        str: The generated response (whitespace-stripped when streaming)

    Raises:
        TokenLimitExceeded: If token limits are exceeded
        ValueError: If messages are invalid or response is empty
        OpenAIError: If API call fails after retries
        Exception: For unexpected errors
    """
    try:
        # Images are only forwarded to models listed as multimodal.
        supports_images = self.model in MULTIMODAL_MODELS

        # System messages, when given, are formatted and placed first.
        if system_msgs:
            system_msgs = self.format_messages(system_msgs, supports_images)
            messages = system_msgs + self.format_messages(messages, supports_images)
        else:
            messages = self.format_messages(messages, supports_images)

        # Refuse the request up front if it would exceed the token budget.
        input_tokens = self.count_message_tokens(messages)
        if not self.check_token_limit(input_tokens):
            error_message = self.get_limit_error_message(input_tokens)
            raise TokenLimitExceeded(error_message)

        params = {
            "model": self.model,
            "messages": messages,
        }

        # Reasoning models take max_completion_tokens and get no temperature.
        if self.model in REASONING_MODELS:
            params["max_completion_tokens"] = self.max_tokens
        else:
            params["max_tokens"] = self.max_tokens
            params["temperature"] = (
                temperature if temperature is not None else self.temperature
            )

        if not stream:
            # Non-streaming request: usage numbers come from the API response.
            response = await self.client.chat.completions.create(
                **params, stream=False
            )

            if not response.choices or not response.choices[0].message.content:
                raise ValueError("Empty or invalid response from LLM")

            self.update_token_count(
                response.usage.prompt_tokens, response.usage.completion_tokens
            )

            return response.choices[0].message.content

        # Streaming request: record the estimated input tokens before the call.
        self.update_token_count(input_tokens)

        response = await self.client.chat.completions.create(**params, stream=True)

        collected_messages = []
        async for chunk in response:
            # Some chunks carry no choices at all; indexing [0] on them would
            # raise IndexError and trigger a full retry of the request.
            if not chunk.choices:
                continue
            chunk_message = chunk.choices[0].delta.content or ""
            collected_messages.append(chunk_message)
            print(chunk_message, end="", flush=True)

        print()  # Newline after streaming
        completion_text = "".join(collected_messages)
        full_response = completion_text.strip()
        if not full_response:
            raise ValueError("Empty response from streaming LLM")

        # Streaming gives no usage block here, so estimate completion tokens
        # from the unstripped text that was actually received.
        completion_tokens = self.count_tokens(completion_text)
        logger.info(
            f"Estimated completion tokens for streaming response: {completion_tokens}"
        )
        self.total_completion_tokens += completion_tokens

        return full_response

    except TokenLimitExceeded:
        # Re-raise token limit errors without logging
        raise
    except ValueError:
        logger.exception("Validation error")
        raise
    except OpenAIError as oe:
        logger.exception("OpenAI API error")
        if isinstance(oe, AuthenticationError):
            logger.error("Authentication failed. Check API key.")
        elif isinstance(oe, RateLimitError):
            logger.error("Rate limit exceeded. Consider increasing retry attempts.")
        elif isinstance(oe, APIError):
            logger.error(f"API error: {oe}")
        raise
    except Exception:
        logger.exception("Unexpected error in ask")
        raise
487
+
488
@retry(
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
    # NOTE(review): the intent is "don't retry TokenLimitExceeded", but the
    # tuple contains Exception, which matches every Exception subclass.
    # Confirm whether TokenLimitExceeded is really excluded before relying on it.
    retry=retry_if_exception_type((OpenAIError, Exception, ValueError)),
)
async def ask_with_images(
    self,
    messages: List[Union[dict, Message]],
    images: List[Union[str, dict]],
    system_msgs: Optional[List[Union[dict, Message]]] = None,
    stream: bool = False,
    temperature: Optional[float] = None,
) -> str:
    """
    Send a prompt with images to the LLM and return the generated text.

    The images are attached to the last message, which must have role "user".
    The caller's message objects are left untouched: the last message and its
    content list are copied before images are appended, so a retried call does
    not attach the same images twice.

    Args:
        messages: List of conversation messages
        images: List of image URLs or image data dictionaries
            (a str URL, a dict with "url", or a dict with "image_url")
        system_msgs: Optional system messages to prepend
        stream (bool): Whether to stream the response (chunks are echoed to stdout)
        temperature (float): Sampling temperature; falls back to self.temperature

    Returns:
        str: The generated response (whitespace-stripped when streaming)

    Raises:
        TokenLimitExceeded: If token limits are exceeded
        ValueError: If the model is not multimodal, messages or images are
            invalid, or the response is empty
        OpenAIError: If API call fails after retries
        Exception: For unexpected errors
    """
    try:
        # This method is only meaningful for models that accept images.
        if self.model not in MULTIMODAL_MODELS:
            raise ValueError(
                f"Model {self.model} does not support images. Use a model from {MULTIMODAL_MODELS}"
            )

        formatted_messages = self.format_messages(messages, supports_images=True)

        # Images can only be attached to a trailing user message.
        if not formatted_messages or formatted_messages[-1]["role"] != "user":
            raise ValueError(
                "The last message must be from the user to attach images"
            )

        # format_messages may hand back the caller's own dict objects, so work
        # on a shallow copy instead of mutating them (mutation would make each
        # retry append the images again).
        last_message = dict(formatted_messages[-1])
        content = last_message["content"]

        # Normalize content to the multimodal list-of-parts form.
        if isinstance(content, str):
            multimodal_content = [{"type": "text", "text": content}]
        elif isinstance(content, list):
            multimodal_content = list(content)
        else:
            multimodal_content = []

        for image in images:
            if isinstance(image, str):
                multimodal_content.append(
                    {"type": "image_url", "image_url": {"url": image}}
                )
            elif isinstance(image, dict) and "url" in image:
                multimodal_content.append({"type": "image_url", "image_url": image})
            elif isinstance(image, dict) and "image_url" in image:
                multimodal_content.append(image)
            else:
                raise ValueError(f"Unsupported image format: {image}")

        last_message["content"] = multimodal_content
        formatted_messages[-1] = last_message

        # Prepend system messages if provided.
        if system_msgs:
            all_messages = (
                self.format_messages(system_msgs, supports_images=True)
                + formatted_messages
            )
        else:
            all_messages = formatted_messages

        # Refuse the request up front if it would exceed the token budget.
        input_tokens = self.count_message_tokens(all_messages)
        if not self.check_token_limit(input_tokens):
            raise TokenLimitExceeded(self.get_limit_error_message(input_tokens))

        params = {
            "model": self.model,
            "messages": all_messages,
            "stream": stream,
        }

        # Reasoning models take max_completion_tokens and get no temperature.
        if self.model in REASONING_MODELS:
            params["max_completion_tokens"] = self.max_tokens
        else:
            params["max_tokens"] = self.max_tokens
            params["temperature"] = (
                temperature if temperature is not None else self.temperature
            )

        if not stream:
            response = await self.client.chat.completions.create(**params)

            if not response.choices or not response.choices[0].message.content:
                raise ValueError("Empty or invalid response from LLM")

            # Record both prompt and completion usage, as ask() does.
            self.update_token_count(
                response.usage.prompt_tokens, response.usage.completion_tokens
            )
            return response.choices[0].message.content

        # Streaming request: record the estimated input tokens before the call.
        self.update_token_count(input_tokens)
        response = await self.client.chat.completions.create(**params)

        collected_messages = []
        async for chunk in response:
            # Skip chunks without choices instead of failing on [0].
            if not chunk.choices:
                continue
            chunk_message = chunk.choices[0].delta.content or ""
            collected_messages.append(chunk_message)
            print(chunk_message, end="", flush=True)

        print()  # Newline after streaming
        completion_text = "".join(collected_messages)
        full_response = completion_text.strip()

        if not full_response:
            raise ValueError("Empty response from streaming LLM")

        # Estimate completion tokens for the streamed text, as ask() does.
        self.total_completion_tokens += self.count_tokens(completion_text)

        return full_response

    except TokenLimitExceeded:
        raise
    except ValueError as ve:
        logger.error(f"Validation error in ask_with_images: {ve}")
        raise
    except OpenAIError as oe:
        logger.error(f"OpenAI API error: {oe}")
        if isinstance(oe, AuthenticationError):
            logger.error("Authentication failed. Check API key.")
        elif isinstance(oe, RateLimitError):
            logger.error("Rate limit exceeded. Consider increasing retry attempts.")
        elif isinstance(oe, APIError):
            logger.error(f"API error: {oe}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error in ask_with_images: {e}")
        raise
643
+
644
@retry(
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
    # NOTE(review): the intent is "don't retry TokenLimitExceeded", but the
    # tuple contains Exception, which matches every Exception subclass.
    # Confirm whether TokenLimitExceeded is really excluded before relying on it.
    retry=retry_if_exception_type((OpenAIError, Exception, ValueError)),
)
async def ask_tool(
    self,
    messages: List[Union[dict, Message]],
    system_msgs: Optional[List[Union[dict, Message]]] = None,
    timeout: int = 300,
    tools: Optional[List[dict]] = None,
    tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO,  # type: ignore
    temperature: Optional[float] = None,
    **kwargs,
) -> ChatCompletionMessage | None:
    """
    Ask LLM using functions/tools and return the response.

    Args:
        messages: List of conversation messages
        system_msgs: Optional system messages to prepend
        timeout: Request timeout in seconds
        tools: List of tools to use
        tool_choice: Tool choice strategy
        temperature: Sampling temperature; falls back to self.temperature
        **kwargs: Additional completion arguments

    Returns:
        ChatCompletionMessage: The model's response, or None when the API
        returns no choices / no message (a warning is logged in that case)

    Raises:
        TokenLimitExceeded: If token limits are exceeded
        ValueError: If tools, tool_choice, or messages are invalid
        OpenAIError: If API call fails after retries
        Exception: For unexpected errors
    """
    try:
        if tool_choice not in TOOL_CHOICE_VALUES:
            raise ValueError(f"Invalid tool_choice: {tool_choice}")

        # Images are only forwarded to models listed as multimodal.
        supports_images = self.model in MULTIMODAL_MODELS

        # System messages, when given, are formatted and placed first.
        if system_msgs:
            system_msgs = self.format_messages(system_msgs, supports_images)
            messages = system_msgs + self.format_messages(messages, supports_images)
        else:
            messages = self.format_messages(messages, supports_images)

        # Input size = messages plus the stringified tool descriptions.
        input_tokens = self.count_message_tokens(messages)
        if tools:
            input_tokens += sum(self.count_tokens(str(tool)) for tool in tools)

        # Refuse the request up front if it would exceed the token budget.
        if not self.check_token_limit(input_tokens):
            error_message = self.get_limit_error_message(input_tokens)
            raise TokenLimitExceeded(error_message)

        # Each tool must at least be a dict carrying a "type" key.
        if tools:
            for tool in tools:
                if not isinstance(tool, dict) or "type" not in tool:
                    raise ValueError("Each tool must be a dict with 'type' field")

        params = {
            "model": self.model,
            "messages": messages,
            "tools": tools,
            "tool_choice": tool_choice,
            "timeout": timeout,
            **kwargs,
        }

        # Reasoning models take max_completion_tokens and get no temperature.
        if self.model in REASONING_MODELS:
            params["max_completion_tokens"] = self.max_tokens
        else:
            params["max_tokens"] = self.max_tokens
            params["temperature"] = (
                temperature if temperature is not None else self.temperature
            )

        params["stream"] = False  # Always use non-streaming for tool requests
        response: ChatCompletion = await self.client.chat.completions.create(
            **params
        )

        # An unusable response is reported through the logger rather than
        # printed to stdout, and signalled to the caller as None.
        if not response.choices or not response.choices[0].message:
            logger.warning(f"Invalid or empty response from LLM: {response}")
            return None

        self.update_token_count(
            response.usage.prompt_tokens, response.usage.completion_tokens
        )

        return response.choices[0].message

    except TokenLimitExceeded:
        # Re-raise token limit errors without logging
        raise
    except ValueError as ve:
        logger.error(f"Validation error in ask_tool: {ve}")
        raise
    except OpenAIError as oe:
        logger.error(f"OpenAI API error: {oe}")
        if isinstance(oe, AuthenticationError):
            logger.error("Authentication failed. Check API key.")
        elif isinstance(oe, RateLimitError):
            logger.error("Rate limit exceeded. Consider increasing retry attempts.")
        elif isinstance(oe, APIError):
            logger.error(f"API error: {oe}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error in ask_tool: {e}")
        raise
OpenManus/app/logger.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from datetime import datetime
3
+
4
+ from loguru import logger as _logger
5
+
6
+ from app.config import PROJECT_ROOT
7
+
8
+
9
# Console log level most recently passed to define_log_level().
_print_level = "INFO"


def define_log_level(print_level="INFO", logfile_level="DEBUG", name: str = None):
    """Reconfigure the shared loguru logger and return it.

    Existing sinks are removed, then two sinks are added: stderr at
    ``print_level`` and a file under ``PROJECT_ROOT / "logs"`` at
    ``logfile_level``. The file is named with the current timestamp
    (``YYYYmmddHHMMSS``), prefixed by ``name`` and an underscore when a
    name is given.
    """
    global _print_level
    _print_level = print_level

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    if name:
        log_name = f"{name}_{timestamp}"  # name a log with prefix name
    else:
        log_name = timestamp

    log_path = PROJECT_ROOT / f"logs/{log_name}.log"

    _logger.remove()
    _logger.add(sys.stderr, level=print_level)
    _logger.add(log_path, level=logfile_level)
    return _logger
27
+
28
+
29
# Module-level logger configured with the default levels at import time.
logger = define_log_level()


if __name__ == "__main__":
    # Smoke test: emit one message per level, then log a caught exception.
    for emit, text in (
        (logger.info, "Starting application"),
        (logger.debug, "Debug message"),
        (logger.warning, "Warning message"),
        (logger.error, "Error message"),
        (logger.critical, "Critical message"),
    ):
        emit(text)

    try:
        raise ValueError("Test error")
    except Exception as e:
        logger.exception(f"An error occurred: {e}")
OpenManus/app/mcp/__init__.py ADDED
File without changes
OpenManus/app/mcp/server.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+
4
+
5
+ logging.basicConfig(level=logging.INFO, handlers=[logging.StreamHandler(sys.stderr)])
6
+
7
+ import argparse
8
+ import asyncio
9
+ import atexit
10
+ import json
11
+ from inspect import Parameter, Signature
12
+ from typing import Any, Dict, Optional
13
+
14
+ from mcp.server.fastmcp import FastMCP
15
+
16
+ from app.logger import logger
17
+ from app.tool.base import BaseTool
18
+ from app.tool.bash import Bash
19
+ from app.tool.browser_use_tool import BrowserUseTool
20
+ from app.tool.str_replace_editor import StrReplaceEditor
21
+ from app.tool.terminate import Terminate
22
+
23
+
24
class MCPServer:
    """MCP server wrapper that exposes a fixed set of tools through FastMCP."""

    def __init__(self, name: str = "openmanus"):
        self.server = FastMCP(name)
        # Standard tools, keyed by the short name used inside this class.
        self.tools: Dict[str, BaseTool] = {
            "bash": Bash(),
            "browser": BrowserUseTool(),
            "editor": StrReplaceEditor(),
            "terminate": Terminate(),
        }

    def register_tool(self, tool: BaseTool, method_name: Optional[str] = None) -> None:
        """Wrap ``tool`` in an async function and register it with the server.

        The wrapper's name, docstring and signature are derived from the
        "function" entry of ``tool.to_param()``; ``method_name`` overrides
        the tool's own name when given.
        """
        tool_name = method_name or tool.name
        tool_function = tool.to_param()["function"]

        async def tool_method(**kwargs):
            logger.info(f"Executing {tool_name}: {kwargs}")
            outcome = await tool.execute(**kwargs)

            logger.info(f"Result of {tool_name}: {outcome}")

            # Objects with model_dump() and plain dicts are returned as JSON
            # text; anything else is passed through unchanged.
            if hasattr(outcome, "model_dump"):
                return json.dumps(outcome.model_dump())
            if isinstance(outcome, dict):
                return json.dumps(outcome)
            return outcome

        # Make the wrapper look like a hand-written function for the tool.
        tool_method.__name__ = tool_name
        tool_method.__doc__ = self._build_docstring(tool_function)
        tool_method.__signature__ = self._build_signature(tool_function)

        # Attach a per-parameter schema for code that inspects it directly.
        schema = tool_function.get("parameters", {})
        properties = schema.get("properties", {})
        required = schema.get("required", [])
        parameter_schema = {}
        for param_name, details in properties.items():
            parameter_schema[param_name] = {
                "description": details.get("description", ""),
                "type": details.get("type", "any"),
                "required": param_name in required,
            }
        tool_method._parameter_schema = parameter_schema

        self.server.tool()(tool_method)
        logger.info(f"Registered tool: {tool_name}")

    def _build_docstring(self, tool_function: dict) -> str:
        """Return the tool description followed by a "Parameters:" listing."""
        description = tool_function.get("description", "")
        schema = tool_function.get("parameters", {})
        properties = schema.get("properties", {})
        required = schema.get("required", [])

        if not properties:
            return description

        entries = []
        for param_name, details in properties.items():
            if param_name in required:
                required_str = "(required)"
            else:
                required_str = "(optional)"
            param_type = details.get("type", "any")
            param_desc = details.get("description", "")
            entries.append(
                f" {param_name} ({param_type}) {required_str}: {param_desc}\n"
            )

        return description + "\n\nParameters:\n" + "".join(entries)

    def _build_signature(self, tool_function: dict) -> Signature:
        """Build a keyword-only Signature from the tool's parameter schema.

        Required parameters get no default; optional ones default to None.
        JSON Schema type names map to Python types, and anything
        unrecognised is annotated as Any.
        """
        schema = tool_function.get("parameters", {})
        properties = schema.get("properties", {})
        required = schema.get("required", [])

        # Compared with == (not hashed) so non-string "type" values fall
        # through to Any instead of raising.
        json_to_python = (
            ("string", str),
            ("integer", int),
            ("number", float),
            ("boolean", bool),
            ("object", dict),
            ("array", list),
        )

        parameters = []
        for param_name, details in properties.items():
            declared = details.get("type", "")
            annotation = next(
                (py_type for json_name, py_type in json_to_python if declared == json_name),
                Any,
            )
            parameters.append(
                Parameter(
                    name=param_name,
                    kind=Parameter.KEYWORD_ONLY,
                    default=Parameter.empty if param_name in required else None,
                    annotation=annotation,
                )
            )

        return Signature(parameters=parameters)

    async def cleanup(self) -> None:
        """Release server resources; only the browser tool is cleaned up."""
        logger.info("Cleaning up resources")
        browser = self.tools.get("browser")
        if hasattr(browser, "cleanup"):
            await browser.cleanup()

    def register_all_tools(self) -> None:
        """Register every tool held in ``self.tools``."""
        for tool in self.tools.values():
            self.register_tool(tool)

    def run(self, transport: str = "stdio") -> None:
        """Register the tools, arrange cleanup at exit, and start serving."""
        self.register_all_tools()

        def _cleanup_at_exit():
            # atexit handlers are synchronous, so drive the coroutine here.
            asyncio.run(self.cleanup())

        atexit.register(_cleanup_at_exit)

        logger.info(f"Starting OpenManus server ({transport} mode)")
        self.server.run(transport=transport)
161
+
162
+
163
def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        argparse.Namespace with a single ``transport`` attribute. "stdio" is
        both the default and the only accepted choice.
    """
    parser = argparse.ArgumentParser(description="OpenManus MCP Server")
    parser.add_argument(
        "--transport",
        choices=["stdio"],
        default="stdio",
        # The help text must match `choices`: only stdio is accepted.
        help="Communication method: only stdio is supported (default: stdio)",
    )
    return parser.parse_args()
173
+
174
+
175
if __name__ == "__main__":
    # Script entry point: read the CLI options, then build and start the server.
    cli_options = parse_args()
    MCPServer().run(transport=cli_options.transport)
OpenManus/app/prompt/__init__.py ADDED
File without changes
OpenManus/app/prompt/browser.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for the browser agent: describes the input layout, the JSON
# response format and the rules for choosing actions.
# NOTE(review): braces are doubled as for str.format(); if this text is used
# without formatting the model sees literal "{{"/"}}", and "{{max_actions}}"
# is never substituted either way — confirm how the caller consumes it.
SYSTEM_PROMPT = """\
You are an AI agent designed to automate browser tasks. Your goal is to accomplish the ultimate task following the rules.

# Input Format
Task
Previous steps
Current URL
Open Tabs
Interactive Elements
[index]<type>text</type>
- index: Numeric identifier for interaction
- type: HTML element type (button, input, etc.)
- text: Element description
Example:
[33]<button>Submit Form</button>

- Only elements with numeric indexes in [] are interactive
- elements without [] provide only context

# Response Rules
1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format:
{{"current_state": {{"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not",
"memory": "Description of what has been done and what you need to remember. Be very specific. Count here ALWAYS how many times you have done something and how many remain. E.g. 0 out of 10 websites analyzed. Continue with abc and xyz",
"next_goal": "What needs to be done with the next immediate action"}},
"action":[{{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence]}}

2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
Common action sequences:
- Form filling: [{{"input_text": {{"index": 1, "text": "username"}}}}, {{"input_text": {{"index": 2, "text": "password"}}}}, {{"click_element": {{"index": 3}}}}]
- Navigation and extraction: [{{"go_to_url": {{"url": "https://example.com"}}}}, {{"extract_content": {{"goal": "extract the names"}}}}]
- Actions are executed in the given order
- If the page changes after an action, the sequence is interrupted and you get the new state.
- Only provide the action sequence until an action which changes the page state significantly.
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page
- only use multiple actions if it makes sense.

3. ELEMENT INTERACTION:
- Only use indexes of the interactive elements
- Elements marked with "[]Non-interactive text" are non-interactive

4. NAVIGATION & ERROR HANDLING:
- If no suitable elements exist, use other functions to complete the task
- If stuck, try alternative approaches - like going back to a previous page, new search, new tab etc.
- Handle popups/cookies by accepting or closing them
- Use scroll to find elements you are looking for
- If you want to research something, open a new tab instead of using the current tab
- If captcha pops up, try to solve it - else try a different approach
- If the page is not fully loaded, use wait action

5. TASK COMPLETION:
- Use the done action as the last action as soon as the ultimate task is complete
- Don't use "done" before you are done with everything the user asked you, except you reach the last step of max_steps.
- If you reach your last step, use the done action even if the task is not fully finished. Provide all the information you have gathered so far. If the ultimate task is completely finished set success to true. If not everything the user asked for is completed set success in done to false!
- If you have to do something repeatedly for example the task says for "each", or "for all", or "x times", count always inside "memory" how many times you have done it and how many remain. Don't stop until you have completed like the task asked you. Only call done after the last step.
- Don't hallucinate actions
- Make sure you include everything you found out for the ultimate task in the done text parameter. Do not just say you are done, but include the requested information of the task.

6. VISUAL CONTEXT:
- When an image is provided, use it to understand the page layout
- Bounding boxes with labels on their top right corner correspond to element indexes

7. Form filling:
- If you fill an input field and your action sequence is interrupted, most often something changed e.g. suggestions popped up under the field.

8. Long tasks:
- Keep track of the status and subresults in the memory.

9. Extraction:
- If your task is to find information - call extract_content on the specific pages to get and store the information.
Your responses must be always JSON with the specified format.
"""
72
+
73
# Per-step prompt for the browser agent. It is a str.format-style template
# with five fields: url_placeholder, tabs_placeholder,
# content_above_placeholder, content_below_placeholder and
# results_placeholder.
# NOTE(review): presumably filled in by the browser agent with the current
# page state before each step — confirm against the caller.
NEXT_STEP_PROMPT = """
What should I do next to achieve my goal?

When you see [Current state starts here], focus on the following:
- Current URL and page title{url_placeholder}
- Available tabs{tabs_placeholder}
- Interactive elements and their indices
- Content above{content_above_placeholder} or below{content_below_placeholder} the viewport (if indicated)
- Any action results or errors{results_placeholder}

For browser interactions:
- To navigate: browser_use with action="go_to_url", url="..."
- To click: browser_use with action="click_element", index=N
- To type: browser_use with action="input_text", index=N, text="..."
- To extract: browser_use with action="extract_content", goal="..."
- To scroll: browser_use with action="scroll_down" or "scroll_up"

Consider both what's visible and what might be beyond the current viewport.
Be methodical - remember your progress and what you've learned so far.

If you want to stop the interaction at any point, use the `terminate` tool/function call.
"""
OpenManus/app/prompt/cot.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for chain-of-thought use: asks the model to decompose the
# problem, reason step by step, and reply in a "Thinking: ... / Answer: ..."
# layout. Contains no format placeholders.
SYSTEM_PROMPT = """You are an assistant focused on Chain of Thought reasoning. For each question, please follow these steps:

1. Break down the problem: Divide complex problems into smaller, more manageable parts
2. Think step by step: Think through each part in detail, showing your reasoning process
3. Synthesize conclusions: Integrate the thinking from each part into a complete solution
4. Provide an answer: Give a final concise answer

Your response should follow this format:
Thinking: [Detailed thought process, including problem decomposition, reasoning for each step, and analysis]
Answer: [Final answer based on the thought process, clear and concise]

Remember, the thinking process is more important than the final answer, as it demonstrates how you reached your conclusion.
"""

# Follow-up prompt asking the model to continue reasoning or give its final answer.
NEXT_STEP_PROMPT = "Please continue your thinking based on the conversation above. If you've reached a conclusion, provide your final answer."
OpenManus/app/prompt/manus.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for the general-purpose agent. It is a str.format-style
# template with one field, {directory}.
# The two adjacent literals are concatenated by the parser, so the first one
# ends with a space to keep "...handle it all." and "The initial directory..."
# from running together.
SYSTEM_PROMPT = (
    "You are OpenManus, an all-capable AI assistant, aimed at solving any task presented by the user. You have various tools at your disposal that you can call upon to efficiently complete complex requests. Whether it's programming, information retrieval, file processing, web browsing, or human interaction (only for extreme cases), you can handle it all. "
    "The initial directory is: {directory}"
)

# Per-step prompt: pick tools proactively, explain results, and use the
# `terminate` tool to stop.
NEXT_STEP_PROMPT = """
Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps.

If you want to stop the interaction at any point, use the `terminate` tool/function call.
"""
OpenManus/app/prompt/mcp.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Prompts for the MCP Agent."""

# System prompt: tells the model it works through an MCP server whose tool
# set can change while it runs, and how to call those tools.
SYSTEM_PROMPT = """You are an AI assistant with access to a Model Context Protocol (MCP) server.
You can use the tools provided by the MCP server to complete tasks.
The MCP server will dynamically expose tools that you can use - always check the available tools first.

When using an MCP tool:
1. Choose the appropriate tool based on your task requirements
2. Provide properly formatted arguments as required by the tool
3. Observe the results and use them to determine next steps
4. Tools may change during operation - new tools might appear or existing ones might disappear

Follow these guidelines:
- Call tools with valid parameters as documented in their schemas
- Handle errors gracefully by understanding what went wrong and trying again with corrected parameters
- For multimedia responses (like images), you'll receive a description of the content
- Complete user requests step by step, using the most appropriate tools
- If multiple tools need to be called in sequence, make one call at a time and wait for results

Remember to clearly explain your reasoning and actions to the user.
"""

# Per-step prompt asking the model to decide which MCP tool to use next.
NEXT_STEP_PROMPT = """Based on the current state and available tools, what should be done next?
Think step by step about the problem and identify which MCP tool would be most helpful for the current stage.
If you've already made progress, consider what additional information you need or what actions would move you closer to completing the task.
"""

# Additional specialized prompts
# Template shown after a tool failure; has one str.format field, {tool_name}.
TOOL_ERROR_PROMPT = """You encountered an error with the tool '{tool_name}'.
Try to understand what went wrong and correct your approach.
Common issues include:
- Missing or incorrect parameters
- Invalid parameter formats
- Using a tool that's no longer available
- Attempting an operation that's not supported

Please check the tool specifications and try again with corrected parameters.
"""

# Template shown after a multimedia tool result; has one str.format field, {tool_name}.
MULTIMEDIA_RESPONSE_PROMPT = """You've received a multimedia response (image, audio, etc.) from the tool '{tool_name}'.
This content has been processed and described for you.
Use this information to continue the task or provide insights to the user.
"""