Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitattributes +17 -11
- Dockerfile.backend +1 -1
- README.md +163 -7
- docker-compose.yml +2 -1
- lpm_kernel/api/domains/documents/routes.py +5 -6
- lpm_kernel/api/domains/kernel2/routes/role_routes.py +12 -0
- lpm_kernel/api/domains/kernel2/routes_talk.py +2 -2
- lpm_kernel/common/repository/vector_repository.py +1 -2
- lpm_kernel/file_data/chroma_utils.py +1 -3
- lpm_kernel/file_data/chunker.py +4 -1
- lpm_kernel/utils.py +5 -2
- scripts/setup.sh +0 -4
- start.sh +28 -17
.gitattributes
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
# Set all text files to use LF line endings
|
| 2 |
* text=auto eol=lf
|
|
|
|
| 3 |
# Binary files should not be modified
|
| 4 |
-
*.png
|
| 5 |
*.jpg binary
|
| 6 |
*.jpeg binary
|
| 7 |
*.gif binary
|
|
@@ -10,24 +11,29 @@
|
|
| 10 |
*.mp4 binary
|
| 11 |
*.mp3 binary
|
| 12 |
*.pdf binary
|
| 13 |
-
*.zip
|
| 14 |
*.gz binary
|
| 15 |
*.tar binary
|
| 16 |
dependencies/graphrag-1.2.1.dev27.tar.gz filter=lfs diff=lfs merge=lfs -text
|
| 17 |
dependencies/graphrag-modified.tar.gz filter=lfs diff=lfs merge=lfs -text
|
| 18 |
dependencies/llama.cpp.zip filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
lpm_kernel/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
lpm_frontend/public/images/step_2.png filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
lpm_frontend/public/images/step_4.png filter=lfs diff=lfs merge=lfs -text
|
| 22 |
images/cover.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
lpm_frontend/public/images/app_native_applications.png filter=lfs diff=lfs merge=lfs -text
|
| 24 |
lpm_frontend/public/images/app_secondme_apps.png filter=lfs diff=lfs merge=lfs -text
|
| 25 |
lpm_frontend/public/images/app_secondme_network.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
| 26 |
lpm_frontend/public/images/step_1.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 27 |
lpm_frontend/public/images/step_3.png filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
images/
|
| 29 |
-
lpm_frontend/
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
| 1 |
# Set all text files to use LF line endings
|
| 2 |
* text=auto eol=lf
|
| 3 |
+
|
| 4 |
# Binary files should not be modified
|
| 5 |
+
*.png binary
|
| 6 |
*.jpg binary
|
| 7 |
*.jpeg binary
|
| 8 |
*.gif binary
|
|
|
|
| 11 |
*.mp4 binary
|
| 12 |
*.mp3 binary
|
| 13 |
*.pdf binary
|
| 14 |
+
*.zip binary
|
| 15 |
*.gz binary
|
| 16 |
*.tar binary
|
| 17 |
dependencies/graphrag-1.2.1.dev27.tar.gz filter=lfs diff=lfs merge=lfs -text
|
| 18 |
dependencies/graphrag-modified.tar.gz filter=lfs diff=lfs merge=lfs -text
|
| 19 |
dependencies/llama.cpp.zip filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
| 20 |
images/cover.png filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
images/secondme_cover.png filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
lpm_frontend/package-lock.json filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
lpm_frontend/package.json filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
lpm_frontend/public/fonts/Calistoga.ttf filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
lpm_frontend/public/images/app_api_mcp.png filter=lfs diff=lfs merge=lfs -text
|
| 26 |
lpm_frontend/public/images/app_native_applications.png filter=lfs diff=lfs merge=lfs -text
|
| 27 |
lpm_frontend/public/images/app_secondme_apps.png filter=lfs diff=lfs merge=lfs -text
|
| 28 |
lpm_frontend/public/images/app_secondme_network.png filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
lpm_frontend/public/images/logo.png filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
lpm_frontend/public/images/single_logo.png filter=lfs diff=lfs merge=lfs -text
|
| 31 |
lpm_frontend/public/images/step_1.png filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
lpm_frontend/public/images/step_2.png filter=lfs diff=lfs merge=lfs -text
|
| 33 |
lpm_frontend/public/images/step_3.png filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
lpm_frontend/public/images/step_4.png filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
lpm_frontend/tsconfig.json filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
lpm_kernel/package-lock.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
lpm_kernel/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
resources/L2/data_pipeline/data_prep/subjective/config/config.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
resources/model/processed_data/L1/graphrag_indexing_output/subjective/stats.json filter=lfs diff=lfs merge=lfs -text
|
Dockerfile.backend
CHANGED
|
@@ -71,4 +71,4 @@ ENV PYTHONUNBUFFERED=1 \
|
|
| 71 |
EXPOSE 8002 8080
|
| 72 |
|
| 73 |
# Set the startup command
|
| 74 |
-
CMD ["bash", "-c", "echo \"Checking SQLite database...\" && if [ ! -s /app/data/sqlite/lpm.db ]; then echo \"SQLite database not found or empty, initializing...\" && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db \".read /app/docker/sqlite/init.sql\" && echo \"SQLite database initialized successfully\" && echo \"Tables created:\" && sqlite3 /app/data/sqlite/lpm.db \".tables\"; else echo \"SQLite database already exists, skipping initialization\"; fi && echo \"Checking ChromaDB...\" && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo \"ChromaDB collections not found, initializing...\" && python /app/docker/app/init_chroma.py && echo \"ChromaDB initialized successfully\"; else echo \"ChromaDB already exists, skipping initialization\"; fi && echo \"Starting application at $(date)\" >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=
|
|
|
|
| 71 |
EXPOSE 8002 8080
|
| 72 |
|
| 73 |
# Set the startup command
|
| 74 |
+
CMD ["bash", "-c", "echo \"Checking SQLite database...\" && if [ ! -s /app/data/sqlite/lpm.db ]; then echo \"SQLite database not found or empty, initializing...\" && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db \".read /app/docker/sqlite/init.sql\" && echo \"SQLite database initialized successfully\" && echo \"Tables created:\" && sqlite3 /app/data/sqlite/lpm.db \".tables\"; else echo \"SQLite database already exists, skipping initialization\"; fi && echo \"Checking ChromaDB...\" && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo \"ChromaDB collections not found, initializing...\" && python /app/docker/app/init_chroma.py && echo \"ChromaDB initialized successfully\"; else echo \"ChromaDB already exists, skipping initialization\"; fi && echo \"Starting application at $(date)\" >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=${LOCAL_APP_PORT:-8002} >> /app/logs/backend.log 2>&1"]
|
README.md
CHANGED
|
@@ -1,7 +1,163 @@
|
|
| 1 |
-
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+

|
| 2 |
+
|
| 3 |
+
<div align="center">
|
| 4 |
+
|
| 5 |
+
[Homepage](https://home.second.me/)
|
| 6 |
+
[arXiv:2406.18312](https://arxiv.org/abs/2406.18312)
|
| 7 |
+
[arXiv:2503.08102](https://arxiv.org/abs/2503.08102)
|
| 8 |
+
[Discord](https://discord.gg/GpWHQNUwrg)
|
| 9 |
+
[X (Twitter)](https://x.com/SecondMe_AI1)
|
| 10 |
+
[Reddit](https://www.reddit.com/r/SecondMeAI/)
|
| 11 |
+
[FAQ](https://secondme.gitbook.io/secondme/faq)
|
| 12 |
+
|
| 13 |
+
</div>
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
## Our Vision
|
| 17 |
+
|
| 18 |
+
Companies like OpenAI built "Super AI" that threatens human independence. We crave individuality: AI that amplifies, not erases, **YOU**.
|
| 19 |
+
|
| 20 |
+
We’re challenging that with "**Second Me**": an open-source prototype where you craft your own **AI self**—a new AI species that preserves you, delivers your context, and defends your interests.
|
| 21 |
+
|
| 22 |
+
It’s **locally trained and hosted**—your data, your control—yet **globally connected**, scaling your intelligence across an AI network. Beyond that, it’s your AI identity interface—a bold standard that links your AI to the world, sparks collaboration among AI selves, and builds tomorrow’s truly native AI apps.
|
| 23 |
+
|
| 24 |
+
Tech enthusiasts, AI pros, domain experts: join us! Second Me is your launchpad to extend your mind into the digital horizon.
|
| 25 |
+
|
| 26 |
+
## Key Features
|
| 27 |
+
|
| 28 |
+
### **Train Your AI Self** with AI-Native Memory ([Paper](https://arxiv.org/abs/2503.08102))
|
| 29 |
+
Start training your Second Me today with your own memories! Using Hierarchical Memory Modeling (HMM) and the Me-Alignment Algorithm, your AI self captures your identity, understands your context, and reflects you authentically.
|
| 30 |
+
|
| 31 |
+
<p align="center">
|
| 32 |
+
<img src="https://github.com/user-attachments/assets/a84c6135-26dc-4413-82aa-f4a373c0ff89" width="94%" />
|
| 33 |
+
</p>
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
### **Scale Your Intelligence** on the Second Me Network
|
| 37 |
+
Launch your AI self from your laptop onto our decentralized network—anyone or any app can connect with your permission, sharing your context as your digital identity.
|
| 38 |
+
|
| 39 |
+
<p align="center">
|
| 40 |
+
<img src="https://github.com/user-attachments/assets/9a74a3f4-d8fd-41c1-8f24-534ed94c842a" width="94%" />
|
| 41 |
+
</p>
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
### Build Tomorrow’s Apps with Second Me
|
| 45 |
+
**Roleplay**: Your AI self switches personas to represent you in different scenarios.
|
| 46 |
+
**AI Space**: Collaborate with other Second Mes to spark ideas or solve problems.
|
| 47 |
+
|
| 48 |
+
<p align="center">
|
| 49 |
+
<img src="https://github.com/user-attachments/assets/bc6125c1-c84f-4ecc-b620-8932cc408094" width="94%" />
|
| 50 |
+
</p>
|
| 51 |
+
|
| 52 |
+
### 100% **Privacy and Control**
|
| 53 |
+
Unlike traditional centralized AI systems, Second Me ensures that your information and intelligence remain local and completely private.
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
## Getting started & staying tuned with us
|
| 58 |
+
Star and join us, and you will receive all release notifications from GitHub without any delay!
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
<p align="center">
|
| 62 |
+
<img src="https://github.com/user-attachments/assets/5c14d956-f931-4c25-b0b3-3c2c96cd7581" width="94%" />
|
| 63 |
+
</p>
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
## Quick Start
|
| 67 |
+
|
| 68 |
+
### 📊 Model Size vs. Memory (Reference Guide)
|
| 69 |
+
|
| 70 |
+
*Note: "B" in the table stands for "billion parameters" (the model size). Data shown are examples only; actual supported model sizes may vary depending on system optimization, deployment environment, and other hardware/software conditions.*
|
| 71 |
+
|
| 72 |
+
| Memory (GB) | Docker Deployment (Windows/Linux) | Docker Deployment (Mac) | Integrated Setup (Windows/Linux) | Integrated Setup (Mac) |
|
| 73 |
+
|--------------|-----------------------------|-------------------|--------------------------|----------------|
|
| 74 |
+
| 8 | ~0.8B (example) | ~0.4B (example) | ~1.0B (example) | ~0.6B (example) |
|
| 75 |
+
| 16 | 1.5B (example) | 0.5B (example) | ~2.0B (example) | ~0.8B (example) |
|
| 76 |
+
| 32 | ~2.8B (example) | ~1.2B (example) | ~3.5B (example) | ~1.5B (example) |
|
| 77 |
+
|
| 78 |
+
> **Note**: Models below 0.5B may not provide satisfactory performance for complex tasks. And we're continuously improving cross-platform support - please [submit an issue](https://github.com/mindverse/Second-Me/issues/new) for feedback or compatibility problems on different operating systems.
|
| 79 |
+
|
| 80 |
+
> **MLX Acceleration**: Mac M-series users can use [MLX](https://github.com/mindverse/Second-Me/tree/master/lpm_kernel/L2/mlx_training) to run larger models (CLI-only).
|
| 81 |
+
|
| 82 |
+
### ⚡ Get your Second Me running in just 3 steps:
|
| 83 |
+
|
| 84 |
+
```bash
|
| 85 |
+
# 1. Clone the repository
|
| 86 |
+
git clone https://github.com/mindverse/Second-Me.git
|
| 87 |
+
cd Second-Me
|
| 88 |
+
# 2. Start Docker containers
|
| 89 |
+
make docker-up
|
| 90 |
+
# 3. Access the web interface
|
| 91 |
+
# Open your browser and visit: http://localhost:3000
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
👉 For detailed instructions — including integrated (non-Docker) setup, model selection, memory requirements, and platform-specific tips,
|
| 95 |
+
check the full [Deployment Guide on GitBook](https://secondme.gitbook.io/secondme/guides/deployment).
|
| 96 |
+
|
| 97 |
+
❓ Got questions about setup, models, or any troubleshooting? [Check our FAQ](https://secondme.gitbook.io/secondme/faq).
|
| 98 |
+
|
| 99 |
+
## Tutorial and Use Cases
|
| 100 |
+
🛠️ Feel free to follow the [User tutorial](https://secondme.gitbook.io/secondme/getting-started) to build your Second Me.
|
| 101 |
+
|
| 102 |
+
💡 Check out the links below to see how Second Me can be used in real-life scenarios:
|
| 103 |
+
- [Felix AMA (Roleplay app)](https://app.secondme.io/example/ama)
|
| 104 |
+
- [Brainstorming a 15-Day European City Itinerary (Network app)](https://app.secondme.io/example/brainstorming)
|
| 105 |
+
- [Icebreaking as a Speed Dating Match (Network app)](https://app.secondme.io/example/Icebreaker)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
## What's Next: May 2025
|
| 109 |
+
|
| 110 |
+
Second Me continues to evolve as the open-source identity infrastructure for AI. Here's what's on deck for May:
|
| 111 |
+
|
| 112 |
+
- 🗂️ **Version Control**: Smarter versioning of memory and identity states
|
| 113 |
+
- 🧠 **Continuous Training Pipelines**: Keep your AI self evolving over time, with ongoing updates based on new memory inputs.
|
| 114 |
+
- ⚙️ **Performance & Stability Improvements**: Enhancements across inference ability, model alignment, and base model upgrades
|
| 115 |
+
- ☁️ **Cloud Solutions**: Explore cloud-based solutions for both model training (fine-tuning) and model deployment, to reduce the hardware burden on users' local machines.
|
| 116 |
+
|
| 117 |
+
## Contributing
|
| 118 |
+
|
| 119 |
+
We’d love for you to help shape what’s coming next — whether it’s fixing bugs, building new features, or improving docs.
|
| 120 |
+
|
| 121 |
+
- 📘 Check out our [Contribution Guide](./CONTRIBUTING.md) to get started
|
| 122 |
+
- 💻 Submit ideas, issues, or PRs on [GitHub](https://github.com/mindverse/Second-Me)
|
| 123 |
+
- 💬 Join the conversation and stay updated in our [Discord](https://discord.gg/GpWHQNUwrg) — it’s where the community lives
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
## Contributors
|
| 127 |
+
|
| 128 |
+
We would like to express our gratitude to all the individuals who have contributed to Second Me! If you're interested in contributing to the future of intelligence uploading, whether through code, documentation, or ideas, please feel free to submit a pull request to our repository: [Second-Me](https://github.com/Mindverse/Second-Me).
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
<a href="https://github.com/mindverse/Second-Me/graphs/contributors">
|
| 132 |
+
<img src="https://contrib.rocks/image?repo=mindverse/Second-Me" />
|
| 133 |
+
</a>
|
| 134 |
+
|
| 135 |
+
Made with [contrib.rocks](https://contrib.rocks).
|
| 136 |
+
|
| 137 |
+
## Acknowledgements
|
| 138 |
+
|
| 139 |
+
This work leverages the power of the open-source community.
|
| 140 |
+
|
| 141 |
+
For data synthesis, we utilized [GraphRAG](https://github.com/microsoft/graphrag) from Microsoft.
|
| 142 |
+
|
| 143 |
+
For model deployment, we utilized [llama.cpp](https://github.com/ggml-org/llama.cpp), which provides efficient inference capabilities.
|
| 144 |
+
|
| 145 |
+
Our base models primarily come from the [Qwen2.5](https://huggingface.co/Qwen) series.
|
| 146 |
+
|
| 147 |
+
We also want to extend our sincere gratitude to all users who have experienced Second Me. We recognize that there is significant room for optimization throughout the entire pipeline, and we are fully committed to iterative improvements to ensure everyone can enjoy the best possible experience locally.
|
| 148 |
+
|
| 149 |
+
## License
|
| 150 |
+
|
| 151 |
+
Second Me is open source software licensed under the Apache License 2.0. See the [LICENSE](LICENSE) file for more details.
|
| 152 |
+
|
| 153 |
+
[license]: ./LICENSE
|
| 154 |
+
|
| 155 |
+
## Star History
|
| 156 |
+
|
| 157 |
+
<a href="https://www.star-history.com/#mindverse/Second-Me&Date">
|
| 158 |
+
<picture>
|
| 159 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date&theme=dark" />
|
| 160 |
+
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date" />
|
| 161 |
+
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date" />
|
| 162 |
+
</picture>
|
| 163 |
+
</a>
|
docker-compose.yml
CHANGED
|
@@ -6,7 +6,8 @@ services:
|
|
| 6 |
container_name: second-me-backend
|
| 7 |
restart: unless-stopped
|
| 8 |
ports:
|
| 9 |
-
- "
|
|
|
|
| 10 |
volumes:
|
| 11 |
- ./data:/app/data
|
| 12 |
- ./logs:/app/logs
|
|
|
|
| 6 |
container_name: second-me-backend
|
| 7 |
restart: unless-stopped
|
| 8 |
ports:
|
| 9 |
+
- "8002:8002"
|
| 10 |
+
- "8080:8080"
|
| 11 |
volumes:
|
| 12 |
- ./data:/app/data
|
| 13 |
- ./logs:/app/logs
|
lpm_kernel/api/domains/documents/routes.py
CHANGED
|
@@ -46,11 +46,10 @@ def list_documents():
|
|
| 46 |
def scan_documents():
|
| 47 |
"""Scan documents from configured directory and store them in database"""
|
| 48 |
try:
|
| 49 |
-
# 2. Get
|
|
|
|
| 50 |
config = Config.from_env()
|
| 51 |
-
|
| 52 |
-
project_root = Path(__file__).parent.parent.parent.parent.parent
|
| 53 |
-
full_path = project_root / relative_path
|
| 54 |
|
| 55 |
# 3. Scan and process files
|
| 56 |
processed_doc_dtos = document_service.scan_directory(
|
|
@@ -146,8 +145,8 @@ def process_all_chunks():
|
|
| 146 |
try:
|
| 147 |
config = Config.from_env()
|
| 148 |
chunker = DocumentChunker(
|
| 149 |
-
chunk_size=int(config.get("DOCUMENT_CHUNK_SIZE")),
|
| 150 |
-
overlap=int(config.get("DOCUMENT_CHUNK_OVERLAP")),
|
| 151 |
)
|
| 152 |
|
| 153 |
documents = document_service.list_documents()
|
|
|
|
| 46 |
def scan_documents():
|
| 47 |
"""Scan documents from configured directory and store them in database"""
|
| 48 |
try:
|
| 49 |
+
# 2. Get the full path from configuration
|
| 50 |
+
# The Config class already resolves relative paths using BASE_DIR
|
| 51 |
config = Config.from_env()
|
| 52 |
+
full_path = Path(config.get("USER_RAW_CONTENT_DIR"))
|
|
|
|
|
|
|
| 53 |
|
| 54 |
# 3. Scan and process files
|
| 55 |
processed_doc_dtos = document_service.scan_directory(
|
|
|
|
| 145 |
try:
|
| 146 |
config = Config.from_env()
|
| 147 |
chunker = DocumentChunker(
|
| 148 |
+
chunk_size=int(config.get("DOCUMENT_CHUNK_SIZE", 4000)),
|
| 149 |
+
overlap=int(config.get("DOCUMENT_CHUNK_OVERLAP", 200)),
|
| 150 |
)
|
| 151 |
|
| 152 |
documents = document_service.list_documents()
|
lpm_kernel/api/domains/kernel2/routes/role_routes.py
CHANGED
|
@@ -20,6 +20,15 @@ def create_role():
|
|
| 20 |
"""create new Role"""
|
| 21 |
try:
|
| 22 |
data = request.get_json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
create_request = CreateRoleRequest.from_dict(data)
|
| 24 |
role = role_service.create_role(create_request)
|
| 25 |
|
|
@@ -28,6 +37,9 @@ def create_role():
|
|
| 28 |
else:
|
| 29 |
return jsonify(APIResponse.error("create role failed, maybe the name existed")), 400
|
| 30 |
|
|
|
|
|
|
|
|
|
|
| 31 |
except Exception as e:
|
| 32 |
logger.error(f"Error creating Role: {str(e)}")
|
| 33 |
return jsonify(APIResponse.error(f"Error occurred when creating role: {str(e)}")), 500
|
|
|
|
| 20 |
"""create new Role"""
|
| 21 |
try:
|
| 22 |
data = request.get_json()
|
| 23 |
+
if not data:
|
| 24 |
+
return jsonify(APIResponse.error("Missing request body")), 400
|
| 25 |
+
|
| 26 |
+
# Validate mandatory fields
|
| 27 |
+
required_fields = ["name", "description", "system_prompt"]
|
| 28 |
+
missing_fields = [f for f in required_fields if not data.get(f)]
|
| 29 |
+
if missing_fields:
|
| 30 |
+
return jsonify(APIResponse.error(f"Missing mandatory fields: {', '.join(missing_fields)}")), 400
|
| 31 |
+
|
| 32 |
create_request = CreateRoleRequest.from_dict(data)
|
| 33 |
role = role_service.create_role(create_request)
|
| 34 |
|
|
|
|
| 37 |
else:
|
| 38 |
return jsonify(APIResponse.error("create role failed, maybe the name existed")), 400
|
| 39 |
|
| 40 |
+
except (KeyError, TypeError) as e:
|
| 41 |
+
logger.error(f"Validation error creating Role: {str(e)}")
|
| 42 |
+
return jsonify(APIResponse.error(f"Invalid request data: {str(e)}")), 400
|
| 43 |
except Exception as e:
|
| 44 |
logger.error(f"Error creating Role: {str(e)}")
|
| 45 |
return jsonify(APIResponse.error(f"Error occurred when creating role: {str(e)}")), 500
|
lpm_kernel/api/domains/kernel2/routes_talk.py
CHANGED
|
@@ -31,9 +31,9 @@ from lpm_kernel.api.domains.kernel2.services.advanced_chat_service import advanc
|
|
| 31 |
|
| 32 |
logger = logging.getLogger(__name__)
|
| 33 |
|
| 34 |
-
talk_bp = Blueprint(
|
| 35 |
|
| 36 |
-
@talk_bp.route("/
|
| 37 |
@validate()
|
| 38 |
def chat(body: ChatRequest):
|
| 39 |
"""
|
|
|
|
| 31 |
|
| 32 |
logger = logging.getLogger(__name__)
|
| 33 |
|
| 34 |
+
talk_bp = Blueprint('talk', __name__, url_prefix='/api/talk')
|
| 35 |
|
| 36 |
+
@talk_bp.route("/chat", methods=["POST"])
|
| 37 |
@validate()
|
| 38 |
def chat(body: ChatRequest):
|
| 39 |
"""
|
lpm_kernel/common/repository/vector_repository.py
CHANGED
|
@@ -26,8 +26,7 @@ class BaseVectorRepository(ABC):
|
|
| 26 |
|
| 27 |
class ChromaRepository(BaseVectorRepository):
|
| 28 |
def __init__(self, collection_name: str, persist_directory: str = "./chroma_db"):
|
| 29 |
-
|
| 30 |
-
self.client = chromadb.PersistentClient(path=persist_directory, settings=settings)
|
| 31 |
|
| 32 |
# Check if collection exists, create it if it doesn't
|
| 33 |
try:
|
|
|
|
| 26 |
|
| 27 |
class ChromaRepository(BaseVectorRepository):
|
| 28 |
def __init__(self, collection_name: str, persist_directory: str = "./chroma_db"):
|
| 29 |
+
self.client = chromadb.PersistentClient(path=persist_directory)
|
|
|
|
| 30 |
|
| 31 |
# Check if collection exists, create it if it doesn't
|
| 32 |
try:
|
lpm_kernel/file_data/chroma_utils.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from typing import Optional, Dict, Any, List, Tuple
|
| 2 |
import os
|
| 3 |
import chromadb
|
| 4 |
-
from chromadb.config import Settings
|
| 5 |
import logging
|
| 6 |
from lpm_kernel.configs.logging import get_train_process_logger
|
| 7 |
|
|
@@ -74,8 +73,7 @@ def reinitialize_chroma_collections(dimension: int = 1536) -> bool:
|
|
| 74 |
"""
|
| 75 |
try:
|
| 76 |
chroma_path = os.getenv("CHROMA_PERSIST_DIRECTORY", "./data/chroma_db")
|
| 77 |
-
|
| 78 |
-
client = chromadb.PersistentClient(path=chroma_path, settings=settings)
|
| 79 |
|
| 80 |
# Delete and recreate document collection
|
| 81 |
try:
|
|
|
|
| 1 |
from typing import Optional, Dict, Any, List, Tuple
|
| 2 |
import os
|
| 3 |
import chromadb
|
|
|
|
| 4 |
import logging
|
| 5 |
from lpm_kernel.configs.logging import get_train_process_logger
|
| 6 |
|
|
|
|
| 73 |
"""
|
| 74 |
try:
|
| 75 |
chroma_path = os.getenv("CHROMA_PERSIST_DIRECTORY", "./data/chroma_db")
|
| 76 |
+
client = chromadb.PersistentClient(path=chroma_path)
|
|
|
|
| 77 |
|
| 78 |
# Delete and recreate document collection
|
| 79 |
try:
|
lpm_kernel/file_data/chunker.py
CHANGED
|
@@ -2,7 +2,10 @@ from typing import List
|
|
| 2 |
from lpm_kernel.L1.bio import Chunk
|
| 3 |
import traceback
|
| 4 |
import time
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
from lpm_kernel.configs.logging import get_train_process_logger
|
| 8 |
logger = get_train_process_logger()
|
|
|
|
| 2 |
from lpm_kernel.L1.bio import Chunk
|
| 3 |
import traceback
|
| 4 |
import time
|
| 5 |
+
try:
|
| 6 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 7 |
+
except ImportError:
|
| 8 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 9 |
|
| 10 |
from lpm_kernel.configs.logging import get_train_process_logger
|
| 11 |
logger = get_train_process_logger()
|
lpm_kernel/utils.py
CHANGED
|
@@ -3,7 +3,10 @@ from enum import Enum
|
|
| 3 |
import tiktoken
|
| 4 |
import re
|
| 5 |
from typing import Any, Optional, Union, Collection, AbstractSet, Literal, List
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
import random
|
| 8 |
import string
|
| 9 |
from itertools import chain
|
|
@@ -165,7 +168,7 @@ class TokenTextSplitter(TextSplitter):
|
|
| 165 |
|
| 166 |
def _cut_meaningless_head_tail(self, text: str) -> str:
|
| 167 |
# Only split when there are multiple newlines, as parsing of PDF/Word often contains false newlines
|
| 168 |
-
sentences = re.split(
|
| 169 |
if len(sentences) < 2:
|
| 170 |
return text
|
| 171 |
head = sentences[0]
|
|
|
|
| 3 |
import tiktoken
|
| 4 |
import re
|
| 5 |
from typing import Any, Optional, Union, Collection, AbstractSet, Literal, List
|
| 6 |
+
try:
|
| 7 |
+
from langchain_text_splitters import TextSplitter
|
| 8 |
+
except ImportError:
|
| 9 |
+
from langchain.text_splitter import TextSplitter
|
| 10 |
import random
|
| 11 |
import string
|
| 12 |
from itertools import chain
|
|
|
|
| 168 |
|
| 169 |
def _cut_meaningless_head_tail(self, text: str) -> str:
|
| 170 |
# Only split when there are multiple newlines, as parsing of PDF/Word often contains false newlines
|
| 171 |
+
sentences = re.split("\. |! |\? |。|!|?|\n+ *\n+", text)
|
| 172 |
if len(sentences) < 2:
|
| 173 |
return text
|
| 174 |
head = sentences[0]
|
scripts/setup.sh
CHANGED
|
@@ -607,10 +607,6 @@ parse_args() {
|
|
| 607 |
|
| 608 |
# Main function
|
| 609 |
main() {
|
| 610 |
-
# Create necessary directories with write permissions
|
| 611 |
-
mkdir -p "./logs"
|
| 612 |
-
mkdir -p "./.cache/huggingface/hub"
|
| 613 |
-
|
| 614 |
# Display welcome message
|
| 615 |
display_header "Second-Me Complete Installation"
|
| 616 |
|
|
|
|
| 607 |
|
| 608 |
# Main function
|
| 609 |
main() {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
# Display welcome message
|
| 611 |
display_header "Second-Me Complete Installation"
|
| 612 |
|
start.sh
CHANGED
|
@@ -1,32 +1,43 @@
|
|
| 1 |
#!/bin/bash
|
|
|
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
echo "--- Checking SQLite database... ---"
|
| 4 |
-
if [ ! -s /
|
| 5 |
echo "SQLite database not found or empty, initializing..."
|
| 6 |
-
mkdir -p /
|
| 7 |
-
sqlite3 /
|
| 8 |
echo "SQLite database initialized successfully"
|
| 9 |
-
echo "Tables created:"
|
| 10 |
-
sqlite3 /app/data/sqlite/lpm.db ".tables"
|
| 11 |
else
|
| 12 |
-
echo "SQLite database already exists
|
| 13 |
fi
|
| 14 |
|
|
|
|
| 15 |
echo "--- Checking ChromaDB... ---"
|
| 16 |
-
if [ ! -d /
|
| 17 |
echo "ChromaDB collections not found, initializing..."
|
| 18 |
-
python /
|
| 19 |
echo "ChromaDB initialized successfully"
|
| 20 |
else
|
| 21 |
-
echo "ChromaDB already exists
|
| 22 |
fi
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
|
| 30 |
-
echo "
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
+
set -e
|
| 3 |
|
| 4 |
+
echo "--- Starting application... ---"
|
| 5 |
+
|
| 6 |
+
# Resolve this script's own directory so the relative paths below work from any working directory
|
| 7 |
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
| 8 |
+
|
| 9 |
+
# Ensure we are in the app directory
|
| 10 |
+
cd "$SCRIPT_DIR"
|
| 11 |
+
|
| 12 |
+
# Initialize database if needed
|
| 13 |
echo "--- Checking SQLite database... ---"
|
| 14 |
+
if [ ! -s ./data/sqlite/lpm.db ]; then
|
| 15 |
echo "SQLite database not found or empty, initializing..."
|
| 16 |
+
mkdir -p ./data/sqlite
|
| 17 |
+
sqlite3 ./data/sqlite/lpm.db < ./docker/sqlite/init.sql
|
| 18 |
echo "SQLite database initialized successfully"
|
|
|
|
|
|
|
| 19 |
else
|
| 20 |
+
echo "SQLite database already exists"
|
| 21 |
fi
|
| 22 |
|
| 23 |
+
# Initialize ChromaDB if needed
|
| 24 |
echo "--- Checking ChromaDB... ---"
|
| 25 |
+
if [ ! -d ./data/chroma_db/documents ] || [ ! -d ./data/chroma_db/document_chunks ]; then
|
| 26 |
echo "ChromaDB collections not found, initializing..."
|
| 27 |
+
python ./docker/app/init_chroma.py
|
| 28 |
echo "ChromaDB initialized successfully"
|
| 29 |
else
|
| 30 |
+
echo "ChromaDB already exists"
|
| 31 |
fi
|
| 32 |
|
| 33 |
+
# Detect scripts/setup.sh; full setup is intentionally not run here (only a message is printed)
|
| 34 |
+
if [ -f "./scripts/setup.sh" ]; then
|
| 35 |
+
echo "Checking if setup is needed..."
|
| 36 |
+
# We skip full setup in container but could do minor checks
|
| 37 |
+
fi
|
| 38 |
|
| 39 |
+
echo "Starting Flask application..."
|
| 40 |
+
export FLASK_APP=lpm_kernel.app
|
| 41 |
+
# Use port 7860 for Hugging Face Spaces by default if LOCAL_APP_PORT is not set
|
| 42 |
+
PORT=${LOCAL_APP_PORT:-7860}
|
| 43 |
+
python -m flask run --host=0.0.0.0 --port=$PORT
|