harvesthealth committed on
Commit
5830379
·
verified ·
1 Parent(s): e0a3d91

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,7 +1,8 @@
1
  # Set all text files to use LF line endings
2
  * text=auto eol=lf
 
3
  # Binary files should not be modified
4
- *.png filter=lfs diff=lfs merge=lfs -text
5
  *.jpg binary
6
  *.jpeg binary
7
  *.gif binary
@@ -10,24 +11,29 @@
10
  *.mp4 binary
11
  *.mp3 binary
12
  *.pdf binary
13
- *.zip filter=lfs diff=lfs merge=lfs -text
14
  *.gz binary
15
  *.tar binary
16
  dependencies/graphrag-1.2.1.dev27.tar.gz filter=lfs diff=lfs merge=lfs -text
17
  dependencies/graphrag-modified.tar.gz filter=lfs diff=lfs merge=lfs -text
18
  dependencies/llama.cpp.zip filter=lfs diff=lfs merge=lfs -text
19
- lpm_kernel/tokenizer.json filter=lfs diff=lfs merge=lfs -text
20
- lpm_frontend/public/images/step_2.png filter=lfs diff=lfs merge=lfs -text
21
- lpm_frontend/public/images/step_4.png filter=lfs diff=lfs merge=lfs -text
22
  images/cover.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
23
  lpm_frontend/public/images/app_native_applications.png filter=lfs diff=lfs merge=lfs -text
24
  lpm_frontend/public/images/app_secondme_apps.png filter=lfs diff=lfs merge=lfs -text
25
  lpm_frontend/public/images/app_secondme_network.png filter=lfs diff=lfs merge=lfs -text
 
 
26
  lpm_frontend/public/images/step_1.png filter=lfs diff=lfs merge=lfs -text
 
27
  lpm_frontend/public/images/step_3.png filter=lfs diff=lfs merge=lfs -text
28
- images/secondme_cover.png filter=lfs diff=lfs merge=lfs -text
29
- lpm_frontend/public/fonts/Calistoga.ttf filter=lfs diff=lfs merge=lfs -text
30
- lpm_frontend/public/images/app_api_mcp.png filter=lfs diff=lfs merge=lfs -text
31
- *.json filter=lfs diff=lfs merge=lfs -text
32
- *.tar.gz filter=lfs diff=lfs merge=lfs -text
33
- *.ttf filter=lfs diff=lfs merge=lfs -text
 
1
  # Set all text files to use LF line endings
2
  * text=auto eol=lf
3
+
4
  # Binary files should not be modified
5
+ *.png binary
6
  *.jpg binary
7
  *.jpeg binary
8
  *.gif binary
 
11
  *.mp4 binary
12
  *.mp3 binary
13
  *.pdf binary
14
+ *.zip binary
15
  *.gz binary
16
  *.tar binary
17
  dependencies/graphrag-1.2.1.dev27.tar.gz filter=lfs diff=lfs merge=lfs -text
18
  dependencies/graphrag-modified.tar.gz filter=lfs diff=lfs merge=lfs -text
19
  dependencies/llama.cpp.zip filter=lfs diff=lfs merge=lfs -text
 
 
 
20
  images/cover.png filter=lfs diff=lfs merge=lfs -text
21
+ images/secondme_cover.png filter=lfs diff=lfs merge=lfs -text
22
+ lpm_frontend/package-lock.json filter=lfs diff=lfs merge=lfs -text
23
+ lpm_frontend/package.json filter=lfs diff=lfs merge=lfs -text
24
+ lpm_frontend/public/fonts/Calistoga.ttf filter=lfs diff=lfs merge=lfs -text
25
+ lpm_frontend/public/images/app_api_mcp.png filter=lfs diff=lfs merge=lfs -text
26
  lpm_frontend/public/images/app_native_applications.png filter=lfs diff=lfs merge=lfs -text
27
  lpm_frontend/public/images/app_secondme_apps.png filter=lfs diff=lfs merge=lfs -text
28
  lpm_frontend/public/images/app_secondme_network.png filter=lfs diff=lfs merge=lfs -text
29
+ lpm_frontend/public/images/logo.png filter=lfs diff=lfs merge=lfs -text
30
+ lpm_frontend/public/images/single_logo.png filter=lfs diff=lfs merge=lfs -text
31
  lpm_frontend/public/images/step_1.png filter=lfs diff=lfs merge=lfs -text
32
+ lpm_frontend/public/images/step_2.png filter=lfs diff=lfs merge=lfs -text
33
  lpm_frontend/public/images/step_3.png filter=lfs diff=lfs merge=lfs -text
34
+ lpm_frontend/public/images/step_4.png filter=lfs diff=lfs merge=lfs -text
35
+ lpm_frontend/tsconfig.json filter=lfs diff=lfs merge=lfs -text
36
+ lpm_kernel/package-lock.json filter=lfs diff=lfs merge=lfs -text
37
+ lpm_kernel/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ resources/L2/data_pipeline/data_prep/subjective/config/config.json filter=lfs diff=lfs merge=lfs -text
39
+ resources/model/processed_data/L1/graphrag_indexing_output/subjective/stats.json filter=lfs diff=lfs merge=lfs -text
Dockerfile.backend CHANGED
@@ -71,4 +71,4 @@ ENV PYTHONUNBUFFERED=1 \
71
  EXPOSE 8002 8080
72
 
73
  # Set the startup command
74
- CMD ["bash", "-c", "echo \"Checking SQLite database...\" && if [ ! -s /app/data/sqlite/lpm.db ]; then echo \"SQLite database not found or empty, initializing...\" && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db \".read /app/docker/sqlite/init.sql\" && echo \"SQLite database initialized successfully\" && echo \"Tables created:\" && sqlite3 /app/data/sqlite/lpm.db \".tables\"; else echo \"SQLite database already exists, skipping initialization\"; fi && echo \"Checking ChromaDB...\" && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo \"ChromaDB collections not found, initializing...\" && python /app/docker/app/init_chroma.py && echo \"ChromaDB initialized successfully\"; else echo \"ChromaDB already exists, skipping initialization\"; fi && echo \"Starting application at $(date)\" >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=7860 >> /app/logs/backend.log 2>&1"]
 
71
  EXPOSE 8002 8080
72
 
73
  # Set the startup command
74
+ CMD ["bash", "-c", "echo \"Checking SQLite database...\" && if [ ! -s /app/data/sqlite/lpm.db ]; then echo \"SQLite database not found or empty, initializing...\" && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db \".read /app/docker/sqlite/init.sql\" && echo \"SQLite database initialized successfully\" && echo \"Tables created:\" && sqlite3 /app/data/sqlite/lpm.db \".tables\"; else echo \"SQLite database already exists, skipping initialization\"; fi && echo \"Checking ChromaDB...\" && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo \"ChromaDB collections not found, initializing...\" && python /app/docker/app/init_chroma.py && echo \"ChromaDB initialized successfully\"; else echo \"ChromaDB already exists, skipping initialization\"; fi && echo \"Starting application at $(date)\" >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=${LOCAL_APP_PORT:-8002} >> /app/logs/backend.log 2>&1"]
README.md CHANGED
@@ -1,7 +1,163 @@
1
- ---
2
- title: Second Me
3
- emoji: 🚀
4
- colorFrom: blue
5
- colorTo: green
6
- sdk: docker
7
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ![Second Me](https://github.com/mindverse/Second-Me/blob/master/images/cover.png)
2
+
3
+ <div align="center">
4
+
5
+ [![Homepage](https://img.shields.io/badge/Second_Me-Homepage-blue?style=flat-square&logo=homebridge)](https://home.second.me/)
6
+ [![AI-native Memory](https://img.shields.io/badge/AI--native_Memory-arXiv-orange?style=flat-square&logo=academia)](https://arxiv.org/abs/2406.18312)
7
+ [![AI-native Memory 2.0](https://img.shields.io/badge/AI--native_Memory_2.0-arXiv-red?style=flat-square&logo=arxiv)](https://arxiv.org/abs/2503.08102)
8
+ [![Discord](https://img.shields.io/badge/Chat-Discord-5865F2?style=flat-square&logo=discord&logoColor=white)](https://discord.gg/GpWHQNUwrg)
9
+ [![Twitter](https://img.shields.io/badge/Follow-@SecondMe_AI-1DA1F2?style=flat-square&logo=x&logoColor=white)](https://x.com/SecondMe_AI1)
10
+ [![Reddit](https://img.shields.io/badge/Join-Reddit-FF4500?style=flat-square&logo=reddit&logoColor=white)](https://www.reddit.com/r/SecondMeAI/)
11
+ [![View FAQ](https://img.shields.io/badge/FAQ-GitBook-blue?style=flat-square)](https://secondme.gitbook.io/secondme/faq)
12
+
13
+ </div>
14
+
15
+
16
+ ## Our Vision
17
+
18
+ Companies like OpenAI built "Super AI" that threatens human independence. We crave individuality: AI that amplifies, not erases, **YOU**.
19
+
20
+ We’re challenging that with "**Second Me**": an open-source prototype where you craft your own **AI self**—a new AI species that preserves you, delivers your context, and defends your interests.
21
+
22
+ It’s **locally trained and hosted**—your data, your control—yet **globally connected**, scaling your intelligence across an AI network. Beyond that, it’s your AI identity interface—a bold standard that links your AI to the world, sparks collaboration among AI selves, and builds tomorrow’s truly native AI apps.
23
+
24
+ Tech enthusiasts, AI pros, domain experts — join us! Second Me is your launchpad to extend your mind into the digital horizon.
25
+
26
+ ## Key Features
27
+
28
+ ### **Train Your AI Self** with AI-Native Memory ([Paper](https://arxiv.org/abs/2503.08102))
29
+ Start training your Second Me today with your own memories! Using Hierarchical Memory Modeling (HMM) and the Me-Alignment Algorithm, your AI self captures your identity, understands your context, and reflects you authentically.
30
+
31
+ <p align="center">
32
+ <img src="https://github.com/user-attachments/assets/a84c6135-26dc-4413-82aa-f4a373c0ff89" width="94%" />
33
+ </p>
34
+
35
+
36
+ ### **Scale Your Intelligence** on the Second Me Network
37
+ Launch your AI self from your laptop onto our decentralized network—anyone or any app can connect with your permission, sharing your context as your digital identity.
38
+
39
+ <p align="center">
40
+ <img src="https://github.com/user-attachments/assets/9a74a3f4-d8fd-41c1-8f24-534ed94c842a" width="94%" />
41
+ </p>
42
+
43
+
44
+ ### Build Tomorrow’s Apps with Second Me
45
+ **Roleplay**: Your AI self switches personas to represent you in different scenarios.
46
+ **AI Space**: Collaborate with other Second Mes to spark ideas or solve problems.
47
+
48
+ <p align="center">
49
+ <img src="https://github.com/user-attachments/assets/bc6125c1-c84f-4ecc-b620-8932cc408094" width="94%" />
50
+ </p>
51
+
52
+ ### 100% **Privacy and Control**
53
+ Unlike traditional centralized AI systems, Second Me ensures that your information and intelligence remain local and completely private.
54
+
55
+
56
+
57
+ ## Getting started & staying tuned with us
58
+ Star and join us, and you will receive all release notifications from GitHub without any delay!
59
+
60
+
61
+ <p align="center">
62
+ <img src="https://github.com/user-attachments/assets/5c14d956-f931-4c25-b0b3-3c2c96cd7581" width="94%" />
63
+ </p>
64
+
65
+
66
+ ## Quick Start
67
+
68
+ ### 📊 Model Size vs. Memory (Reference Guide)
69
+
70
+ *Note: "B" in the table represents "billion parameters model". Data shown are examples only; actual supported model sizes may vary depending on system optimization, deployment environment, and other hardware/software conditions.*
71
+
72
+ | Memory (GB) | Docker Deployment (Windows/Linux) | Docker Deployment (Mac) | Integrated Setup (Windows/Linux) | Integrated Setup (Mac) |
73
+ |--------------|-----------------------------|-------------------|--------------------------|----------------|
74
+ | 8 | ~0.8B (example) | ~0.4B (example) | ~1.0B (example) | ~0.6B (example) |
75
+ | 16 | 1.5B (example) | 0.5B (example) | ~2.0B (example) | ~0.8B (example) |
76
+ | 32 | ~2.8B (example) | ~1.2B (example) | ~3.5B (example) | ~1.5B (example) |
77
+
78
+ > **Note**: Models below 0.5B may not provide satisfactory performance for complex tasks. And we're continuously improving cross-platform support - please [submit an issue](https://github.com/mindverse/Second-Me/issues/new) for feedback or compatibility problems on different operating systems.
79
+
80
+ > **MLX Acceleration**: Mac M-series users can use [MLX](https://github.com/mindverse/Second-Me/tree/master/lpm_kernel/L2/mlx_training) to run larger models (CLI-only).
81
+
82
+ ### ⚡ Get your Second Me running in just 3 steps:
83
+
84
+ ```bash
85
+ # 1. Clone the repository
86
+ git clone https://github.com/mindverse/Second-Me.git
87
+ cd Second-Me
88
+ # 2. Start Docker containers
89
+ make docker-up
90
+ # 3. Access the web interface
91
+ # Open your browser and visit: http://localhost:3000
92
+ ```
93
+
94
+ 👉 For detailed instructions — including integrated (non-Docker) setup, model selection, memory requirements, and platform-specific tips,
95
+ check the full [Deployment Guide on GitBook](https://secondme.gitbook.io/secondme/guides/deployment).
96
+
97
+ ❓ Got questions about setup, models, or any troubleshooting? [Check our FAQ](https://secondme.gitbook.io/secondme/faq).
98
+
99
+ ## Tutorial and Use Cases
100
+ 🛠️ Feel free to follow [User tutorial](https://secondme.gitbook.io/secondme/getting-started) to build your Second Me.
101
+
102
+ 💡 Check out the links below to see how Second Me can be used in real-life scenarios:
103
+ - [Felix AMA (Roleplay app)](https://app.secondme.io/example/ama)
104
+ - [Brainstorming a 15-Day European City Itinerary (Network app)](https://app.secondme.io/example/brainstorming)
105
+ - [Icebreaking as a Speed Dating Match (Network app)](https://app.secondme.io/example/Icebreaker)
106
+
107
+
108
+ ## What's Next: May 2025
109
+
110
+ Second Me continues to evolve as the open-source identity infrastructure for AI. Here's what's on deck for May:
111
+
112
+ - 🗂️ **Version Control**: Smarter versioning of memory and identity states
113
+ - 🧠 **Continuous Training Pipelines**: Keep your AI self evolving over time, with ongoing updates based on new memory inputs.
114
+ - ⚙️ **Performance & Stability Improvements**: Enhancements across inference ability, model alignment, and base model upgrades
115
+ - ☁️ **Cloud Solutions**: Explore cloud-based solutions for both model training (fine-tuning) and model deployment, to reduce the hardware burden on users' local machines.
116
+
117
+ ## Contributing
118
+
119
+ We’d love for you to help shape what’s coming next — whether it’s fixing bugs, building new features, or improving docs.
120
+
121
+ - 📘 Check out our [Contribution Guide](./CONTRIBUTING.md) to get started
122
+ - 💻 Submit ideas, issues, or PRs on [GitHub](https://github.com/mindverse/Second-Me)
123
+ - 💬 Join the conversation and stay updated in our [Discord](https://discord.gg/GpWHQNUwrg) — it’s where the community lives
124
+
125
+
126
+ ## Contributors
127
+
128
+ We would like to express our gratitude to all the individuals who have contributed to Second Me! If you're interested in contributing to the future of intelligence uploading, whether through code, documentation, or ideas, please feel free to submit a pull request to our repository: [Second-Me](https://github.com/Mindverse/Second-Me).
129
+
130
+
131
+ <a href="https://github.com/mindverse/Second-Me/graphs/contributors">
132
+ <img src="https://contrib.rocks/image?repo=mindverse/Second-Me" />
133
+ </a>
134
+
135
+ Made with [contrib.rocks](https://contrib.rocks).
136
+
137
+ ## Acknowledgements
138
+
139
+ This work leverages the power of the open-source community.
140
+
141
+ For data synthesis, we utilized [GraphRAG](https://github.com/microsoft/graphrag) from Microsoft.
142
+
143
+ For model deployment, we utilized [llama.cpp](https://github.com/ggml-org/llama.cpp), which provides efficient inference capabilities.
144
+
145
+ Our base models primarily come from the [Qwen2.5](https://huggingface.co/Qwen) series.
146
+
147
+ We also want to extend our sincere gratitude to all users who have experienced Second Me. We recognize that there is significant room for optimization throughout the entire pipeline, and we are fully committed to iterative improvements to ensure everyone can enjoy the best possible experience locally.
148
+
149
+ ## License
150
+
151
+ Second Me is open source software licensed under the Apache License 2.0. See the [LICENSE](LICENSE) file for more details.
152
+
153
+ [license]: ./LICENSE
154
+
155
+ ## Star History
156
+
157
+ <a href="https://www.star-history.com/#mindverse/Second-Me&Date">
158
+ <picture>
159
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date&theme=dark" />
160
+ <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date" />
161
+ <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date" />
162
+ </picture>
163
+ </a>
docker-compose.yml CHANGED
@@ -6,7 +6,8 @@ services:
6
  container_name: second-me-backend
7
  restart: unless-stopped
8
  ports:
9
- - "7860:7860"
 
10
  volumes:
11
  - ./data:/app/data
12
  - ./logs:/app/logs
 
6
  container_name: second-me-backend
7
  restart: unless-stopped
8
  ports:
9
+ - "8002:8002"
10
+ - "8080:8080"
11
  volumes:
12
  - ./data:/app/data
13
  - ./logs:/app/logs
lpm_kernel/api/domains/documents/routes.py CHANGED
@@ -46,11 +46,10 @@ def list_documents():
46
  def scan_documents():
47
  """Scan documents from configured directory and store them in database"""
48
  try:
49
- # 2. Get project root directory and construct the full path
 
50
  config = Config.from_env()
51
- relative_path = config.get("USER_RAW_CONTENT_DIR").lstrip("/")
52
- project_root = Path(__file__).parent.parent.parent.parent.parent
53
- full_path = project_root / relative_path
54
 
55
  # 3. Scan and process files
56
  processed_doc_dtos = document_service.scan_directory(
@@ -146,8 +145,8 @@ def process_all_chunks():
146
  try:
147
  config = Config.from_env()
148
  chunker = DocumentChunker(
149
- chunk_size=int(config.get("DOCUMENT_CHUNK_SIZE")),
150
- overlap=int(config.get("DOCUMENT_CHUNK_OVERLAP")),
151
  )
152
 
153
  documents = document_service.list_documents()
 
46
  def scan_documents():
47
  """Scan documents from configured directory and store them in database"""
48
  try:
49
+ # 2. Get the full path from configuration
50
+ # The Config class already resolves relative paths using BASE_DIR
51
  config = Config.from_env()
52
+ full_path = Path(config.get("USER_RAW_CONTENT_DIR"))
 
 
53
 
54
  # 3. Scan and process files
55
  processed_doc_dtos = document_service.scan_directory(
 
145
  try:
146
  config = Config.from_env()
147
  chunker = DocumentChunker(
148
+ chunk_size=int(config.get("DOCUMENT_CHUNK_SIZE", 4000)),
149
+ overlap=int(config.get("DOCUMENT_CHUNK_OVERLAP", 200)),
150
  )
151
 
152
  documents = document_service.list_documents()
lpm_kernel/api/domains/kernel2/routes/role_routes.py CHANGED
@@ -20,6 +20,15 @@ def create_role():
20
  """create new Role"""
21
  try:
22
  data = request.get_json()
 
 
 
 
 
 
 
 
 
23
  create_request = CreateRoleRequest.from_dict(data)
24
  role = role_service.create_role(create_request)
25
 
@@ -28,6 +37,9 @@ def create_role():
28
  else:
29
  return jsonify(APIResponse.error("create role failed, maybe the name existed")), 400
30
 
 
 
 
31
  except Exception as e:
32
  logger.error(f"Error creating Role: {str(e)}")
33
  return jsonify(APIResponse.error(f"Error occurred when creating role: {str(e)}")), 500
 
20
  """create new Role"""
21
  try:
22
  data = request.get_json()
23
+ if not data:
24
+ return jsonify(APIResponse.error("Missing request body")), 400
25
+
26
+ # Validate mandatory fields
27
+ required_fields = ["name", "description", "system_prompt"]
28
+ missing_fields = [f for f in required_fields if not data.get(f)]
29
+ if missing_fields:
30
+ return jsonify(APIResponse.error(f"Missing mandatory fields: {', '.join(missing_fields)}")), 400
31
+
32
  create_request = CreateRoleRequest.from_dict(data)
33
  role = role_service.create_role(create_request)
34
 
 
37
  else:
38
  return jsonify(APIResponse.error("create role failed, maybe the name existed")), 400
39
 
40
+ except (KeyError, TypeError) as e:
41
+ logger.error(f"Validation error creating Role: {str(e)}")
42
+ return jsonify(APIResponse.error(f"Invalid request data: {str(e)}")), 400
43
  except Exception as e:
44
  logger.error(f"Error creating Role: {str(e)}")
45
  return jsonify(APIResponse.error(f"Error occurred when creating role: {str(e)}")), 500
lpm_kernel/api/domains/kernel2/routes_talk.py CHANGED
@@ -31,9 +31,9 @@ from lpm_kernel.api.domains.kernel2.services.advanced_chat_service import advanc
31
 
32
  logger = logging.getLogger(__name__)
33
 
34
- talk_bp = Blueprint("talk", __name__)
35
 
36
- @talk_bp.route("/api/talk", methods=["POST"])
37
  @validate()
38
  def chat(body: ChatRequest):
39
  """
 
31
 
32
  logger = logging.getLogger(__name__)
33
 
34
+ talk_bp = Blueprint('talk', __name__, url_prefix='/api/talk')
35
 
36
+ @talk_bp.route("/chat", methods=["POST"])
37
  @validate()
38
  def chat(body: ChatRequest):
39
  """
lpm_kernel/common/repository/vector_repository.py CHANGED
@@ -26,8 +26,7 @@ class BaseVectorRepository(ABC):
26
 
27
  class ChromaRepository(BaseVectorRepository):
28
  def __init__(self, collection_name: str, persist_directory: str = "./chroma_db"):
29
- settings = Settings(anonymized_telemetry=False)
30
- self.client = chromadb.PersistentClient(path=persist_directory, settings=settings)
31
 
32
  # Check if collection exists, create it if it doesn't
33
  try:
 
26
 
27
  class ChromaRepository(BaseVectorRepository):
28
  def __init__(self, collection_name: str, persist_directory: str = "./chroma_db"):
29
+ self.client = chromadb.PersistentClient(path=persist_directory)
 
30
 
31
  # Check if collection exists, create it if it doesn't
32
  try:
lpm_kernel/file_data/chroma_utils.py CHANGED
@@ -1,7 +1,6 @@
1
  from typing import Optional, Dict, Any, List, Tuple
2
  import os
3
  import chromadb
4
- from chromadb.config import Settings
5
  import logging
6
  from lpm_kernel.configs.logging import get_train_process_logger
7
 
@@ -74,8 +73,7 @@ def reinitialize_chroma_collections(dimension: int = 1536) -> bool:
74
  """
75
  try:
76
  chroma_path = os.getenv("CHROMA_PERSIST_DIRECTORY", "./data/chroma_db")
77
- settings = Settings(anonymized_telemetry=False)
78
- client = chromadb.PersistentClient(path=chroma_path, settings=settings)
79
 
80
  # Delete and recreate document collection
81
  try:
 
1
  from typing import Optional, Dict, Any, List, Tuple
2
  import os
3
  import chromadb
 
4
  import logging
5
  from lpm_kernel.configs.logging import get_train_process_logger
6
 
 
73
  """
74
  try:
75
  chroma_path = os.getenv("CHROMA_PERSIST_DIRECTORY", "./data/chroma_db")
76
+ client = chromadb.PersistentClient(path=chroma_path)
 
77
 
78
  # Delete and recreate document collection
79
  try:
lpm_kernel/file_data/chunker.py CHANGED
@@ -2,7 +2,10 @@ from typing import List
2
  from lpm_kernel.L1.bio import Chunk
3
  import traceback
4
  import time
5
- from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 
6
 
7
  from lpm_kernel.configs.logging import get_train_process_logger
8
  logger = get_train_process_logger()
 
2
  from lpm_kernel.L1.bio import Chunk
3
  import traceback
4
  import time
5
+ try:
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ except ImportError:
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
 
10
  from lpm_kernel.configs.logging import get_train_process_logger
11
  logger = get_train_process_logger()
lpm_kernel/utils.py CHANGED
@@ -3,7 +3,10 @@ from enum import Enum
3
  import tiktoken
4
  import re
5
  from typing import Any, Optional, Union, Collection, AbstractSet, Literal, List
6
- from langchain.text_splitter import TextSplitter
 
 
 
7
  import random
8
  import string
9
  from itertools import chain
@@ -165,7 +168,7 @@ class TokenTextSplitter(TextSplitter):
165
 
166
  def _cut_meaningless_head_tail(self, text: str) -> str:
167
  # Only split when there are multiple newlines, as parsing of PDF/Word often contains false newlines
168
- sentences = re.split(r"\. |! |\? |。|!|?|\n+ *\n+", text)
169
  if len(sentences) < 2:
170
  return text
171
  head = sentences[0]
 
3
  import tiktoken
4
  import re
5
  from typing import Any, Optional, Union, Collection, AbstractSet, Literal, List
6
+ try:
7
+ from langchain_text_splitters import TextSplitter
8
+ except ImportError:
9
+ from langchain.text_splitter import TextSplitter
10
  import random
11
  import string
12
  from itertools import chain
 
168
 
169
  def _cut_meaningless_head_tail(self, text: str) -> str:
170
  # Only split when there are multiple newlines, as parsing of PDF/Word often contains false newlines
171
+ sentences = re.split("\. |! |\? |。|!|?|\n+ *\n+", text)
172
  if len(sentences) < 2:
173
  return text
174
  head = sentences[0]
scripts/setup.sh CHANGED
@@ -607,10 +607,6 @@ parse_args() {
607
 
608
  # Main function
609
  main() {
610
- # Create necessary directories with write permissions
611
- mkdir -p "./logs"
612
- mkdir -p "./.cache/huggingface/hub"
613
-
614
  # Display welcome message
615
  display_header "Second-Me Complete Installation"
616
 
 
607
 
608
  # Main function
609
  main() {
 
 
 
 
610
  # Display welcome message
611
  display_header "Second-Me Complete Installation"
612
 
start.sh CHANGED
@@ -1,32 +1,43 @@
1
  #!/bin/bash
 
2
 
 
 
 
 
 
 
 
 
 
3
  echo "--- Checking SQLite database... ---"
4
- if [ ! -s /app/data/sqlite/lpm.db ]; then
5
  echo "SQLite database not found or empty, initializing..."
6
- mkdir -p /app/data/sqlite
7
- sqlite3 /app/data/sqlite/lpm.db ".read /app/docker/sqlite/init.sql"
8
  echo "SQLite database initialized successfully"
9
- echo "Tables created:"
10
- sqlite3 /app/data/sqlite/lpm.db ".tables"
11
  else
12
- echo "SQLite database already exists, skipping initialization"
13
  fi
14
 
 
15
  echo "--- Checking ChromaDB... ---"
16
- if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then
17
  echo "ChromaDB collections not found, initializing..."
18
- python /app/docker/app/init_chroma.py
19
  echo "ChromaDB initialized successfully"
20
  else
21
- echo "ChromaDB already exists, skipping initialization"
22
  fi
23
 
24
- echo "--- Starting application... ---"
25
- export TRANSFORMERS_CACHE="/app/.cache/huggingface/hub"
26
- export CHROMA_SERVER_NO_ANALYTICS=True
27
- export HF_HUB_DISABLE_TELEMETRY=1
28
- source "$SCRIPT_DIR/scripts/setup.sh"
29
 
30
- echo "--- Starting application... ---"
31
- cd /app
32
- python -m flask run --host=0.0.0.0 --port=7860
 
 
 
1
  #!/bin/bash
2
+ set -e
3
 
4
+ echo "--- Starting application... ---"
5
+
6
+ # Use relative paths for scripts
7
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
8
+
9
+ # Ensure we are in the app directory
10
+ cd "$SCRIPT_DIR"
11
+
12
+ # Initialize database if needed
13
  echo "--- Checking SQLite database... ---"
14
+ if [ ! -s ./data/sqlite/lpm.db ]; then
15
  echo "SQLite database not found or empty, initializing..."
16
+ mkdir -p ./data/sqlite
17
+ sqlite3 ./data/sqlite/lpm.db < ./docker/sqlite/init.sql
18
  echo "SQLite database initialized successfully"
 
 
19
  else
20
+ echo "SQLite database already exists"
21
  fi
22
 
23
+ # Initialize ChromaDB if needed
24
  echo "--- Checking ChromaDB... ---"
25
+ if [ ! -d ./data/chroma_db/documents ] || [ ! -d ./data/chroma_db/document_chunks ]; then
26
  echo "ChromaDB collections not found, initializing..."
27
+ python ./docker/app/init_chroma.py
28
  echo "ChromaDB initialized successfully"
29
  else
30
+ echo "ChromaDB already exists"
31
  fi
32
 
33
+ # Try to run setup if it exists and hasn't been run
34
+ if [ -f "./scripts/setup.sh" ]; then
35
+ echo "Checking if setup is needed..."
36
+ # We skip full setup in container but could do minor checks
37
+ fi
38
 
39
+ echo "Starting Flask application..."
40
+ export FLASK_APP=lpm_kernel.app
41
+ # Use port 7860 for Hugging Face Spaces by default if LOCAL_APP_PORT is not set
42
+ PORT=${LOCAL_APP_PORT:-7860}
43
+ python -m flask run --host=0.0.0.0 --port=$PORT