secondme-api

Sleeping

App Files Files Community

harvesthealth committed on Jan 26

Commit

5830379

verified ·

1 Parent(s): e0a3d91

Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

.gitattributes +17 -11
Dockerfile.backend +1 -1
README.md +163 -7
docker-compose.yml +2 -1
lpm_kernel/api/domains/documents/routes.py +5 -6
lpm_kernel/api/domains/kernel2/routes/role_routes.py +12 -0
lpm_kernel/api/domains/kernel2/routes_talk.py +2 -2
lpm_kernel/common/repository/vector_repository.py +1 -2
lpm_kernel/file_data/chroma_utils.py +1 -3
lpm_kernel/file_data/chunker.py +4 -1
lpm_kernel/utils.py +5 -2
scripts/setup.sh +0 -4
start.sh +28 -17

.gitattributes CHANGED Viewed

@@ -1,7 +1,8 @@
 # Set all text files to use LF line endings
 * text=auto eol=lf
 # Binary files should not be modified
-*.png filter=lfs diff=lfs merge=lfs -text
 *.jpg binary
 *.jpeg binary
 *.gif binary
@@ -10,24 +11,29 @@
 *.mp4 binary
 *.mp3 binary
 *.pdf binary
-*.zip filter=lfs diff=lfs merge=lfs -text
 *.gz binary
 *.tar binary
 dependencies/graphrag-1.2.1.dev27.tar.gz filter=lfs diff=lfs merge=lfs -text
 dependencies/graphrag-modified.tar.gz filter=lfs diff=lfs merge=lfs -text
 dependencies/llama.cpp.zip filter=lfs diff=lfs merge=lfs -text
-lpm_kernel/tokenizer.json filter=lfs diff=lfs merge=lfs -text
-lpm_frontend/public/images/step_2.png filter=lfs diff=lfs merge=lfs -text
-lpm_frontend/public/images/step_4.png filter=lfs diff=lfs merge=lfs -text
 images/cover.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/app_native_applications.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/app_secondme_apps.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/app_secondme_network.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/step_1.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/step_3.png filter=lfs diff=lfs merge=lfs -text
-images/secondme_cover.png filter=lfs diff=lfs merge=lfs -text
-lpm_frontend/public/fonts/Calistoga.ttf filter=lfs diff=lfs merge=lfs -text
-lpm_frontend/public/images/app_api_mcp.png filter=lfs diff=lfs merge=lfs -text
-*.json filter=lfs diff=lfs merge=lfs -text
-*.tar.gz filter=lfs diff=lfs merge=lfs -text
-*.ttf filter=lfs diff=lfs merge=lfs -text

 # Set all text files to use LF line endings
 * text=auto eol=lf
 # Binary files should not be modified
+*.png binary
 *.jpg binary
 *.jpeg binary
 *.gif binary
 *.mp4 binary
 *.mp3 binary
 *.pdf binary
+*.zip binary
 *.gz binary
 *.tar binary
 dependencies/graphrag-1.2.1.dev27.tar.gz filter=lfs diff=lfs merge=lfs -text
 dependencies/graphrag-modified.tar.gz filter=lfs diff=lfs merge=lfs -text
 dependencies/llama.cpp.zip filter=lfs diff=lfs merge=lfs -text
 images/cover.png filter=lfs diff=lfs merge=lfs -text
+images/secondme_cover.png filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/package-lock.json filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/package.json filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/public/fonts/Calistoga.ttf filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/public/images/app_api_mcp.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/app_native_applications.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/app_secondme_apps.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/app_secondme_network.png filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/public/images/logo.png filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/public/images/single_logo.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/step_1.png filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/public/images/step_2.png filter=lfs diff=lfs merge=lfs -text
 lpm_frontend/public/images/step_3.png filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/public/images/step_4.png filter=lfs diff=lfs merge=lfs -text
+lpm_frontend/tsconfig.json filter=lfs diff=lfs merge=lfs -text
+lpm_kernel/package-lock.json filter=lfs diff=lfs merge=lfs -text
+lpm_kernel/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+resources/L2/data_pipeline/data_prep/subjective/config/config.json filter=lfs diff=lfs merge=lfs -text
+resources/model/processed_data/L1/graphrag_indexing_output/subjective/stats.json filter=lfs diff=lfs merge=lfs -text

Dockerfile.backend CHANGED Viewed

@@ -71,4 +71,4 @@ ENV PYTHONUNBUFFERED=1 \
 EXPOSE 8002 8080
 # Set the startup command
-CMD ["bash", "-c", "echo \"Checking SQLite database...\" && if [ ! -s /app/data/sqlite/lpm.db ]; then echo \"SQLite database not found or empty, initializing...\" && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db \".read /app/docker/sqlite/init.sql\" && echo \"SQLite database initialized successfully\" && echo \"Tables created:\" && sqlite3 /app/data/sqlite/lpm.db \".tables\"; else echo \"SQLite database already exists, skipping initialization\"; fi && echo \"Checking ChromaDB...\" && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo \"ChromaDB collections not found, initializing...\" && python /app/docker/app/init_chroma.py && echo \"ChromaDB initialized successfully\"; else echo \"ChromaDB already exists, skipping initialization\"; fi && echo \"Starting application at $(date)\" >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=7860 >> /app/logs/backend.log 2>&1"]

 EXPOSE 8002 8080
 # Set the startup command
+CMD ["bash", "-c", "echo \"Checking SQLite database...\" && if [ ! -s /app/data/sqlite/lpm.db ]; then echo \"SQLite database not found or empty, initializing...\" && mkdir -p /app/data/sqlite && sqlite3 /app/data/sqlite/lpm.db \".read /app/docker/sqlite/init.sql\" && echo \"SQLite database initialized successfully\" && echo \"Tables created:\" && sqlite3 /app/data/sqlite/lpm.db \".tables\"; else echo \"SQLite database already exists, skipping initialization\"; fi && echo \"Checking ChromaDB...\" && if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then echo \"ChromaDB collections not found, initializing...\" && python /app/docker/app/init_chroma.py && echo \"ChromaDB initialized successfully\"; else echo \"ChromaDB already exists, skipping initialization\"; fi && echo \"Starting application at $(date)\" >> /app/logs/backend.log && cd /app && python -m flask run --host=0.0.0.0 --port=${LOCAL_APP_PORT:-8002} >> /app/logs/backend.log 2>&1"]

README.md CHANGED Viewed

@@ -1,7 +1,163 @@
----
-title: Second Me
-emoji: 🚀
-colorFrom: blue
-colorTo: green
-sdk: docker
----

+![Second Me](https://github.com/mindverse/Second-Me/blob/master/images/cover.png)
+<div align="center">
+[![Homepage](https://img.shields.io/badge/Second_Me-Homepage-blue?style=flat-square&logo=homebridge)](https://home.second.me/)
+[![AI-native Memory](https://img.shields.io/badge/AI--native_Memory-arXiv-orange?style=flat-square&logo=academia)](https://arxiv.org/abs/2406.18312)
+[![AI-native Memory 2.0](https://img.shields.io/badge/AI--native_Memory_2.0-arXiv-red?style=flat-square&logo=arxiv)](https://arxiv.org/abs/2503.08102)
+[![Discord](https://img.shields.io/badge/Chat-Discord-5865F2?style=flat-square&logo=discord&logoColor=white)](https://discord.gg/GpWHQNUwrg)
+[![Twitter](https://img.shields.io/badge/Follow-@SecondMe_AI-1DA1F2?style=flat-square&logo=x&logoColor=white)](https://x.com/SecondMe_AI1)
+[![Reddit](https://img.shields.io/badge/Join-Reddit-FF4500?style=flat-square&logo=reddit&logoColor=white)](https://www.reddit.com/r/SecondMeAI/)
+[![View FAQ](https://img.shields.io/badge/FAQ-GitBook-blue?style=flat-square)](https://secondme.gitbook.io/secondme/faq)
+</div>
+## Our Vision
+Companies like OpenAI built "Super AI" that threatens human independence. We crave individuality: AI that amplifies, not erases, **YOU**.
+We’re challenging that with "**Second Me**": an open-source prototype where you craft your own **AI self**—a new AI species that preserves you, delivers your context, and defends your interests.
+It’s **locally trained and hosted**—your data, your control—yet **globally connected**, scaling your intelligence across an AI network. Beyond that, it’s your AI identity interface—a bold standard that links your AI to the world, sparks collaboration among AI selves, and builds tomorrow’s truly native AI apps.
+Tech enthusiasts, AI pros, domain experts: join us! Second Me is your launchpad to extend your mind into the digital horizon.
+## Key Features
+### **Train Your AI Self** with AI-Native Memory ([Paper](https://arxiv.org/abs/2503.08102))
+Start training your Second Me today with your own memories! Using Hierarchical Memory Modeling (HMM) and the Me-Alignment Algorithm, your AI self captures your identity, understands your context, and reflects you authentically.
+ <p align="center">
+  <img src="https://github.com/user-attachments/assets/a84c6135-26dc-4413-82aa-f4a373c0ff89" width="94%" />
+</p>
+### **Scale Your Intelligence** on the Second Me Network
+Launch your AI self from your laptop onto our decentralized network—anyone or any app can connect with your permission, sharing your context as your digital identity.
+<p align="center">
+  <img src="https://github.com/user-attachments/assets/9a74a3f4-d8fd-41c1-8f24-534ed94c842a" width="94%" />
+</p>
+### Build Tomorrow’s Apps with Second Me
+**Roleplay**: Your AI self switches personas to represent you in different scenarios.
+**AI Space**: Collaborate with other Second Mes to spark ideas or solve problems.
+<p align="center">
+  <img src="https://github.com/user-attachments/assets/bc6125c1-c84f-4ecc-b620-8932cc408094" width="94%" />
+</p>
+### 100% **Privacy and Control**
+Unlike traditional centralized AI systems, Second Me ensures that your information and intelligence remain local and completely private.
+## Getting started & staying tuned with us
+Star and join us, and you will receive all release notifications from GitHub without any delay!
+ <p align="center">
+  <img src="https://github.com/user-attachments/assets/5c14d956-f931-4c25-b0b3-3c2c96cd7581" width="94%" />
+</p>
+## Quick Start
+### 📊 Model Size vs. Memory (Reference Guide)
+*Note: "B" in the table stands for "billion parameters". Data shown are examples only; actual supported model sizes may vary depending on system optimization, deployment environment, and other hardware/software conditions.*
+| Memory (GB) | Docker Deployment (Windows/Linux) | Docker Deployment (Mac) | Integrated Setup (Windows/Linux) | Integrated Setup (Mac) |
+|--------------|-----------------------------|-------------------|--------------------------|----------------|
+| 8            | ~0.8B (example)                | ~0.4B (example)       | ~1.0B (example)              | ~0.6B (example)    |
+| 16           | 1.5B (example)                 | 0.5B (example)        | ~2.0B (example)              | ~0.8B (example)    |
+| 32           | ~2.8B (example)                | ~1.2B (example)       | ~3.5B (example)              | ~1.5B (example)    |
+> **Note**: Models below 0.5B may not provide satisfactory performance for complex tasks. And we're continuously improving cross-platform support - please [submit an issue](https://github.com/mindverse/Second-Me/issues/new) for feedback or compatibility problems on different operating systems.
+> **MLX Acceleration**: Mac M-series users can use [MLX](https://github.com/mindverse/Second-Me/tree/master/lpm_kernel/L2/mlx_training) to run larger models (CLI-only).
+### ⚡ Get your Second Me running in just 3 steps:
+```bash
+# 1. Clone the repository
+git clone https://github.com/mindverse/Second-Me.git
+cd Second-Me
+# 2. Start Docker containers
+make docker-up
+# 3. Access the web interface
+# Open your browser and visit: http://localhost:3000
+```
+👉 For detailed instructions — including integrated (non-Docker) setup, model selection, memory requirements, and platform-specific tips,
+check the full [Deployment Guide on GitBook](https://secondme.gitbook.io/secondme/guides/deployment).
+❓ Got questions about setup, models, or any troubleshooting? [Check our FAQ](https://secondme.gitbook.io/secondme/faq).
+## Tutorial and Use Cases
+🛠️ Feel free to follow the [User tutorial](https://secondme.gitbook.io/secondme/getting-started) to build your Second Me.
+💡 Check out the links below to see how Second Me can be used in real-life scenarios:
+- [Felix AMA (Roleplay app)](https://app.secondme.io/example/ama)
+- [Brainstorming a 15-Day European City Itinerary (Network app)](https://app.secondme.io/example/brainstorming)
+- [Icebreaking as a Speed Dating Match (Network app)](https://app.secondme.io/example/Icebreaker)
+## What's Next: May 2025
+Second Me continues to evolve as the open-source identity infrastructure for AI. Here's what's on deck for May:
+- 🗂️ **Version Control**: Smarter versioning of memory and identity states
+- 🧠 **Continuous Training Pipelines**: Keep your AI self evolving over time, with ongoing updates based on new memory inputs.
+- ⚙️ **Performance & Stability Improvements**: Enhancements across inference ability, model alignment, and base model upgrades
+- ☁️ **Cloud Solutions**: Explore cloud-based solutions for both model training (fine-tuning) and model deployment, to reduce the hardware burden on users' local machines.
+## Contributing
+We’d love for you to help shape what’s coming next — whether it’s fixing bugs, building new features, or improving docs.
+- 📘 Check out our [Contribution Guide](./CONTRIBUTING.md) to get started
+- 💻 Submit ideas, issues, or PRs on [GitHub](https://github.com/mindverse/Second-Me)
+- 💬 Join the conversation and stay updated in our [Discord](https://discord.gg/GpWHQNUwrg) — it’s where the community lives
+## Contributors
+We would like to express our gratitude to all the individuals who have contributed to Second Me! If you're interested in contributing to the future of intelligence uploading, whether through code, documentation, or ideas, please feel free to submit a pull request to our repository: [Second-Me](https://github.com/Mindverse/Second-Me).
+<a href="https://github.com/mindverse/Second-Me/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=mindverse/Second-Me" />
+</a>
+Made with [contrib.rocks](https://contrib.rocks).
+## Acknowledgements
+This work leverages the power of the open-source community.
+For data synthesis, we utilized [GraphRAG](https://github.com/microsoft/graphrag) from Microsoft.
+For model deployment, we utilized [llama.cpp](https://github.com/ggml-org/llama.cpp), which provides efficient inference capabilities.
+Our base models primarily come from the [Qwen2.5](https://huggingface.co/Qwen) series.
+We also want to extend our sincere gratitude to all users who have experienced Second Me. We recognize that there is significant room for optimization throughout the entire pipeline, and we are fully committed to iterative improvements to ensure everyone can enjoy the best possible experience locally.
+## License
+Second Me is open source software licensed under the Apache License 2.0. See the [LICENSE](LICENSE) file for more details.
+[license]: ./LICENSE
+## Star History
+<a href="https://www.star-history.com/#mindverse/Second-Me&Date">
+ <picture>
+   <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date&theme=dark" />
+   <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date" />
+   <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=mindverse/Second-Me&type=Date" />
+ </picture>
+</a>

docker-compose.yml CHANGED Viewed

@@ -6,7 +6,8 @@ services:
     container_name: second-me-backend
     restart: unless-stopped
     ports:
-      - "7860:7860"
     volumes:
       - ./data:/app/data
       - ./logs:/app/logs

     container_name: second-me-backend
     restart: unless-stopped
     ports:
+      - "8002:8002"
+      - "8080:8080"
     volumes:
       - ./data:/app/data
       - ./logs:/app/logs

lpm_kernel/api/domains/documents/routes.py CHANGED Viewed

@@ -46,11 +46,10 @@ def list_documents():
 def scan_documents():
     """Scan documents from configured directory and store them in database"""
     try:
-        # 2. Get project root directory and construct the full path
         config = Config.from_env()
-        relative_path = config.get("USER_RAW_CONTENT_DIR").lstrip("/")
-        project_root = Path(__file__).parent.parent.parent.parent.parent
-        full_path = project_root / relative_path
         # 3. Scan and process files
         processed_doc_dtos = document_service.scan_directory(
@@ -146,8 +145,8 @@ def process_all_chunks():
     try:
         config = Config.from_env()
         chunker = DocumentChunker(
-            chunk_size=int(config.get("DOCUMENT_CHUNK_SIZE")),
-            overlap=int(config.get("DOCUMENT_CHUNK_OVERLAP")),
         )
         documents = document_service.list_documents()

 def scan_documents():
     """Scan documents from configured directory and store them in database"""
     try:
+        # 2. Get the full path from configuration
+        # The Config class already resolves relative paths using BASE_DIR
         config = Config.from_env()
+        full_path = Path(config.get("USER_RAW_CONTENT_DIR"))
         # 3. Scan and process files
         processed_doc_dtos = document_service.scan_directory(
     try:
         config = Config.from_env()
         chunker = DocumentChunker(
+            chunk_size=int(config.get("DOCUMENT_CHUNK_SIZE", 4000)),
+            overlap=int(config.get("DOCUMENT_CHUNK_OVERLAP", 200)),
         )
         documents = document_service.list_documents()

lpm_kernel/api/domains/kernel2/routes/role_routes.py CHANGED Viewed

@@ -20,6 +20,15 @@ def create_role():
     """create new Role"""
     try:
         data = request.get_json()
         create_request = CreateRoleRequest.from_dict(data)
         role = role_service.create_role(create_request)
@@ -28,6 +37,9 @@ def create_role():
         else:
             return jsonify(APIResponse.error("create role failed, maybe the name existed")), 400
     except Exception as e:
         logger.error(f"Error creating Role: {str(e)}")
         return jsonify(APIResponse.error(f"Error occurred when creating role: {str(e)}")), 500

     """create new Role"""
     try:
         data = request.get_json()
+        if not data:
+            return jsonify(APIResponse.error("Missing request body")), 400
+        # Validate mandatory fields
+        required_fields = ["name", "description", "system_prompt"]
+        missing_fields = [f for f in required_fields if not data.get(f)]
+        if missing_fields:
+            return jsonify(APIResponse.error(f"Missing mandatory fields: {', '.join(missing_fields)}")), 400
         create_request = CreateRoleRequest.from_dict(data)
         role = role_service.create_role(create_request)
         else:
             return jsonify(APIResponse.error("create role failed, maybe the name existed")), 400
+    except (KeyError, TypeError) as e:
+        logger.error(f"Validation error creating Role: {str(e)}")
+        return jsonify(APIResponse.error(f"Invalid request data: {str(e)}")), 400
     except Exception as e:
         logger.error(f"Error creating Role: {str(e)}")
         return jsonify(APIResponse.error(f"Error occurred when creating role: {str(e)}")), 500

lpm_kernel/api/domains/kernel2/routes_talk.py CHANGED Viewed

@@ -31,9 +31,9 @@ from lpm_kernel.api.domains.kernel2.services.advanced_chat_service import advanc
 logger = logging.getLogger(__name__)
-talk_bp = Blueprint("talk", __name__)
-@talk_bp.route("/api/talk", methods=["POST"])
 @validate()
 def chat(body: ChatRequest):
     """

 logger = logging.getLogger(__name__)
+talk_bp = Blueprint('talk', __name__, url_prefix='/api/talk')
+@talk_bp.route("/chat", methods=["POST"])
 @validate()
 def chat(body: ChatRequest):
     """

lpm_kernel/common/repository/vector_repository.py CHANGED Viewed

@@ -26,8 +26,7 @@ class BaseVectorRepository(ABC):
 class ChromaRepository(BaseVectorRepository):
     def __init__(self, collection_name: str, persist_directory: str = "./chroma_db"):
-        settings = Settings(anonymized_telemetry=False)
-        self.client = chromadb.PersistentClient(path=persist_directory, settings=settings)
         # Check if collection exists, create it if it doesn't
         try:

 class ChromaRepository(BaseVectorRepository):
     def __init__(self, collection_name: str, persist_directory: str = "./chroma_db"):
+        self.client = chromadb.PersistentClient(path=persist_directory)
         # Check if collection exists, create it if it doesn't
         try:

lpm_kernel/file_data/chroma_utils.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import Optional, Dict, Any, List, Tuple
 import os
 import chromadb
-from chromadb.config import Settings
 import logging
 from lpm_kernel.configs.logging import get_train_process_logger
@@ -74,8 +73,7 @@ def reinitialize_chroma_collections(dimension: int = 1536) -> bool:
     """
     try:
         chroma_path = os.getenv("CHROMA_PERSIST_DIRECTORY", "./data/chroma_db")
-        settings = Settings(anonymized_telemetry=False)
-        client = chromadb.PersistentClient(path=chroma_path, settings=settings)
         # Delete and recreate document collection
         try:

 from typing import Optional, Dict, Any, List, Tuple
 import os
 import chromadb
 import logging
 from lpm_kernel.configs.logging import get_train_process_logger
     """
     try:
         chroma_path = os.getenv("CHROMA_PERSIST_DIRECTORY", "./data/chroma_db")
+        client = chromadb.PersistentClient(path=chroma_path)
         # Delete and recreate document collection
         try:

lpm_kernel/file_data/chunker.py CHANGED Viewed

@@ -2,7 +2,10 @@ from typing import List
 from lpm_kernel.L1.bio import Chunk
 import traceback
 import time
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from lpm_kernel.configs.logging import get_train_process_logger
 logger = get_train_process_logger()

 from lpm_kernel.L1.bio import Chunk
 import traceback
 import time
+try:
+    from langchain_text_splitters import RecursiveCharacterTextSplitter
+except ImportError:
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
 from lpm_kernel.configs.logging import get_train_process_logger
 logger = get_train_process_logger()

lpm_kernel/utils.py CHANGED Viewed

@@ -3,7 +3,10 @@ from enum import Enum
 import tiktoken
 import re
 from typing import Any, Optional, Union, Collection, AbstractSet, Literal, List
-from langchain.text_splitter import TextSplitter
 import random
 import string
 from itertools import chain
@@ -165,7 +168,7 @@ class TokenTextSplitter(TextSplitter):
     def _cut_meaningless_head_tail(self, text: str) -> str:
         # Only split when there are multiple newlines, as parsing of PDF/Word often contains false newlines
-        sentences = re.split(r"\. |! |\? |。|！|？|\n+ *\n+", text)
         if len(sentences) < 2:
             return text
         head = sentences[0]

 import tiktoken
 import re
 from typing import Any, Optional, Union, Collection, AbstractSet, Literal, List
+try:
+    from langchain_text_splitters import TextSplitter
+except ImportError:
+    from langchain.text_splitter import TextSplitter
 import random
 import string
 from itertools import chain
     def _cut_meaningless_head_tail(self, text: str) -> str:
         # Only split when there are multiple newlines, as parsing of PDF/Word often contains false newlines
+        sentences = re.split("\. |! |\? |。|！|？|\n+ *\n+", text)
         if len(sentences) < 2:
             return text
         head = sentences[0]

scripts/setup.sh CHANGED Viewed

@@ -607,10 +607,6 @@ parse_args() {
 # Main function
 main() {
-    # Create necessary directories with write permissions
-    mkdir -p "./logs"
-    mkdir -p "./.cache/huggingface/hub"
     # Display welcome message
     display_header "Second-Me Complete Installation"

 # Main function
 main() {
     # Display welcome message
     display_header "Second-Me Complete Installation"

start.sh CHANGED Viewed

@@ -1,32 +1,43 @@
 #!/bin/bash
 echo "--- Checking SQLite database... ---"
-if [ ! -s /app/data/sqlite/lpm.db ]; then
     echo "SQLite database not found or empty, initializing..."
-    mkdir -p /app/data/sqlite
-    sqlite3 /app/data/sqlite/lpm.db ".read /app/docker/sqlite/init.sql"
     echo "SQLite database initialized successfully"
-    echo "Tables created:"
-    sqlite3 /app/data/sqlite/lpm.db ".tables"
 else
-    echo "SQLite database already exists, skipping initialization"
 fi
 echo "--- Checking ChromaDB... ---"
-if [ ! -d /app/data/chroma_db/documents ] || [ ! -d /app/data/chroma_db/document_chunks ]; then
     echo "ChromaDB collections not found, initializing..."
-    python /app/docker/app/init_chroma.py
     echo "ChromaDB initialized successfully"
 else
-    echo "ChromaDB already exists, skipping initialization"
 fi
-echo "--- Starting application... ---"
-export TRANSFORMERS_CACHE="/app/.cache/huggingface/hub"
-export CHROMA_SERVER_NO_ANALYTICS=True
-export HF_HUB_DISABLE_TELEMETRY=1
-source "$SCRIPT_DIR/scripts/setup.sh"
-echo "--- Starting application... ---"
-cd /app
-python -m flask run --host=0.0.0.0 --port=7860

 #!/bin/bash
+set -e
+echo "--- Starting application... ---"
+# Use relative paths for scripts
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Ensure we are in the app directory
+cd "$SCRIPT_DIR"
+# Initialize database if needed
 echo "--- Checking SQLite database... ---"
+if [ ! -s ./data/sqlite/lpm.db ]; then
     echo "SQLite database not found or empty, initializing..."
+    mkdir -p ./data/sqlite
+    sqlite3 ./data/sqlite/lpm.db < ./docker/sqlite/init.sql
     echo "SQLite database initialized successfully"
 else
+    echo "SQLite database already exists"
 fi
+# Initialize ChromaDB if needed
 echo "--- Checking ChromaDB... ---"
+if [ ! -d ./data/chroma_db/documents ] || [ ! -d ./data/chroma_db/document_chunks ]; then
     echo "ChromaDB collections not found, initializing..."
+    python ./docker/app/init_chroma.py
     echo "ChromaDB initialized successfully"
 else
+    echo "ChromaDB already exists"
 fi
+# Detect scripts/setup.sh; it is only reported here, not executed
+if [ -f "./scripts/setup.sh" ]; then
+    echo "Checking if setup is needed..."
+    # We skip full setup in container but could do minor checks
+fi
+echo "Starting Flask application..."
+export FLASK_APP=lpm_kernel.app
+# Use port 7860 for Hugging Face Spaces by default if LOCAL_APP_PORT is not set
+PORT=${LOCAL_APP_PORT:-7860}
+python -m flask run --host=0.0.0.0 --port=$PORT