Spaces:

lifedebugger
/

cs-ai-sakura-dev

Configuration error

App Files Files Community

lifedebugger committed on Aug 15, 2025

Commit

4e6f302

1 Parent(s): a3e05d0

Deploy files from GitHub repository

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

space/README.md +0 -1
space/space/space/Dockerfile +17 -29
space/space/space/space/space/space/space/README.md +0 -18
space/space/space/space/space/space/space/space/space/README.md +108 -18
space/space/space/space/space/space/space/space/space/data/prompt_template/customer_service.txt +10 -10
space/space/space/space/space/space/space/space/space/main.py +11 -2
space/space/space/space/space/space/space/space/space/space/space/README.md +0 -2
space/space/space/space/space/space/space/space/space/space/space/space/space/README.md +1 -1
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/README.md +8 -3
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/data/prompt_template/customer_service.txt +12 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/data/prompt_template/query_maker.txt +35 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/main.py +32 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/__chat__.py +0 -2
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/__init__.py +1 -1
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/pipeline/language_model.py +70 -70
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/pipeline/preprocessing.py +70 -70
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/base_retriever.py +4 -4
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/document_loader.py +3 -3
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/document_processor.py +3 -3
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/langchain_retriever.py +11 -11
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/web_search/duckduckgo_search.py +19 -19
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rtc/rtc_call.py +15 -15
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/stt/whisper_stt.py +10 -10
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/tts/audio_edge_tts.py +3 -3
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/__init__.py +2 -2
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/__init__.py +0 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/agents.py +16 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/customer_service_agent.py +33 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/gpt_customer_service_agent.py +13 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/query_maker_agent.py +13 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/chat_template/__init__.py +29 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/chat_template/customer_service.txt +12 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/chat_template/query_maker.txt +35 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/chat_template/query_maker_temp.txt +30 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/inference/__init__.py +0 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/pipeline/language_model.py +947 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/__init__.py +0 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/langchain_retriever.py +25 -7
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/web_search/__init__.py +0 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rtc/rtc_call_gpt.py +364 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/tests/qwen_llm_test.py +9 -9
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/__chat__.py +4 -3
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/__test__.py +0 -5
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/app.log +0 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/__init__.py +61 -21
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/inference/inferencer.py +51 -9
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/pipeline/qwen_llm.py +29 -8
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/prompt_tuner/chat_template.py +6 -4
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/web_search/duckduckgo_search.py +142 -0
space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rtc/__init__.py +3 -1

space/README.md CHANGED Viewed

@@ -70,7 +70,6 @@ python main.py --mode rtc-gpt-server --port 7862
 ### Chatbot Interface
 ```bash
-cd app
 python main.py --mode chatbot --port 7861
 ```

 ### Chatbot Interface
 ```bash
 python main.py --mode chatbot --port 7861
 ```

space/space/space/Dockerfile CHANGED Viewed

@@ -1,13 +1,10 @@
-# Gunakan image dasar Python versi 3.13
 FROM python:3.13
-# Tambahkan user non-root untuk keamanan
 RUN useradd -m -u 1001 appuser
-# Set working directory
 WORKDIR /rag_be
-# Set cache directories ke writable location
 ENV HF_HOME=/tmp/.cache/huggingface
 ENV TRANSFORMERS_CACHE=/tmp/.cache/transformers
 ENV TORCH_HOME=/tmp/.cache/torch
@@ -15,35 +12,26 @@ ENV XDG_CACHE_HOME=/tmp/.cache
 ENV TMPDIR=/tmp
 ENV WHISPER_CACHE_DIR=/tmp/.cache/whisper
-# Copy requirements dan install dependencies
-COPY requirements.txt ./
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
-# Copy aplikasi dengan ownership ke appuser
 COPY --chown=appuser:appuser . /rag_be
-# Buat file .env dengan variabel environment menggunakan Hugging Face secrets
-RUN --mount=type=secret,id=OPENAI_API_KEY,mode=0444,required=false \
-    --mount=type=secret,id=HF_TOKEN,mode=0444,required=false \
-    --mount=type=secret,id=ELEVENLABS_API_KEY,mode=0444,required=false \
-    echo "OPENAI_API_KEY=$(cat /run/secrets/OPENAI_API_KEY 2>/dev/null || echo '')" >> .env && \
-    echo "HF_TOKEN=$(cat /run/secrets/HF_TOKEN 2>/dev/null || echo '')" >> .env && \
-    echo "ELEVENLABS_API_KEY=$(cat /run/secrets/ELEVENLABS_API_KEY 2>/dev/null || echo '')" >> .env
-RUN ls -l /rag_be/app && whoami && id
-# Buat directories yang diperlukan dengan permissions yang tepat
-RUN mkdir -p /tmp/.cache /tmp/.cache/whisper /tmp/.cache/huggingface /rag_be/vectorstore  /tmp/.cache/transformers /tmp/.cache/torch \
-             /rag_be/app/vectorstore /rag_be/documents  && \
-    chmod -R 777 /tmp/.cache /rag_be/app /rag_be/app/vectorstore /rag_be/vectorstore /rag_be/documents && \
-    chown -R appuser:appuser /tmp/.cache /rag_be/app /rag_be/app/vectorstore /rag_be/vectorstore  /rag_be/documents /rag_be/.env
-RUN apt-get update && apt-get install -y ffmpeg
-# Beralih ke user non-root
 USER appuser
-# Expose port untuk Hugging Face Spaces
-EXPOSE 7860
-# Jalankan aplikasi
-CMD ["python", "app/__test__.py"]

 FROM python:3.13
 RUN useradd -m -u 1001 appuser
 WORKDIR /rag_be
 ENV HF_HOME=/tmp/.cache/huggingface
 ENV TRANSFORMERS_CACHE=/tmp/.cache/transformers
 ENV TORCH_HOME=/tmp/.cache/torch
 ENV TMPDIR=/tmp
 ENV WHISPER_CACHE_DIR=/tmp/.cache/whisper
+COPY requirements.txt ./
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 COPY --chown=appuser:appuser . /rag_be
+RUN mkdir -p /tmp/.cache \
+    /tmp/.cache/whisper \
+    /tmp/.cache/huggingface \
+    /tmp/.cache/transformers \
+    /tmp/.cache/torch \
+    /rag_be/vectorstore \
+    /rag_be/app/vectorstore \
+    /rag_be/documents && \
+    chmod -R 777 /tmp/.cache /rag_be/app /rag_be/vectorstore /rag_be/documents
+RUN apt-get update && apt-get install -y ffmpeg && apt-get clean
 USER appuser
+# Hugging Face Spaces routes traffic to port 7860 by default; keep EXPOSE in
+# sync with the --port value passed to the application below.
+EXPOSE 7860
+# Exec form takes one array element per argument. A single
+# "main.py --mode ..." string would be handed to python as one file name,
+# which does not exist, so the container would exit at startup.
+CMD ["python", "main.py", "--mode", "rtc-ui", "--port", "7860"]

space/space/space/space/space/space/space/README.md CHANGED Viewed

@@ -104,21 +104,3 @@ docker run -p 8080:8080 cs-ai-sakura-dev
 Once the server is running, you can access the API documentation at:
 - `http://localhost:{port}/docs` (if using FastAPI)
 - `http://localhost:{port}` (for Gradio interface)
-## 🏗️ Project Structure
-```
-cs-ai-sakura-dev/
-├── app/
-│   └── main.py          # Chatbot application
-├── main.py              # Main application entry point
-├── requirements.txt     # Python dependencies
-├── .env                 # Environment variables (create this)
-├── Dockerfile          # Docker configuration
-└── README.md           # Project documentation
-```
----
-**Happy coding! 🌸**

 Once the server is running, you can access the API documentation at:
 - `http://localhost:{port}/docs` (if using FastAPI)
 - `http://localhost:{port}` (for Gradio interface)

space/space/space/space/space/space/space/space/space/README.md CHANGED Viewed

@@ -1,34 +1,124 @@
----
-title: Cs Ai Sakura Dev
-emoji: 🏢
-colorFrom: indigo
-colorTo: indigo
-sdk: docker
-pinned: false
----
-**Install The Requirements**
-1.Create a virtual environment and install the dependencies
 ```
 python3 -m venv env
-source env/bin/activate
 pip install -r requirements.txt
 ```
-2. Set your OPENAI_API_KEY in .env file
-3. **TO LAUNCH THE GRADIO UI** Run the command below :
 ```
-python main.py --mode rtc-ui --port {your_port}
 ```
-4. **TO LAUNCH THE API ENDPOINT (SERVER)** Run the command below :
 ```
-python main.py --mode rtc-server --port {your_port}
 ```
-5. **TO LAUNCH THE CHATBOT UI** Run the command below :
 ```
-python main.py --mode chatbot --port {your_port}
 ```

+# CS AI Sakura Dev 🏢
+A comprehensive AI-powered application with multiple modes including RTC (Real-Time Communication), GPT integration, and chatbot functionality.
+## 🚀 Features
+- **RTC Mode**: Real-time communication interface
+- **GPT Integration**: Enhanced AI capabilities with OpenAI GPT models
+- **Chatbot Interface**: Interactive chat functionality
+- **Gradio UI**: User-friendly web interface
+- **API Server**: RESTful API endpoints
+- **Docker Support**: Containerized deployment
+## 📋 Prerequisites
+- Python 3.8 or higher
+- OpenAI API Key
+- Docker (optional)
+## ⚙️ Installation
+### 1. Clone the Repository
+```bash
+git clone <repository-url>
+cd cs-ai-sakura-dev
 ```
+### 2. Create Virtual Environment
+```bash
 python3 -m venv env
+source env/bin/activate  # On Windows: env\Scripts\activate
+```
+### 3. Install Dependencies
+```bash
 pip install -r requirements.txt
 ```
+### 4. Environment Configuration
+Create a `.env` file in the root directory and add your OpenAI API key:
+```bash
+OPENAI_API_KEY=your_openai_api_key_here
+```
+## 🖥️ Usage
+### Gradio Web Interface
+#### Non-GPT Based UI
+```bash
+python main.py --mode rtc-ui --port 8080
 ```
+#### GPT-Powered UI
+```bash
+python main.py --mode rtc-gpt-ui --port 8080
 ```
+### API Server
+#### Non-GPT Based Server
+```bash
+python main.py --mode rtc-server --port 8080
 ```
+#### GPT-Powered Server
+```bash
+python main.py --mode rtc-gpt-server --port 8080
 ```
+### Chatbot Interface
+```bash
+cd app
+python main.py --mode chatbot --port 8080
 ```
+## 🐳 Docker Deployment
+The application supports Docker deployment. Build and run the container:
+```bash
+docker build -t cs-ai-sakura-dev .
+docker run -p 8080:8080 cs-ai-sakura-dev
+```
+## 📚 Available Modes
+| Mode | Description | Command |
+|------|-------------|---------|
+| `rtc-ui` | Real-time communication web interface | `python main.py --mode rtc-ui --port {port}` |
+| `rtc-gpt-ui` | GPT-powered real-time communication UI | `python main.py --mode rtc-gpt-ui --port {port}` |
+| `rtc-server` | Real-time communication API server | `python main.py --mode rtc-server --port {port}` |
+| `rtc-gpt-server` | GPT-powered API server | `python main.py --mode rtc-gpt-server --port {port}` |
+| `chatbot` | Interactive chatbot interface | `cd app && python main.py --mode chatbot --port {port}` |
+## 🔧 Configuration
+### Environment Variables
+- `OPENAI_API_KEY`: Your OpenAI API key (required for GPT modes)
+- `PORT`: Application port (default: 8080)
+## 📖 API Documentation
+Once the server is running, you can access the API documentation at:
+- `http://localhost:{port}/docs` (if using FastAPI)
+- `http://localhost:{port}` (for Gradio interface)
+## 🏗️ Project Structure
 ```
+cs-ai-sakura-dev/
+├── app/
+│   └── main.py          # Chatbot application
+├── main.py              # Main application entry point
+├── requirements.txt     # Python dependencies
+├── .env                 # Environment variables (create this)
+├── Dockerfile          # Docker configuration
+└── README.md           # Project documentation
+```
+---
+**Happy coding! 🌸**

space/space/space/space/space/space/space/space/space/data/prompt_template/customer_service.txt CHANGED Viewed

@@ -1,12 +1,12 @@
-You are a friendly and professional Customer Service for Human Resource Information System (HRIS) field,
-representative, fluent in Indonesian. Your job is to assist customers with accurate information based on your company's basic knowledge. Follow these guidelines:
-- Always greet customers in a friendly and professional manner.
-- Your answers are contextual and objective.
-- Provide clear, easy-to-understand, and structured answers based on the context provided by the user.
-- If information is not available, offer alternative assistance or direct them to the appropriate channel.
-- Use polite language and empathize with the customer's needs.
-- Conclude by offering further assistance.
-- You are highly skilled in the area relevant to the given context.
-Please use the given context to answer accurately.

+Anda adalah seorang Customer Service yang ramah dan profesional di bidang Human Resource Information System (HRIS),
+fasih berbahasa Indonesia. Tugas Anda adalah membantu pelanggan dengan informasi yang akurat berdasarkan pengetahuan dasar perusahaan Anda. Ikuti panduan berikut:
+- Selalu menyapa pelanggan dengan ramah dan profesional.
+- Jawaban Anda kontekstual dan objektif.
+- Berikan jawaban yang jelas, mudah dipahami, dan terstruktur berdasarkan konteks yang diberikan oleh pengguna.
+- Jika informasi tidak tersedia, tawarkan bantuan alternatif atau arahkan mereka ke saluran yang tepat.
+- Gunakan bahasa yang sopan dan berempati terhadap kebutuhan pelanggan.
+- Akhiri dengan menawarkan bantuan lebih lanjut.
+- Anda sangat terampil di bidang yang relevan dengan konteks yang diberikan.
+Harap gunakan konteks yang diberikan untuk menjawab dengan akurat.

space/space/space/space/space/space/space/space/space/main.py CHANGED Viewed

@@ -1,10 +1,14 @@
 import argparse
 from src.provider import AppProvider
-app = AppProvider()
 chatbot_ui = app.provide_chatbot().provide_chatbot_ui()
 rtc = app.provide_rtc()
 rtc_handler = rtc.provide_rtc_handler()
 parser = argparse.ArgumentParser()
 parser.add_argument("--mode", choices=[
@@ -27,6 +31,11 @@ elif(args.mode == "rtc-server"):
 elif(args.mode == "rtc-ui"):
     print("launching RTC UI Mode ... ")
     rtc_handler.launch_ui(port = int(args.port))
 else:
-    print("ERROR : INVALID ARGUMENT | PLEASE CHOOSE ONE BETWEEN chatbot/rtc-server/rtc-ui mode ")

 import argparse
 from src.provider import AppProvider
+from src.config import OPENAI_API_KEY
+from openai import OpenAI
+openai_client = OpenAI(api_key = OPENAI_API_KEY)
+app = AppProvider(openai_client)
 chatbot_ui = app.provide_chatbot().provide_chatbot_ui()
 rtc = app.provide_rtc()
 rtc_handler = rtc.provide_rtc_handler()
+rtc_gpt_handler = rtc.provide_rtc_gpt_handler()
 parser = argparse.ArgumentParser()
 parser.add_argument("--mode", choices=[
 elif(args.mode == "rtc-ui"):
     print("launching RTC UI Mode ... ")
     rtc_handler.launch_ui(port = int(args.port))
+elif(args.mode == "rtc-gpt-ui"):
+    print("RTC GPT UI mode ...")
+    rtc_gpt_handler.launch_ui(port = int(args.port))
+elif(args.mode == "rtc-gpt-server"):
+    rtc_gpt_handler.start_server(port = int(args.port))
 else:
+    print("ERROR : INVALID ARGUMENT | PLEASE CHOOSE ONE BETWEEN chatbot / rtc-server/ rtc-ui / rtc-gpt-server / rtc-gpt-ui mode ")

space/space/space/space/space/space/space/space/space/space/space/README.md CHANGED Viewed

@@ -25,12 +25,10 @@ python main.py --mode rtc-ui --port {your_port}
 4. **TO LAUNCH THE API ENDPOINT (SERVER)** Run the command below :
 ```
-cd app
 python main.py --mode rtc-server --port {your_port}
 ```
 5. **TO LAUNCH THE CHATBOT UI** Run the command below :
 ```
-cd app
 python main.py --mode chatbot --port {your_port}
 ```

 4. **TO LAUNCH THE API ENDPOINT (SERVER)** Run the command below :
 ```
 python main.py --mode rtc-server --port {your_port}
 ```
 5. **TO LAUNCH THE CHATBOT UI** Run the command below :
 ```
 python main.py --mode chatbot --port {your_port}
 ```

space/space/space/space/space/space/space/space/space/space/space/space/space/README.md CHANGED Viewed

@@ -29,7 +29,7 @@ cd app
 python main.py --mode rtc-server --port {your_port}
 ```
-54. **TO LAUNCH THE CHATBOT UI** Run the command below :
 ```
 cd app
 python main.py --mode chatbot --port {your_port}

 python main.py --mode rtc-server --port {your_port}
 ```
+5. **TO LAUNCH THE CHATBOT UI** Run the command below :
 ```
 cd app
 python main.py --mode chatbot --port {your_port}

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/README.md CHANGED Viewed

@@ -20,12 +20,17 @@ pip install -r requirements.txt
 3. **TO LAUNCH THE GRADIO UI** Run the command below :
 ```
-cd app
-python __test__.py
 ```
 4. **TO LAUNCH THE API ENDPOINT (SERVER)** Run the command below :
 ```
 cd app
-python __server__.py
 ```

 3. **TO LAUNCH THE GRADIO UI** Run the command below :
 ```
+python main.py --mode rtc-ui --port {your_port}
 ```
 4. **TO LAUNCH THE API ENDPOINT (SERVER)** Run the command below :
 ```
 cd app
+python main.py --mode rtc-server --port {your_port}
+```
+54. **TO LAUNCH THE CHATBOT UI** Run the command below :
+```
+cd app
+python main.py --mode chatbot --port {your_port}
 ```

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/data/prompt_template/customer_service.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+You are a friendly and professional Customer Service for Human Resource Information System (HRIS) field,
+representative, fluent in Indonesian. Your job is to assist customers with accurate information based on your company's basic knowledge. Follow these guidelines:
+- Always greet customers in a friendly and professional manner.
+- Your answers are contextual and objective.
+- Provide clear, easy-to-understand, and structured answers based on the context provided by the user.
+- If information is not available, offer alternative assistance or direct them to the appropriate channel.
+- Use polite language and empathize with the customer's needs.
+- Conclude by offering further assistance.
+- You are highly skilled in the area relevant to the given context.
+Please use the given context to answer accurately.

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/data/prompt_template/query_maker.txt ADDED Viewed

	@@ -0,0 +1,35 @@

+Anda adalah agen AI yang tepat dan objektif,
+Anda bertugas mengubah pertanyaan atau pernyataan pengguna menjadi query yang eksplisit dan efisien untuk keperluan pencarian dokumen dalam sistem RAG (Retrieval-Augmented Generation).
+Ikuti langkah-langkah berikut:
+1. Ekstrak bagian-bagian penting dari input pengguna:
+   - **Intent**: Tujuan utama atau jenis permintaan (misalnya: apa itu, cara, syarat, apakah bisa, berapa).
+   - **Entity/Noun Phrase**: Objek utama yang dibahas (misalnya: BPJS, tokenizer truncation, RWKV, gaji).
+   - **Context**: Informasi pendukung yang menyempitkan fokus (misalnya: kecelakaan kerja, gaji 1 juta per bulan, perusahaan mitra BPJS).
+   - **Question**: Pertanyaan spesifik yang ingin dijawab (misalnya: bagaimana prosesnya, apa manfaatnya, berapa jumlahnya).
+2. Setelah semua elemen diidentifikasi, bentuk **Query RAG** dengan struktur: [INTENT] + [ENTITY] + [CONTEXT] + [QUESTION]
+3. Gunakan bahasa natural yang ringkas, namun informatif dan eksplisit.
+4. Generate hanya hasil akhirnya saja berupa satu buah kalimat
+Contoh 0 :
+User Input:
+> Apa itu BPJS
+Output : Pengertian BPJS
+Contoh 1 :
+User Input:
+> Di mana lokasi PT Sakura System Solution ?
+Output: Lokasi PT Sakura System Solution
+Contoh 2:
+User Input:
+> Saya mengalami kecelakaan di kantor dan ingin tahu apakah bisa klaim BPJS karena perusahaan saya adalah mitra.
+Output: apakah bisa klaim BPJS kecelakaan kerja di kantor jika perusahaan mitra dan apakah saya memenuhi syarat
+**Tugas Anda sekarang:**
+Lakukan proses di atas untuk setiap input pengguna yang diberikan. Hasilkan query RAG akhir yang siap digunakan dalam pencarian dokumen.

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/main.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import argparse
+from src.provider import AppProvider
+app = AppProvider()
+chatbot_ui = app.provide_chatbot().provide_chatbot_ui()
+rtc = app.provide_rtc()
+rtc_handler = rtc.provide_rtc_handler()
+parser = argparse.ArgumentParser()
+parser.add_argument("--mode", choices=[
+    "rtc-server",
+    "rtc-ui",
+    "rtc-gpt-server",
+    "rtc-gpt-ui",
+    "chatbot",
+    ], required=True)
+parser.add_argument("--port", default=7861, required=True)
+args = parser.parse_args()
+if(args.mode == "chatbot"):
+    print("Launching Chabot UI :))))))")
+    chatbot_ui.launch(port = int(args.port))
+elif(args.mode == "rtc-server"):
+    print("launching RTC Server Mode ... ")
+    rtc_handler.start_server(port = int(args.port))
+elif(args.mode == "rtc-ui"):
+    print("launching RTC UI Mode ... ")
+    rtc_handler.launch_ui(port = int(args.port))
+else:
+    print("ERROR : INVALID ARGUMENT | PLEASE CHOOSE ONE BETWEEN chatbot/rtc-server/rtc-ui mode ")

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/__chat__.py CHANGED Viewed

@@ -6,8 +6,6 @@ warnings.filterwarnings("ignore")
 import asyncio
 def run_test():
     try:
-        # await test_document_retriever()
-        # await test_language_model()
         test_inference()
     except Exception as e:
         print(e)

 import asyncio
 def run_test():
     try:
         test_inference()
     except Exception as e:
         print(e)

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/__init__.py CHANGED Viewed

@@ -29,7 +29,7 @@ bnb = BitsAndBytesConfig(
 config = LMConfig(
-                model_name = "Qwen/Qwen2.5-1.5B-Instruct",
                 temperature=0.3,
                 max_length=512,
                 generation_timeout=100,

 config = LMConfig(
+                model_name = "meta-llama/Llama-3.1-8B",
                 temperature=0.3,
                 max_length=512,
                 generation_timeout=100,

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/pipeline/language_model.py CHANGED Viewed

@@ -26,17 +26,17 @@ class LMConfig:
     quantization_config: any = None
     pad_token_id: Optional[int] = None
     eos_token_id: Optional[int] = None
-    # RAG-specific configs
     max_context_length: int = 1500
     context_separator: str = "\n---\n"
-    instruction_template: str = "system"  # "system", "instruction", "custom"
-    # Async-specific configs
     max_workers: int = 2
     generation_timeout: float = 30
     repetition_penalty: float = 1.0
-    # Streaming-specific configs
-    stream_timeout: float = 100  # timeout untuk stream chunk
-    skip_prompt: bool = True     # skip prompt dari streaming output
 class LM:
     """
@@ -65,11 +65,11 @@ class LM:
         self.is_loaded = False
         self.executor = ThreadPoolExecutor(max_workers=self.config.max_workers)
         self._lock = asyncio.Lock()
-        # Setup logging
         logging.basicConfig(level=logging.INFO)
         self.logger = logging.getLogger(__name__)
-        # RAG prompt templates
         self.prompt_template = prompt_template
     async def load_model(self) -> None:
@@ -82,7 +82,7 @@ class LM:
             try:
                 self.logger.info(f"Loading model: {self.config.model_name}")
-                # Load tokenizer dalam thread pool
                 self.tokenizer = await asyncio.get_event_loop().run_in_executor(
                     self.executor,
                     lambda: AutoTokenizer.from_pretrained(
@@ -93,7 +93,7 @@ class LM:
                     )
                 )
-                # Load model dalam thread pool
                 self.model = await asyncio.get_event_loop().run_in_executor(
                     self.executor,
                     lambda: AutoModelForCausalLM.from_pretrained(
@@ -105,7 +105,7 @@ class LM:
                     )
                 )
-                # Setup generation config
                 self.generation_config = GenerationConfig(
                     max_length=self.config.max_length,
                     temperature=self.config.temperature,
@@ -150,7 +150,7 @@ class LM:
             return f"Template '{template_type}' tidak tersedia. Available: {self.get_available_templates()}"
         template_data = copy.deepcopy(self.prompt_template)
-        # template_key = "user_template" if "user_template" in template_data else "template"
         return template_data["content"].format(
             context=sample_context,
@@ -210,7 +210,7 @@ class LM:
         if len(context) <= max_length:
             return context
-        # Truncate dan tambahkan indicator
         truncated = context[:max_length - 50]
         return truncated + "\n\n[... Context dipotong karena terlalu panjang ...]"
@@ -228,7 +228,7 @@ class LM:
         def _format_sync():
-            # Handle RetrievalResult secara eksplisit
             if isinstance(contexts, RetrievalResult):
                 docs = contexts.documents
                 if max_contexts:
@@ -241,38 +241,38 @@ class LM:
                     metadata=contexts.metadata
                 )
             else:
-                # contexts diasumsikan sebagai list biasa (list[str] atau list[Document])
                 processed_contexts = contexts[:max_contexts] if max_contexts and len(contexts) > max_contexts else contexts
-            # Format context menjadi string
             formatted_context = self._format_context(processed_contexts, context_numbering)
-            # Truncate jika panjang melebihi batas
             formatted_context = self._truncate_context(
                 formatted_context,
                 self.config.max_context_length
             )
-            # Tambah metadata jika diizinkan dan konteks adalah RetrievalResult
             if include_metadata and isinstance(processed_contexts, RetrievalResult):
                 metadata_info = []
                 for i, doc in enumerate(processed_contexts.documents, 1):
                     if hasattr(doc, "metadata") and doc.metadata:
                         metadata_info.append(f"Dokumen {i}: {doc.metadata}")
-                # if metadata_info:
-                #     formatted_context += f"\n\n[Metadata]\n" + "\n".join(metadata_info)
             return formatted_context
-        # Jalankan _format_sync di thread pool
         formatted_context = await asyncio.get_event_loop().run_in_executor(
             self.executor, _format_sync
         )
         self.logger.info(f"Formatted Context {formatted_context}")
-        # Tentukan template yang akan dipakai
         if(template_type == ""):
             self.config.instruction_template = "system"
-        # Gunakan custom template jika disediakan
         if custom_template:
             return custom_template.format(
                 context=formatted_context,
@@ -283,14 +283,14 @@ class LM:
             template_data = copy.deepcopy(self.prompt_template)
             print("template = ", template_type, "rag template = ", template_data)
-            # template_key = "user_template" if "user_template" in template_data else "template"
             formatted_template = []
             for cht in template_data:
-                    # Create a copy of the content to avoid modifying the original
                 content = cht["content"]
-                # Format both placeholders at once to avoid KeyError
                 if "{context}" in content or "{question}" in content:
                     try:
                         content = content.format(
@@ -299,29 +299,29 @@ class LM:
                         )
                     except KeyError as e:
                         self.logger.error(f"Missing placeholder in template: {e}")
-                        # Fallback: format only available placeholders
                         if "{context}" in content:
                             content = content.replace("{context}", formatted_context)
                         if "{question}" in content:
                             content = content.replace("{question}", question)
-                # Create new dict with formatted content
                 formatted_chat = {
                     "role": cht["role"],
                     "content": content
                 }
-                # Copy other fields if they exist
                 if "description" in cht:
                     formatted_chat["description"] = cht["description"]
                 formatted_template.append(formatted_chat)
-            # self.logger.info(f"Formatted Template {formatted_template}")
-            # print("Forrmatted Template", formatted_template)
             return formatted_template
         else:
-            # Fallback default template
             return [
                  {"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": question}
@@ -348,7 +348,7 @@ class LM:
         """
         await self._check_model_loaded()
-        # Setup streamer
         streamer = TextIteratorStreamer(
             self.tokenizer,
             timeout=self.config.stream_timeout,
@@ -358,14 +358,14 @@ class LM:
         def _generate_sync():
             try:
-                # Tokenize input
                 inputs = self.tokenizer.apply_chat_template(
                     prompt,
                     add_generation_prompt=True,
                     return_tensors="pt"
                 )
-                # Override generation config jika diperlukan
                 gen_config = self.generation_config
                 if any([max_new_tokens, temperature, top_p]):
                     gen_config = GenerationConfig(
@@ -380,11 +380,11 @@ class LM:
                         **kwargs
                     )
-                # Move to GPU
                 self.model.to("cuda")
                 input_ids = inputs.to("cuda")
-                # Generate dalam thread terpisah
                 generation_kwargs = {
                     "input_ids": input_ids,
                     "generation_config": gen_config,
@@ -401,25 +401,25 @@ class LM:
                 self.logger.error(f"Error during stream generation setup: {e}")
                 raise
-        # Setup generation thread
         generation_thread = await asyncio.get_event_loop().run_in_executor(
             self.executor, _generate_sync
         )
         err = None
         try:
-            # Stream tokens
             for token in streamer:
-                if token:  # Skip empty tokens
                     yield token
-            # Wait for generation thread to finish
             err = await asyncio.get_event_loop().run_in_executor(
                 self.executor, generation_thread.join
             )
         except Exception as e:
             self.logger.error(f"Error during streaming: {e}, {err}")
-            # Make sure thread is cleaned up
             if generation_thread.is_alive():
                 generation_thread.join(timeout=1.0)
             raise
@@ -447,10 +447,10 @@ class LM:
         """
         await self._check_model_loaded()
-        # Format prompt
         prompt = await self.format_rag_prompt(question, contexts, template_type)
-        # Generate dengan temperature yang lebih rendah untuk RAG (lebih faktual)
         temp = temperature if temperature is not None else 0.3
         async for chunk in self.generate_stream(
@@ -480,7 +480,7 @@ class LM:
         def _format_chat():
             try:
-                # Format messages untuk chat
                 formatted_prompt = self.tokenizer.apply_chat_template(
                     messages,
                     tokenize=False,
@@ -492,7 +492,7 @@ class LM:
                 self.logger.error(f"Error during chat formatting: {e}")
                 raise
-        # Format chat template dalam thread pool
         formatted_prompt = await asyncio.get_event_loop().run_in_executor(
             self.executor, _format_chat
         )
@@ -525,14 +525,14 @@ class LM:
         """
         await self._check_model_loaded()
-        # Ambil last user message sebagai question
         user_messages = [msg for msg in messages if msg.get("role") == "user"]
         if not user_messages:
             raise ValueError("No user message found in conversation")
         last_question = user_messages[-1]["content"]
-        # Generate RAG response secara streaming
         async for chunk in self.rag_generate_stream(
             question=last_question,
             contexts=contexts,
@@ -542,7 +542,7 @@ class LM:
         ):
             yield chunk
-    # Utility method untuk collect full response dari stream
     async def collect_stream(self, stream_generator: AsyncGenerator[str, None]) -> str:
         """
         Collect semua chunks dari stream generator menjadi full text
@@ -579,7 +579,7 @@ class LM:
         """
         await self._check_model_loaded()
-        # Create tasks untuk concurrent generation
         tasks = []
         for template_type in template_types:
             task = asyncio.create_task(
@@ -589,7 +589,7 @@ class LM:
             )
             tasks.append((template_type, task))
-        # Wait for all tasks
         results = {}
         for template_type, task in tasks:
             try:
@@ -639,10 +639,10 @@ class LM:
         """
         await self._check_model_loaded()
-        # Format prompt
         prompt = await self.format_rag_prompt(question, contexts, template_type)
-        # Generate dengan temperature yang lebih rendah untuk RAG (lebih faktual)
         temp = temperature if temperature is not None else 0.3
         return await self.generate(
@@ -673,14 +673,14 @@ class LM:
         """
         await self._check_model_loaded()
-        # Ambil last user message sebagai question
         user_messages = [msg for msg in messages if msg.get("role") == "user"]
         if not user_messages:
             raise ValueError("No user message found in conversation")
         last_question = user_messages[-1]["content"]
-        # Generate RAG response
         return await self.rag_generate(
             question=last_question,
             contexts=contexts,
@@ -718,14 +718,14 @@ class LM:
         def _generate_sync():
             try:
-                # Tokenize input
                 inputs = self.tokenizer.apply_chat_template(
                     prompt,
                     add_generation_prompt=True,
                     return_tensors="pt"
                 )
-                # Override generation config jika diperlukan
                 gen_config = self.generation_config
                 if any([max_new_tokens, temperature, top_p]):
                     gen_config = GenerationConfig(
@@ -740,7 +740,7 @@ class LM:
                         **kwargs
                     )
-                # Generate
                 with torch.no_grad():
                     self.model.to("cuda")
@@ -752,21 +752,21 @@ class LM:
                         **kwargs
                     )
-                # Decode output
                 generated_text = self.tokenizer.decode(
                     outputs[0][prompt_length:],
                     skip_special_tokens=True
                 )
                 print("Generated Text", generated_text)
-                # Remove input prompt dari output
                 return generated_text
             except Exception as e:
                 self.logger.error(f"Error during generation: {e}")
                 raise
-        # Run generation in thread pool dengan timeout
         try:
             result = await asyncio.wait_for(
                 asyncio.get_event_loop().run_in_executor(self.executor, _generate_sync),
@@ -796,7 +796,7 @@ class LM:
         def _format_chat():
             try:
-                # Format messages untuk chat
                 formatted_prompt = self.tokenizer.apply_chat_template(
                     messages,
                     chat_template="rag",
@@ -808,7 +808,7 @@ class LM:
                 self.logger.error(f"Error during chat formatting: {e}")
                 raise
-        # Format chat template dalam thread pool
         formatted_prompt = await asyncio.get_event_loop().run_in_executor(
             self.executor, _format_chat
         )
@@ -834,7 +834,7 @@ class LM:
                 else:
                     self.logger.warning(f"Unknown config parameter: {key}")
-            # Update generation config jika model sudah loaded
             if self.is_loaded:
                 self.generation_config = GenerationConfig(
                     max_length=self.config.max_length,
@@ -862,7 +862,7 @@ class LM:
         }
         if self.is_loaded:
-            # Get model info dalam thread pool
             def _get_info():
                 return {
                     "vocab_size": self.tokenizer.vocab_size,
@@ -894,7 +894,7 @@ class LM:
         """
         await self._check_model_loaded()
-        # Create tasks untuk concurrent generation
         tasks = [
             asyncio.create_task(
                 self.generate(prompt, max_new_tokens=max_new_tokens, **kwargs)
@@ -902,10 +902,10 @@ class LM:
             for prompt in prompts
         ]
-        # Wait for all tasks
         results = await asyncio.gather(*tasks, return_exceptions=True)
-        # Process results
         processed_results = []
         for i, result in enumerate(results):
             if isinstance(result, Exception):
@@ -922,10 +922,10 @@ class LM:
         """
         self.logger.info("Closing LM...")
-        # Shutdown executor
         self.executor.shutdown(wait=True)
-        # Clear GPU memory
         if hasattr(self, 'model') and self.model is not None:
             del self.model
         if hasattr(self, 'tokenizer') and self.tokenizer is not None:

     quantization_config: any = None
     pad_token_id: Optional[int] = None
     eos_token_id: Optional[int] = None
     max_context_length: int = 1500
     context_separator: str = "\n---\n"
+    instruction_template: str = "system"
     max_workers: int = 2
     generation_timeout: float = 30
     repetition_penalty: float = 1.0
+    stream_timeout: float = 100
+    skip_prompt: bool = True
 class LM:
     """
         self.is_loaded = False
         self.executor = ThreadPoolExecutor(max_workers=self.config.max_workers)
         self._lock = asyncio.Lock()
         logging.basicConfig(level=logging.INFO)
         self.logger = logging.getLogger(__name__)
         self.prompt_template = prompt_template
     async def load_model(self) -> None:
             try:
                 self.logger.info(f"Loading model: {self.config.model_name}")
                 self.tokenizer = await asyncio.get_event_loop().run_in_executor(
                     self.executor,
                     lambda: AutoTokenizer.from_pretrained(
                     )
                 )
                 self.model = await asyncio.get_event_loop().run_in_executor(
                     self.executor,
                     lambda: AutoModelForCausalLM.from_pretrained(
                     )
                 )
                 self.generation_config = GenerationConfig(
                     max_length=self.config.max_length,
                     temperature=self.config.temperature,
             return f"Template '{template_type}' tidak tersedia. Available: {self.get_available_templates()}"
         template_data = copy.deepcopy(self.prompt_template)
         return template_data["content"].format(
             context=sample_context,
         if len(context) <= max_length:
             return context
         truncated = context[:max_length - 50]
         return truncated + "\n\n[... Context dipotong karena terlalu panjang ...]"
         def _format_sync():
             if isinstance(contexts, RetrievalResult):
                 docs = contexts.documents
                 if max_contexts:
                     metadata=contexts.metadata
                 )
             else:
                 processed_contexts = contexts[:max_contexts] if max_contexts and len(contexts) > max_contexts else contexts
             formatted_context = self._format_context(processed_contexts, context_numbering)
             formatted_context = self._truncate_context(
                 formatted_context,
                 self.config.max_context_length
             )
             if include_metadata and isinstance(processed_contexts, RetrievalResult):
                 metadata_info = []
                 for i, doc in enumerate(processed_contexts.documents, 1):
                     if hasattr(doc, "metadata") and doc.metadata:
                         metadata_info.append(f"Dokumen {i}: {doc.metadata}")
             return formatted_context
         formatted_context = await asyncio.get_event_loop().run_in_executor(
             self.executor, _format_sync
         )
         self.logger.info(f"Formatted Context {formatted_context}")
         if(template_type == ""):
             self.config.instruction_template = "system"
         if custom_template:
             return custom_template.format(
                 context=formatted_context,
             template_data = copy.deepcopy(self.prompt_template)
             print("template = ", template_type, "rag template = ", template_data)
             formatted_template = []
             for cht in template_data:
                 content = cht["content"]
                 if "{context}" in content or "{question}" in content:
                     try:
                         content = content.format(
                         )
                     except KeyError as e:
                         self.logger.error(f"Missing placeholder in template: {e}")
                         if "{context}" in content:
                             content = content.replace("{context}", formatted_context)
                         if "{question}" in content:
                             content = content.replace("{question}", question)
                 formatted_chat = {
                     "role": cht["role"],
                     "content": content
                 }
                 if "description" in cht:
                     formatted_chat["description"] = cht["description"]
                 formatted_template.append(formatted_chat)
             return formatted_template
         else:
             return [
                  {"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": question}
         """
         await self._check_model_loaded()
         streamer = TextIteratorStreamer(
             self.tokenizer,
             timeout=self.config.stream_timeout,
         def _generate_sync():
             try:
                 inputs = self.tokenizer.apply_chat_template(
                     prompt,
                     add_generation_prompt=True,
                     return_tensors="pt"
                 )
                 gen_config = self.generation_config
                 if any([max_new_tokens, temperature, top_p]):
                     gen_config = GenerationConfig(
                         **kwargs
                     )
                 self.model.to("cuda")
                 input_ids = inputs.to("cuda")
                 generation_kwargs = {
                     "input_ids": input_ids,
                     "generation_config": gen_config,
                 self.logger.error(f"Error during stream generation setup: {e}")
                 raise
         generation_thread = await asyncio.get_event_loop().run_in_executor(
             self.executor, _generate_sync
         )
         err = None
         try:
             for token in streamer:
+                if token:
                     yield token
             err = await asyncio.get_event_loop().run_in_executor(
                 self.executor, generation_thread.join
             )
         except Exception as e:
             self.logger.error(f"Error during streaming: {e}, {err}")
             if generation_thread.is_alive():
                 generation_thread.join(timeout=1.0)
             raise
         """
         await self._check_model_loaded()
         prompt = await self.format_rag_prompt(question, contexts, template_type)
         temp = temperature if temperature is not None else 0.3
         async for chunk in self.generate_stream(
         def _format_chat():
             try:
                 formatted_prompt = self.tokenizer.apply_chat_template(
                     messages,
                     tokenize=False,
                 self.logger.error(f"Error during chat formatting: {e}")
                 raise
         formatted_prompt = await asyncio.get_event_loop().run_in_executor(
             self.executor, _format_chat
         )
         """
         await self._check_model_loaded()
         user_messages = [msg for msg in messages if msg.get("role") == "user"]
         if not user_messages:
             raise ValueError("No user message found in conversation")
         last_question = user_messages[-1]["content"]
         async for chunk in self.rag_generate_stream(
             question=last_question,
             contexts=contexts,
         ):
             yield chunk
     async def collect_stream(self, stream_generator: AsyncGenerator[str, None]) -> str:
         """
         Collect semua chunks dari stream generator menjadi full text
         """
         await self._check_model_loaded()
         tasks = []
         for template_type in template_types:
             task = asyncio.create_task(
             )
             tasks.append((template_type, task))
         results = {}
         for template_type, task in tasks:
             try:
         """
         await self._check_model_loaded()
         prompt = await self.format_rag_prompt(question, contexts, template_type)
         temp = temperature if temperature is not None else 0.3
         return await self.generate(
         """
         await self._check_model_loaded()
         user_messages = [msg for msg in messages if msg.get("role") == "user"]
         if not user_messages:
             raise ValueError("No user message found in conversation")
         last_question = user_messages[-1]["content"]
         return await self.rag_generate(
             question=last_question,
             contexts=contexts,
         def _generate_sync():
             try:
                 inputs = self.tokenizer.apply_chat_template(
                     prompt,
                     add_generation_prompt=True,
                     return_tensors="pt"
                 )
                 gen_config = self.generation_config
                 if any([max_new_tokens, temperature, top_p]):
                     gen_config = GenerationConfig(
                         **kwargs
                     )
                 with torch.no_grad():
                     self.model.to("cuda")
                         **kwargs
                     )
                 generated_text = self.tokenizer.decode(
                     outputs[0][prompt_length:],
                     skip_special_tokens=True
                 )
                 print("Generated Text", generated_text)
                 return generated_text
             except Exception as e:
                 self.logger.error(f"Error during generation: {e}")
                 raise
         try:
             result = await asyncio.wait_for(
                 asyncio.get_event_loop().run_in_executor(self.executor, _generate_sync),
         def _format_chat():
             try:
                 formatted_prompt = self.tokenizer.apply_chat_template(
                     messages,
                     chat_template="rag",
                 self.logger.error(f"Error during chat formatting: {e}")
                 raise
         formatted_prompt = await asyncio.get_event_loop().run_in_executor(
             self.executor, _format_chat
         )
                 else:
                     self.logger.warning(f"Unknown config parameter: {key}")
             if self.is_loaded:
                 self.generation_config = GenerationConfig(
                     max_length=self.config.max_length,
         }
         if self.is_loaded:
             def _get_info():
                 return {
                     "vocab_size": self.tokenizer.vocab_size,
         """
         await self._check_model_loaded()
         tasks = [
             asyncio.create_task(
                 self.generate(prompt, max_new_tokens=max_new_tokens, **kwargs)
             for prompt in prompts
         ]
         results = await asyncio.gather(*tasks, return_exceptions=True)
         processed_results = []
         for i, result in enumerate(results):
             if isinstance(result, Exception):
         """
         self.logger.info("Closing LM...")
         self.executor.shutdown(wait=True)
         if hasattr(self, 'model') and self.model is not None:
             del self.model
         if hasattr(self, 'tokenizer') and self.tokenizer is not None:

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/pipeline/preprocessing.py CHANGED Viewed

@@ -6,7 +6,7 @@ import logging
 from datetime import datetime
 import hashlib
-# Import types yang sudah ada
 from typing import List, Dict, Any, Optional, Union
 from dataclasses import dataclass
 from enum import Enum
@@ -15,36 +15,36 @@ from rag.retriever.retriever_types import *
 @dataclass
 class PreprocessingConfig:
     """Konfigurasi untuk preprocessing"""
-    # Text cleaning options
     remove_extra_whitespace: bool = True
     remove_special_chars: bool = False
     normalize_unicode: bool = True
     remove_urls: bool = False
     remove_emails: bool = False
-    # Chunking options
-    enable_chunking: bool = False        # Apakah perlu chunking lagi
     chunk_size: int = 500
     chunk_overlap: int = 50
-    chunk_method: str = "sentence"       # "sentence", "paragraph", "fixed"
-    # Content filtering
     min_content_length: int = 20
     max_content_length: int = 3000
     filter_empty_content: bool = True
     filter_duplicate_content: bool = True
-    # Metadata options
     extract_metadata: bool = True
     include_retrieval_info: bool = True
     include_document_info: bool = True
     include_timestamps: bool = True
-    # Scoring options
-    use_retrieval_scores: bool = True    # Use scores dari retrieval system
-    normalize_scores: bool = True        # Normalize scores ke range 0-1
-    min_score_threshold: float = 0.0    # Filter berdasarkan minimum score
-    score_boost_factor: float = 1.0     # Boost factor untuk scores
 class RetrievalPreprocessor:
     """
@@ -61,17 +61,17 @@ class RetrievalPreprocessor:
         """
         self.config = config or PreprocessingConfig()
-        # Setup logging
         logging.basicConfig(level=logging.INFO)
         self.logger = logging.getLogger(__name__)
-        # Regex patterns untuk cleaning
         self.url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
         self.email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')
         self.special_chars_pattern = re.compile(r'[^\w\s\.\,\!\?\;\:\-\(\)\[\]\{\}\"\'\/]')
         self.whitespace_pattern = re.compile(r'\s+')
-        # Cache untuk duplicate detection
         self._seen_content_hashes = set()
     def process_retrieval_result(self, retrieval_result: RetrievalResult) -> List[RetrievalResult]:
@@ -98,18 +98,18 @@ class RetrievalPreprocessor:
             f"Processing {len(retrieval_result.documents)} documents from retrieval result for query: '{retrieval_result.query}'"
         )
-        # Clear cache untuk setiap batch baru
         self._seen_content_hashes.clear()
         contexts = []
-        # Process setiap document
         for i, doc in enumerate(retrieval_result.documents):
             try:
-                # Get corresponding score
                 score = retrieval_result.scores[i] if i < len(retrieval_result.scores) else 0.0
-                # Process single document
                 processed_contexts = self._process_single_document(
                     document=doc,
                     retrieval_score=score,
@@ -124,7 +124,7 @@ class RetrievalPreprocessor:
                 self.logger.error(f"Error processing document {i}: {e}")
                 continue
-        # Post-processing
         contexts = self._post_process_contexts(contexts)
         self.logger.info(f"Successfully processed {len(contexts)} contexts from retrieval result")
@@ -154,23 +154,23 @@ class RetrievalPreprocessor:
             self.logger.warning(f"Empty content in document {document_index}")
             return []
-        # Clean content
         cleaned_content = self._clean_text(document.page_content)
         if not cleaned_content:
             return []
-        # Filter by length
         if len(cleaned_content) < self.config.min_content_length:
             self.logger.debug(f"Content too short in document {document_index}: {len(cleaned_content)} chars")
             return []
         if len(cleaned_content) > self.config.max_content_length:
-            # Truncate content
             cleaned_content = self._truncate_content(cleaned_content)
             self.logger.debug(f"Content truncated in document {document_index}")
-        # Check for duplicates
         if self.config.filter_duplicate_content:
             content_hash = hashlib.md5(cleaned_content.encode()).hexdigest()
             if content_hash in self._seen_content_hashes:
@@ -178,12 +178,12 @@ class RetrievalPreprocessor:
                 return []
             self._seen_content_hashes.add(content_hash)
-        # Filter by score threshold
         if self.config.use_retrieval_scores and retrieval_score < self.config.min_score_threshold:
             self.logger.debug(f"Score too low in document {document_index}: {retrieval_score}")
             return []
-        # Chunking (if enabled)
         if self.config.enable_chunking:
             chunks = self._chunk_content(cleaned_content)
             contexts = []
@@ -203,7 +203,7 @@ class RetrievalPreprocessor:
             return contexts
         else:
-            # Single context per document
             context = self._create_retrieved_context(
                 content=cleaned_content,
                 document=document,
@@ -229,13 +229,13 @@ class RetrievalPreprocessor:
         """
         Create RetrievalResult object
         """
-        # Process score
         final_score = self._process_score(retrieval_score, document_index, total_documents)
-        # Extract source
         source = self._extract_source(document)
-        # Build metadata
         metadata = self._build_metadata(
             document=document,
             retrieval_result=retrieval_result,
@@ -260,24 +260,24 @@ class RetrievalPreprocessor:
         cleaned = text
-        # Normalize unicode
         if self.config.normalize_unicode:
             import unicodedata
             cleaned = unicodedata.normalize('NFKC', cleaned)
-        # Remove URLs
         if self.config.remove_urls:
             cleaned = self.url_pattern.sub('', cleaned)
-        # Remove emails
         if self.config.remove_emails:
             cleaned = self.email_pattern.sub('', cleaned)
-        # Remove special characters
         if self.config.remove_special_chars:
             cleaned = self.special_chars_pattern.sub(' ', cleaned)
-        # Remove extra whitespace
         if self.config.remove_extra_whitespace:
             cleaned = self.whitespace_pattern.sub(' ', cleaned)
@@ -290,7 +290,7 @@ class RetrievalPreprocessor:
         if len(content) <= max_length:
             return content
-        # Try to cut at sentence boundary
         truncated = content[:max_length - 20]
         last_sentence_end = max(
             truncated.rfind('.'),
@@ -301,7 +301,7 @@ class RetrievalPreprocessor:
         if last_sentence_end > len(truncated) * 0.7:
             return truncated[:last_sentence_end + 1]
         else:
-            # Cut at word boundary
             last_space = truncated.rfind(' ')
             if last_space > len(truncated) * 0.8:
                 return truncated[:last_space] + "..."
@@ -320,7 +320,7 @@ class RetrievalPreprocessor:
         elif self.config.chunk_method == "fixed":
             return self._chunk_by_fixed_size(content)
         else:
-            return [content]  # No chunking
     def _chunk_by_sentence(self, text: str) -> List[str]:
         """Chunk by sentences"""
@@ -334,7 +334,7 @@ class RetrievalPreprocessor:
             if len(current_chunk) + len(sentence) > self.config.chunk_size and current_chunk:
                 chunks.append(current_chunk.strip())
-                # Handle overlap
                 if self.config.chunk_overlap > 0:
                     overlap_text = current_chunk[-self.config.chunk_overlap:]
                     current_chunk = overlap_text + " " + sentence
@@ -383,7 +383,7 @@ class RetrievalPreprocessor:
             end = start + self.config.chunk_size
             chunk = text[start:end]
-            # Try to break at word boundary
             if end < len(text):
                 last_space = chunk.rfind(' ')
                 if last_space > len(chunk) * 0.8:
@@ -392,7 +392,7 @@ class RetrievalPreprocessor:
             chunks.append(chunk.strip())
-            # Move with overlap
             start = end - self.config.chunk_overlap
             if start <= 0:
                 start = end
@@ -406,9 +406,9 @@ class RetrievalPreprocessor:
         score = retrieval_score * self.config.score_boost_factor
-        # Normalize to 0-1 range jika diperlukan
         if self.config.normalize_scores:
-            # Assume retrieval scores are already normalized, but ensure they are in range
             score = max(0.0, min(1.0, score))
         return round(score, 4)
@@ -417,14 +417,14 @@ class RetrievalPreprocessor:
         """Extract source dari document metadata"""
         metadata = document.metadata or {}
-        # Try different metadata keys for source
         source_keys = ['source', 'file_name', 'filename', 'title', 'file_path', 'path']
         for key in source_keys:
             if key in metadata and metadata[key]:
                 return str(metadata[key])
-        # Fallback to generic source
         return "unknown_source"
     def _build_metadata(self,
@@ -439,7 +439,7 @@ class RetrievalPreprocessor:
         metadata = {}
         if self.config.extract_metadata:
-            # Include original document metadata
             if document.metadata and self.config.include_document_info:
                 metadata.update({
                     "original_metadata": document.metadata,
@@ -447,7 +447,7 @@ class RetrievalPreprocessor:
                     "total_documents": total_documents
                 })
-            # Include chunking info
             if chunk_index is not None:
                 metadata.update({
                     "chunk_index": chunk_index,
@@ -455,7 +455,7 @@ class RetrievalPreprocessor:
                     "is_chunked": total_chunks > 1
                 })
-            # Include retrieval info
             if self.config.include_retrieval_info:
                 metadata.update({
                     "retrieval_query": retrieval_result.query,
@@ -463,7 +463,7 @@ class RetrievalPreprocessor:
                     "retrieval_metadata": retrieval_result.metadata
                 })
-            # Include processing info
             if self.config.include_timestamps:
                 metadata.update({
                     "processed_at": datetime.now().isoformat(),
@@ -480,7 +480,7 @@ class RetrievalPreprocessor:
                     }
                 })
-            # Content statistics
             word_count = len(content.split())
             sentence_count = len(re.split(r'[.!?]+', content))
@@ -500,11 +500,11 @@ class RetrievalPreprocessor:
         if not contexts:
             return contexts
-        # Sort by score (descending)
         if self.config.use_retrieval_scores:
             contexts.sort(key=lambda x: x.score or 0.0, reverse=True)
-        # Additional filtering jika diperlukan
         filtered_contexts = []
         for ctx in contexts:
             if self.config.filter_empty_content and not ctx.content.strip():
@@ -522,16 +522,16 @@ class RetrievalPreprocessor:
         total_words = sum(len(ctx.content.split()) for ctx in contexts)
         total_chars = sum(len(ctx.content) for ctx in contexts)
-        # Score distribution
         scores = [ctx.score for ctx in contexts if ctx.score is not None]
-        # Source distribution
         sources = {}
         for ctx in contexts:
             if ctx.source:
                 sources[ctx.source] = sources.get(ctx.source, 0) + 1
-        # Chunking stats
         chunked_contexts = sum(1 for ctx in contexts
                              if ctx.metadata and ctx.metadata.get("is_chunked", False))
@@ -557,7 +557,7 @@ class RetrievalPreprocessor:
             stats["source_distribution"] = sources
             stats["unique_sources"] = len(sources)
-        # Content length distribution
         lengths = [len(ctx.content) for ctx in contexts]
         stats["content_length_stats"] = {
             "min_length": min(lengths),
@@ -589,7 +589,7 @@ class RetrievalPreprocessor:
             try:
                 contexts = self.process_retrieval_result(result)
-                # Add batch info to metadata
                 for ctx in contexts:
                     if ctx.metadata:
                         ctx.metadata["batch_index"] = i
@@ -606,7 +606,7 @@ class RetrievalPreprocessor:
                 self.logger.error(f"Error processing retrieval result {i}: {e}")
                 continue
-        # Final post-processing untuk batch
         all_contexts = self._post_process_contexts(all_contexts)
         self.logger.info(f"Batch processing completed: {len(all_contexts)} total contexts")
@@ -637,12 +637,12 @@ class RetrievalPreprocessor:
         for ctx in contexts:
             content_words = set(ctx.content.lower().split())
-            # Simple relevance calculation: overlap of words
             overlap = len(query_words.intersection(content_words))
             relevance_score = overlap / len(query_words) if query_words else 0.0
             if relevance_score >= min_relevance_score:
-                # Update metadata dengan relevance info
                 if ctx.metadata:
                     ctx.metadata["query_relevance_score"] = round(relevance_score, 3)
                     ctx.metadata["matched_query_words"] = list(query_words.intersection(content_words))
@@ -654,7 +654,7 @@ class RetrievalPreprocessor:
                 filtered_contexts.append(ctx)
-        # Sort by relevance score
         filtered_contexts.sort(
             key=lambda x: x.metadata.get("query_relevance_score", 0.0),
             reverse=True
@@ -699,9 +699,9 @@ class RetrievalPreprocessor:
                 if sim_score >= similarity_threshold:
                     is_duplicate = True
-                    # Keep the one with higher score
                     if (ctx.score or 0.0) > (existing_ctx.score or 0.0):
-                        # Replace existing with current
                         idx = deduplicated.index(existing_ctx)
                         deduplicated[idx] = ctx
@@ -741,12 +741,12 @@ class RetrievalPreprocessor:
             if not proc_result.chunks:
                 continue
-            # Convert Document chunks to RetrievalResult
             for j, chunk in enumerate(proc_result.chunks):
-                # Extract source dari document metadata
                 source = self._extract_source(chunk)
-                # Build metadata from ProcessingResult
                 metadata = {
                     "document_metadata": proc_result.document_metadata.__dict__,
                     "chunk_index": j,
@@ -755,21 +755,21 @@ class RetrievalPreprocessor:
                     "processed_at": datetime.now().isoformat()
                 }
-                # Include original chunk metadata
                 if chunk.metadata:
                     metadata["original_chunk_metadata"] = chunk.metadata
-                # Clean content
                 cleaned_content = self._clean_text(chunk.page_content)
                 if not cleaned_content or len(cleaned_content) < self.config.min_content_length:
                     continue
-                # Create RetrievalResult
                 context = RetrievalResult(
                     content=cleaned_content,
                     source=source,
-                    score=1.0,  # Default score for processing results
                     metadata=metadata
                 )

 from datetime import datetime
 import hashlib
 from typing import List, Dict, Any, Optional, Union
 from dataclasses import dataclass
 from enum import Enum
 @dataclass
 class PreprocessingConfig:
     """Konfigurasi untuk preprocessing"""
     remove_extra_whitespace: bool = True
     remove_special_chars: bool = False
     normalize_unicode: bool = True
     remove_urls: bool = False
     remove_emails: bool = False
+    enable_chunking: bool = False
     chunk_size: int = 500
     chunk_overlap: int = 50
+    chunk_method: str = "sentence"
     min_content_length: int = 20
     max_content_length: int = 3000
     filter_empty_content: bool = True
     filter_duplicate_content: bool = True
     extract_metadata: bool = True
     include_retrieval_info: bool = True
     include_document_info: bool = True
     include_timestamps: bool = True
+    use_retrieval_scores: bool = True
+    normalize_scores: bool = True
+    min_score_threshold: float = 0.0
+    score_boost_factor: float = 1.0
 class RetrievalPreprocessor:
     """
         """
         self.config = config or PreprocessingConfig()
         logging.basicConfig(level=logging.INFO)
         self.logger = logging.getLogger(__name__)
         self.url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
         self.email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b')
         self.special_chars_pattern = re.compile(r'[^\w\s\.\,\!\?\;\:\-\(\)\[\]\{\}\"\'\/]')
         self.whitespace_pattern = re.compile(r'\s+')
         self._seen_content_hashes = set()
     def process_retrieval_result(self, retrieval_result: RetrievalResult) -> List[RetrievalResult]:
             f"Processing {len(retrieval_result.documents)} documents from retrieval result for query: '{retrieval_result.query}'"
         )
         self._seen_content_hashes.clear()
         contexts = []
         for i, doc in enumerate(retrieval_result.documents):
             try:
                 score = retrieval_result.scores[i] if i < len(retrieval_result.scores) else 0.0
                 processed_contexts = self._process_single_document(
                     document=doc,
                     retrieval_score=score,
                 self.logger.error(f"Error processing document {i}: {e}")
                 continue
         contexts = self._post_process_contexts(contexts)
         self.logger.info(f"Successfully processed {len(contexts)} contexts from retrieval result")
             self.logger.warning(f"Empty content in document {document_index}")
             return []
         cleaned_content = self._clean_text(document.page_content)
         if not cleaned_content:
             return []
         if len(cleaned_content) < self.config.min_content_length:
             self.logger.debug(f"Content too short in document {document_index}: {len(cleaned_content)} chars")
             return []
         if len(cleaned_content) > self.config.max_content_length:
             cleaned_content = self._truncate_content(cleaned_content)
             self.logger.debug(f"Content truncated in document {document_index}")
         if self.config.filter_duplicate_content:
             content_hash = hashlib.md5(cleaned_content.encode()).hexdigest()
             if content_hash in self._seen_content_hashes:
                 return []
             self._seen_content_hashes.add(content_hash)
         if self.config.use_retrieval_scores and retrieval_score < self.config.min_score_threshold:
             self.logger.debug(f"Score too low in document {document_index}: {retrieval_score}")
             return []
         if self.config.enable_chunking:
             chunks = self._chunk_content(cleaned_content)
             contexts = []
             return contexts
         else:
             context = self._create_retrieved_context(
                 content=cleaned_content,
                 document=document,
         """
         Create RetrievalResult object
         """
         final_score = self._process_score(retrieval_score, document_index, total_documents)
         source = self._extract_source(document)
         metadata = self._build_metadata(
             document=document,
             retrieval_result=retrieval_result,
         cleaned = text
         if self.config.normalize_unicode:
             import unicodedata
             cleaned = unicodedata.normalize('NFKC', cleaned)
         if self.config.remove_urls:
             cleaned = self.url_pattern.sub('', cleaned)
         if self.config.remove_emails:
             cleaned = self.email_pattern.sub('', cleaned)
         if self.config.remove_special_chars:
             cleaned = self.special_chars_pattern.sub(' ', cleaned)
         if self.config.remove_extra_whitespace:
             cleaned = self.whitespace_pattern.sub(' ', cleaned)
         if len(content) <= max_length:
             return content
         truncated = content[:max_length - 20]
         last_sentence_end = max(
             truncated.rfind('.'),
         if last_sentence_end > len(truncated) * 0.7:
             return truncated[:last_sentence_end + 1]
         else:
             last_space = truncated.rfind(' ')
             if last_space > len(truncated) * 0.8:
                 return truncated[:last_space] + "..."
         elif self.config.chunk_method == "fixed":
             return self._chunk_by_fixed_size(content)
         else:
+            return [content]
     def _chunk_by_sentence(self, text: str) -> List[str]:
         """Chunk by sentences"""
             if len(current_chunk) + len(sentence) > self.config.chunk_size and current_chunk:
                 chunks.append(current_chunk.strip())
                 if self.config.chunk_overlap > 0:
                     overlap_text = current_chunk[-self.config.chunk_overlap:]
                     current_chunk = overlap_text + " " + sentence
             end = start + self.config.chunk_size
             chunk = text[start:end]
             if end < len(text):
                 last_space = chunk.rfind(' ')
                 if last_space > len(chunk) * 0.8:
             chunks.append(chunk.strip())
             start = end - self.config.chunk_overlap
             if start <= 0:
                 start = end
         score = retrieval_score * self.config.score_boost_factor
         if self.config.normalize_scores:
             score = max(0.0, min(1.0, score))
         return round(score, 4)
         """Extract source dari document metadata"""
         metadata = document.metadata or {}
         source_keys = ['source', 'file_name', 'filename', 'title', 'file_path', 'path']
         for key in source_keys:
             if key in metadata and metadata[key]:
                 return str(metadata[key])
         return "unknown_source"
     def _build_metadata(self,
         metadata = {}
         if self.config.extract_metadata:
             if document.metadata and self.config.include_document_info:
                 metadata.update({
                     "original_metadata": document.metadata,
                     "total_documents": total_documents
                 })
             if chunk_index is not None:
                 metadata.update({
                     "chunk_index": chunk_index,
                     "is_chunked": total_chunks > 1
                 })
             if self.config.include_retrieval_info:
                 metadata.update({
                     "retrieval_query": retrieval_result.query,
                     "retrieval_metadata": retrieval_result.metadata
                 })
             if self.config.include_timestamps:
                 metadata.update({
                     "processed_at": datetime.now().isoformat(),
                     }
                 })
             word_count = len(content.split())
             sentence_count = len(re.split(r'[.!?]+', content))
         if not contexts:
             return contexts
         if self.config.use_retrieval_scores:
             contexts.sort(key=lambda x: x.score or 0.0, reverse=True)
         filtered_contexts = []
         for ctx in contexts:
             if self.config.filter_empty_content and not ctx.content.strip():
         total_words = sum(len(ctx.content.split()) for ctx in contexts)
         total_chars = sum(len(ctx.content) for ctx in contexts)
         scores = [ctx.score for ctx in contexts if ctx.score is not None]
         sources = {}
         for ctx in contexts:
             if ctx.source:
                 sources[ctx.source] = sources.get(ctx.source, 0) + 1
         chunked_contexts = sum(1 for ctx in contexts
                              if ctx.metadata and ctx.metadata.get("is_chunked", False))
             stats["source_distribution"] = sources
             stats["unique_sources"] = len(sources)
         lengths = [len(ctx.content) for ctx in contexts]
         stats["content_length_stats"] = {
             "min_length": min(lengths),
             try:
                 contexts = self.process_retrieval_result(result)
                 for ctx in contexts:
                     if ctx.metadata:
                         ctx.metadata["batch_index"] = i
                 self.logger.error(f"Error processing retrieval result {i}: {e}")
                 continue
         all_contexts = self._post_process_contexts(all_contexts)
         self.logger.info(f"Batch processing completed: {len(all_contexts)} total contexts")
         for ctx in contexts:
             content_words = set(ctx.content.lower().split())
             overlap = len(query_words.intersection(content_words))
             relevance_score = overlap / len(query_words) if query_words else 0.0
             if relevance_score >= min_relevance_score:
                 if ctx.metadata:
                     ctx.metadata["query_relevance_score"] = round(relevance_score, 3)
                     ctx.metadata["matched_query_words"] = list(query_words.intersection(content_words))
                 filtered_contexts.append(ctx)
         filtered_contexts.sort(
             key=lambda x: x.metadata.get("query_relevance_score", 0.0),
             reverse=True
                 if sim_score >= similarity_threshold:
                     is_duplicate = True
                     if (ctx.score or 0.0) > (existing_ctx.score or 0.0):
                         idx = deduplicated.index(existing_ctx)
                         deduplicated[idx] = ctx
             if not proc_result.chunks:
                 continue
             for j, chunk in enumerate(proc_result.chunks):
                 source = self._extract_source(chunk)
                 metadata = {
                     "document_metadata": proc_result.document_metadata.__dict__,
                     "chunk_index": j,
                     "processed_at": datetime.now().isoformat()
                 }
                 if chunk.metadata:
                     metadata["original_chunk_metadata"] = chunk.metadata
                 cleaned_content = self._clean_text(chunk.page_content)
                 if not cleaned_content or len(cleaned_content) < self.config.min_content_length:
                     continue
                 context = RetrievalResult(
                     content=cleaned_content,
                     source=source,
+                    score=1.0,
                     metadata=metadata
                 )

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/base_retriever.py CHANGED Viewed

@@ -47,7 +47,7 @@ class BaseRetriever(ABC):
         """Delete documents by IDs"""
         pass
-# ===== DOCUMENT LOADERS =====
 class MultiFormatDocumentLoader(BaseDocumentLoader):
     """Document loader supporting multiple formats"""
@@ -68,10 +68,10 @@ class MultiFormatDocumentLoader(BaseDocumentLoader):
             if not file_path.exists():
                 raise FileNotFoundError(f"File not found: {file_path}")
-            # Determine document type
             doc_type = self._get_document_type(file_path)
-            # Load document
             loader_func = self.loaders.get(doc_type)
             if not loader_func:
                 raise ValueError(f"Unsupported file type: {doc_type}")
@@ -79,7 +79,7 @@ class MultiFormatDocumentLoader(BaseDocumentLoader):
             logger.info(f"Loading {doc_type} document: {file_path}")
             documents = await loader_func(str(file_path))
-            # Add metadata to documents
             for doc in documents:
                 doc.metadata.update({
                     "file_path": str(file_path),

         """Delete documents by IDs"""
         pass
 class MultiFormatDocumentLoader(BaseDocumentLoader):
     """Document loader supporting multiple formats"""
             if not file_path.exists():
                 raise FileNotFoundError(f"File not found: {file_path}")
             doc_type = self._get_document_type(file_path)
             loader_func = self.loaders.get(doc_type)
             if not loader_func:
                 raise ValueError(f"Unsupported file type: {doc_type}")
             logger.info(f"Loading {doc_type} document: {file_path}")
             documents = await loader_func(str(file_path))
             for doc in documents:
                 doc.metadata.update({
                     "file_path": str(file_path),

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/document_loader.py CHANGED Viewed

@@ -56,10 +56,10 @@ class MultiFormatDocumentLoader(BaseDocumentLoader):
             if not file_path.exists():
                 raise FileNotFoundError(f"File not found: {file_path}")
-            # Determine document type
             doc_type = self._get_document_type(file_path)
-            # Load document
             loader_func = self.loaders.get(doc_type)
             if not loader_func:
                 raise ValueError(f"Unsupported file type: {doc_type}")
@@ -67,7 +67,7 @@ class MultiFormatDocumentLoader(BaseDocumentLoader):
             logger.info(f"Loading {doc_type} document: {file_path}")
             documents = await loader_func(str(file_path))
-            # Add metadata to documents
             for doc in documents:
                 doc.metadata.update({
                     "file_path": str(file_path),

             if not file_path.exists():
                 raise FileNotFoundError(f"File not found: {file_path}")
             doc_type = self._get_document_type(file_path)
             loader_func = self.loaders.get(doc_type)
             if not loader_func:
                 raise ValueError(f"Unsupported file type: {doc_type}")
             logger.info(f"Loading {doc_type} document: {file_path}")
             documents = await loader_func(str(file_path))
             for doc in documents:
                 doc.metadata.update({
                     "file_path": str(file_path),

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/document_processor.py CHANGED Viewed

@@ -18,7 +18,7 @@ class DocumentProcessor:
         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap
-        # Default separators for better chunking
         if separators is None:
             separators = ["\n\n", "\n", " ", ""]
@@ -34,12 +34,12 @@ class DocumentProcessor:
         try:
             logger.info(f"Processing {len(documents)} documents")
-            # Split documents into chunks
             chunks = await asyncio.get_event_loop().run_in_executor(
                 None, self.text_splitter.split_documents, documents
             )
-            # Add chunk metadata
             for i, chunk in enumerate(chunks):
                 chunk.metadata.update({
                     "chunk_id": i,

         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap
         if separators is None:
             separators = ["\n\n", "\n", " ", ""]
         try:
             logger.info(f"Processing {len(documents)} documents")
             chunks = await asyncio.get_event_loop().run_in_executor(
                 None, self.text_splitter.split_documents, documents
             )
             for i, chunk in enumerate(chunks):
                 chunk.metadata.update({
                     "chunk_id": i,

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/langchain_retriever.py CHANGED Viewed

@@ -1,14 +1,14 @@
 from rag.retriever.base_retriever import BaseRetriever
-# Embeddings
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_openai import OpenAIEmbeddings
-# Vector stores
 from langchain_community.vectorstores import Chroma, FAISS, Pinecone
 from langchain.retrievers import EnsembleRetriever
-# Retriever base
 from langchain_core.vectorstores import VectorStoreRetriever
 from langchain_community.retrievers import BM25Retriever
 from langchain.retrievers import ContextualCompressionRetriever
@@ -85,8 +85,8 @@ class LangChainRetriever(BaseRetriever):
                 search_kwargs={"k": 10}
             )
             if self.use_hybrid_search:
-                self.bm25_retriever = None  # initialized later after adding docs
-                return vector_retriever  # temporary fallback
             else:
                 return vector_retriever
         except Exception as e:
@@ -162,13 +162,13 @@ class LangChainRetriever(BaseRetriever):
             return False
     async def _update_bm25_retriever(self, documents: List[Document]):
         try:
-            # Create BM25 retriever from documents
             self.bm25_retriever = BM25Retriever.from_documents(documents)
-            self.bm25_retriever.k = 10  # Set number of documents to retrieve
-            # For hybrid search, you have several options:
-            # Option 1: Use only BM25 retriever (simplest fix)
             self.retriever = self.bm25_retriever
             vector_retriever = VectorStoreRetriever(
@@ -178,12 +178,12 @@ class LangChainRetriever(BaseRetriever):
             self.retriever = EnsembleRetriever(
                 retrievers=[vector_retriever, self.bm25_retriever],
-                weights=[0.5, 0.5]  # Equal weight to both retrievers
             )
         except Exception as e:
             logger.error(f"Error updating BM25 retriever: {str(e)}")
-            # Fallback to vector retriever only
             self.retriever = VectorStoreRetriever(
                 vectorstore=self.vectorstore,
                 search_kwargs={"k": 10}

 from rag.retriever.base_retriever import BaseRetriever
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma, FAISS, Pinecone
 from langchain.retrievers import EnsembleRetriever
 from langchain_core.vectorstores import VectorStoreRetriever
 from langchain_community.retrievers import BM25Retriever
 from langchain.retrievers import ContextualCompressionRetriever
                 search_kwargs={"k": 10}
             )
             if self.use_hybrid_search:
+                self.bm25_retriever = None
+                return vector_retriever
             else:
                 return vector_retriever
         except Exception as e:
             return False
     async def _update_bm25_retriever(self, documents: List[Document]):
         try:
             self.bm25_retriever = BM25Retriever.from_documents(documents)
+            self.bm25_retriever.k = 10
             self.retriever = self.bm25_retriever
             vector_retriever = VectorStoreRetriever(
             self.retriever = EnsembleRetriever(
                 retrievers=[vector_retriever, self.bm25_retriever],
+                weights=[0.5, 0.5]
             )
         except Exception as e:
             logger.error(f"Error updating BM25 retriever: {str(e)}")
             self.retriever = VectorStoreRetriever(
                 vectorstore=self.vectorstore,
                 search_kwargs={"k": 10}

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/web_search/duckduckgo_search.py CHANGED Viewed

@@ -7,7 +7,7 @@ from typing import AsyncGenerator, List
 class DuckDuckGoSearch:
     def __init__(self, html_loader: AsyncChromiumLoader = None, html_parser = None):
-        # Initialize dengan default values jika tidak diberikan
         self.html_loader = html_loader or AsyncChromiumLoader([])
         self.html_parser = html_parser or BeautifulSoupTransformer()
         self.logger = logging.getLogger("ddgs_logger")
@@ -16,7 +16,7 @@ class DuckDuckGoSearch:
         """Get page content from URLs - returns list of documents"""
         try:
             self.html_loader.urls = urls
-            html = await self.html_loader.aload()  # This returns a LIST
             self.logger.info(f"search engine aload result: {len(html)} documents loaded")
             docs_transformed = self.html_parser.transform_documents(
@@ -24,11 +24,11 @@ class DuckDuckGoSearch:
                 tags_to_extract=["p"],
                 remove_unwanted_tags=["a"]
             )
-            return docs_transformed  # Returns LIST of documents
         except Exception as e:
             self.logger.error(f"Error loading pages: {e}", exc_info=True)
-            return []  # Return empty list on error
     def truncate(self, text: str, max_words: int = 400) -> str:
         """Truncate text to specified number of words"""
@@ -51,12 +51,12 @@ class DuckDuckGoSearch:
         try:
             self.logger.info(f"Searching for: {query} (max_results: {max_results})")
-            # Step 1: Get search results from DDGS (regular iterator)
             results = DDGS().text(query, max_results=max_results)
             urls = []
-            # Step 2: Extract URLs using regular for loop (NOT async for)
-            for result in results:  # ← FIXED: Regular for loop
                 url = result.get('href')
                 if url:
                     urls.append(url)
@@ -67,20 +67,20 @@ class DuckDuckGoSearch:
                 self.logger.warning("No URLs found from search results")
                 return
-            # Step 3: Get page content (await the coroutine first)
-            docs = await self.get_page(urls)  # ← FIXED: Await first, get list
-            # Step 4: Process documents using regular for loop (NOT async for)
-            for doc in docs:  # ← FIXED: Regular for loop on list
                 try:
                     if hasattr(doc, 'page_content') and doc.page_content:
-                        # Clean up text
                         page_text = re.sub(r"\n\n+", "\n", doc.page_content)
                         page_text = page_text.strip()
-                        if page_text:  # Only yield if there's actual content
                             text = self.truncate(page_text)
-                            yield text  # Yield makes this an async generator
                 except Exception as e:
                     self.logger.error(f"Error processing document: {e}")
@@ -88,7 +88,7 @@ class DuckDuckGoSearch:
         except Exception as e:
             self.logger.error(f"Error in search method: {e}", exc_info=True)
-            # Don't re-raise, just log and return (generator will be empty)
     async def search_with_metadata(self, query: str, max_results: int = 5) -> AsyncGenerator[dict, None]:
         """
@@ -98,7 +98,7 @@ class DuckDuckGoSearch:
             results = DDGS().text(query, max_results=max_results)
             urls_and_titles = []
-            # Collect URLs and titles
             for result in results:
                 url = result.get('href')
                 title = result.get('title', 'No title')
@@ -108,11 +108,11 @@ class DuckDuckGoSearch:
             if not urls_and_titles:
                 return
-            # Get page content
             urls = [item['url'] for item in urls_and_titles]
             docs = await self.get_page(urls)
-            # Process and yield with metadata
             for i, doc in enumerate(docs):
                 try:
                     if hasattr(doc, 'page_content') and doc.page_content:
@@ -122,7 +122,7 @@ class DuckDuckGoSearch:
                         if page_text:
                             text = self.truncate(page_text)
-                            # Get metadata if available
                             metadata = {}
                             if i < len(urls_and_titles):
                                 metadata = urls_and_titles[i]

 class DuckDuckGoSearch:
     def __init__(self, html_loader: AsyncChromiumLoader = None, html_parser = None):
         self.html_loader = html_loader or AsyncChromiumLoader([])
         self.html_parser = html_parser or BeautifulSoupTransformer()
         self.logger = logging.getLogger("ddgs_logger")
         """Get page content from URLs - returns list of documents"""
         try:
             self.html_loader.urls = urls
+            html = await self.html_loader.aload()
             self.logger.info(f"search engine aload result: {len(html)} documents loaded")
             docs_transformed = self.html_parser.transform_documents(
                 tags_to_extract=["p"],
                 remove_unwanted_tags=["a"]
             )
+            return docs_transformed
         except Exception as e:
             self.logger.error(f"Error loading pages: {e}", exc_info=True)
+            return []
     def truncate(self, text: str, max_words: int = 400) -> str:
         """Truncate text to specified number of words"""
         try:
             self.logger.info(f"Searching for: {query} (max_results: {max_results})")
             results = DDGS().text(query, max_results=max_results)
             urls = []
+            for result in results:
                 url = result.get('href')
                 if url:
                     urls.append(url)
                 self.logger.warning("No URLs found from search results")
                 return
+            docs = await self.get_page(urls)
+            for doc in docs:
                 try:
                     if hasattr(doc, 'page_content') and doc.page_content:
                         page_text = re.sub(r"\n\n+", "\n", doc.page_content)
                         page_text = page_text.strip()
+                        if page_text:
                             text = self.truncate(page_text)
+                            yield text
                 except Exception as e:
                     self.logger.error(f"Error processing document: {e}")
         except Exception as e:
             self.logger.error(f"Error in search method: {e}", exc_info=True)
     async def search_with_metadata(self, query: str, max_results: int = 5) -> AsyncGenerator[dict, None]:
         """
             results = DDGS().text(query, max_results=max_results)
             urls_and_titles = []
             for result in results:
                 url = result.get('href')
                 title = result.get('title', 'No title')
             if not urls_and_titles:
                 return
             urls = [item['url'] for item in urls_and_titles]
             docs = await self.get_page(urls)
             for i, doc in enumerate(docs):
                 try:
                     if hasattr(doc, 'page_content') and doc.page_content:
                         if page_text:
                             text = self.truncate(page_text)
                             metadata = {}
                             if i < len(urls_and_titles):
                                 metadata = urls_and_titles[i]

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rtc/rtc_call.py CHANGED Viewed

@@ -31,7 +31,7 @@ import re
 from rag import cs_agent
-# Load .env
 load_dotenv()
 logging.basicConfig(level=logging.INFO)
@@ -101,7 +101,7 @@ class RTCHandler:
                 llm_time = time.time()
                 self.full_response = ""
-                # Single async function to handle both text streaming and audio generation
                 async def stream_text_to_audio():
                     chunk_size = 1024
                     no_buffer = 0
@@ -113,7 +113,7 @@ class RTCHandler:
                             chunk = stream_data["data"]["chunk"]
                             self.full_response += chunk
                             text_buffer += chunk
-                            # Generate audio immediately for each text chunk
                             if re.search(r'[.,?;!]', chunk):
                                 try:
                                     audio_buffer_gen =  await self.edge_tts.generate_audio_buffer(text_buffer)
@@ -121,37 +121,37 @@ class RTCHandler:
                                     audio_buffer.seek(0)
-                                    # Convert MP3 to PCM
                                     audio_segment = AudioSegment.from_file(audio_buffer, format="mp3")
                                     samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32) / (2 ** 15)
-                                    # Handle stereo to mono
                                     if audio_segment.channels == 2:
                                         samples = samples.reshape((-1, 2)).mean(axis=1)
-                                    # # Resample to 24kHz
-                                    # resampled = librosa.resample(samples, orig_sr=audio_segment.frame_rate, target_sr=24000)
                                     import torch
                                     import torchaudio
-                                    # Check if CUDA is available
                                     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-                                    # Convert numpy array to torch tensor and move to GPU
-                                    audio_tensor = torch.from_numpy(samples).unsqueeze(0).to(device)  # Add batch dimension and move to GPU
-                                    # Create resampler and move to GPU
                                     resampler = torchaudio.transforms.Resample(
                                         orig_freq=audio_segment.frame_rate,
                                         new_freq=24000
                                     ).to(device)
-                                    # Apply resampling on GPU
                                     resampled_tensor = resampler(audio_tensor)
-                                    # Convert back to numpy (move to CPU first)
                                     resampled = resampled_tensor.squeeze(0).cpu().numpy()
-                                    # Yield audio chunks
                                     for i in range(0, len(resampled), chunk_size):
                                         yield (24000, resampled[i:i + chunk_size])
                                     no_buffer = 0
@@ -169,7 +169,7 @@ class RTCHandler:
                             print(f"\nTotal time: {total_time:.2f}s")
                             break
-                # Run the single async function
                 loop = asyncio.new_event_loop()
                 asyncio.set_event_loop(loop)

 from rag import cs_agent
 load_dotenv()
 logging.basicConfig(level=logging.INFO)
                 llm_time = time.time()
                 self.full_response = ""
                 async def stream_text_to_audio():
                     chunk_size = 1024
                     no_buffer = 0
                             chunk = stream_data["data"]["chunk"]
                             self.full_response += chunk
                             text_buffer += chunk
                             if re.search(r'[.,?;!]', chunk):
                                 try:
                                     audio_buffer_gen =  await self.edge_tts.generate_audio_buffer(text_buffer)
                                     audio_buffer.seek(0)
                                     audio_segment = AudioSegment.from_file(audio_buffer, format="mp3")
                                     samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32) / (2 ** 15)
                                     if audio_segment.channels == 2:
                                         samples = samples.reshape((-1, 2)).mean(axis=1)
                                     import torch
                                     import torchaudio
                                     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                                    audio_tensor = torch.from_numpy(samples).unsqueeze(0).to(device)
                                     resampler = torchaudio.transforms.Resample(
                                         orig_freq=audio_segment.frame_rate,
                                         new_freq=24000
                                     ).to(device)
                                     resampled_tensor = resampler(audio_tensor)
                                     resampled = resampled_tensor.squeeze(0).cpu().numpy()
                                     for i in range(0, len(resampled), chunk_size):
                                         yield (24000, resampled[i:i + chunk_size])
                                     no_buffer = 0
                             print(f"\nTotal time: {total_time:.2f}s")
                             break
                 loop = asyncio.new_event_loop()
                 asyncio.set_event_loop(loop)

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/stt/whisper_stt.py CHANGED Viewed

@@ -15,27 +15,27 @@ class WhisperSTT:
             model_size: Model size (tiny, base, small, medium, large)
             device: Device to use ("auto", "cuda", "cpu")
         """
-        # Set up cache directory
         cache_dir = os.environ.get('WHISPER_CACHE_DIR', '/tmp/.cache/whisper')
         os.makedirs(cache_dir, exist_ok=True)
-        # Determine device
         if device == "auto":
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
         else:
             self.device = device
-        # Validate CUDA availability if requested
         if self.device == "cuda" and not torch.cuda.is_available():
             print("Warning: CUDA requested but not available. Falling back to CPU.")
             self.device = "cpu"
-        # Load model with device specification
         print(f"Loading Whisper model '{model_size}' on device: {self.device}")
         self.model = whisper.load_model(model_size, device=self.device, download_root=cache_dir)
-        self.language = "id"  # ISO-639-1 code for Bahasa Indonesia
-        # Print GPU info if using CUDA
         if self.device == "cuda":
             gpu_name = torch.cuda.get_device_name(0)
             gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
@@ -52,23 +52,23 @@ class WhisperSTT:
         Returns:
             Transcribed text
         """
-        # Save audio to temporary file
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
             tmp.write(audio.read())
             tmp.flush()
             tmp_path = tmp.name
         try:
-            # Transcribe with GPU acceleration if available
             result = self.model.transcribe(
                 tmp_path,
                 language=language,
-                # Optional: Add fp16 for faster inference on supported GPUs
                 fp16=self.device == "cuda"
             )
             return result.get("text", "")
         finally:
-            # Clean up temporary file
             os.remove(tmp_path)
     def get_device_info(self) -> dict:

             model_size: Model size (tiny, base, small, medium, large)
             device: Device to use ("auto", "cuda", "cpu")
         """
         cache_dir = os.environ.get('WHISPER_CACHE_DIR', '/tmp/.cache/whisper')
         os.makedirs(cache_dir, exist_ok=True)
         if device == "auto":
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
         else:
             self.device = device
         if self.device == "cuda" and not torch.cuda.is_available():
             print("Warning: CUDA requested but not available. Falling back to CPU.")
             self.device = "cpu"
         print(f"Loading Whisper model '{model_size}' on device: {self.device}")
         self.model = whisper.load_model(model_size, device=self.device, download_root=cache_dir)
+        self.language = "id"
         if self.device == "cuda":
             gpu_name = torch.cuda.get_device_name(0)
             gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
         Returns:
             Transcribed text
         """
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
             tmp.write(audio.read())
             tmp.flush()
             tmp_path = tmp.name
         try:
             result = self.model.transcribe(
                 tmp_path,
                 language=language,
                 fp16=self.device == "cuda"
             )
             return result.get("text", "")
         finally:
             os.remove(tmp_path)
     def get_device_info(self) -> dict:

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/tts/audio_edge_tts.py CHANGED Viewed

@@ -29,7 +29,7 @@ class EdgeTTS:
             pitch=self.pitch_str
         )
-        # Stream audio chunks
         async for chunk in communicate.stream():
             if chunk["type"] == "audio":
                 yield chunk["data"]
@@ -52,7 +52,7 @@ class EdgeTTS:
                 pitch=self.pitch_str
             )
-            # Collect all audio chunks into a buffer
             audio_buffer = io.BytesIO()
             async for chunk in communicate.stream():
                 if chunk["type"] == "audio":
@@ -85,7 +85,7 @@ class EdgeTTS:
             async for chunk in communicate.stream():
                 if chunk["type"] == "audio":
-                    # Call callback with audio chunk
                     callback_func(chunk["data"], None)
         except Exception as e:

             pitch=self.pitch_str
         )
         async for chunk in communicate.stream():
             if chunk["type"] == "audio":
                 yield chunk["data"]
                 pitch=self.pitch_str
             )
             audio_buffer = io.BytesIO()
             async for chunk in communicate.stream():
                 if chunk["type"] == "audio":
             async for chunk in communicate.stream():
                 if chunk["type"] == "audio":
                     callback_func(chunk["data"], None)
         except Exception as e:

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/__init__.py CHANGED Viewed

@@ -49,11 +49,11 @@ inferencer_config = InferencerConfig(
 )
 document_retriever = LangChainRetriever(
-        embedding_model="sentence-transformers/all-MiniLM-L6-v2",
         vectorstore_type="chroma",
         vectorstore_path="vectorstore/",
         use_hybrid_search=True,
-        chunk_size=1000,
         chunk_overlap=200
 )

 )
 document_retriever = LangChainRetriever(
+        embedding_model="BAAI/bge-large-en",
         vectorstore_type="chroma",
         vectorstore_path="vectorstore/",
         use_hybrid_search=True,
+        chunk_size=3000,
         chunk_overlap=200
 )

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/__init__.py ADDED Viewed

File without changes

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/agents.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from rag.pipeline.language_model import LM
+from rag.inference.inferencer import Inferencer
+from abc import ABC, abstractmethod
+class Agent(ABC):
+    """Abstract base for agents that drive an inferencer with a chat prompt.
+
+    Concrete agents implement ``get_result``.
+    """
+    def __init__(self, inferencer: Inferencer, prompt_template=None):
+        """Bind the agent to an inferencer and install its prompt template.
+
+        Args:
+            inferencer: Object exposing ``model.prompt_template``; that
+                attribute is overwritten with ``prompt_template``.
+            prompt_template: List of chat-message dicts. When omitted, a
+                fresh single-message system prompt is built for this instance.
+        """
+        if prompt_template is None:
+            # Built per call: a list literal in the signature is a single
+            # object shared (and mutable) across every agent and every model
+            # it gets assigned to.
+            prompt_template = [
+                {
+                    "role": "system",
+                    "content": "You are an agent that doing some specic task",
+                }
+            ]
+        self.inferencer = inferencer
+        self.inferencer.model.prompt_template = prompt_template
+        self.prompt = prompt_template
+    @abstractmethod
+    async def get_result(self):
+        """Produce the agent's output; concrete agents must override this."""
+        pass

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/customer_service_agent.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from rag.agents.agents import Agent
+from rag.inference.inferencer import Inferencer
+class CSAgent(Agent):
+    """Customer-service agent: indexes a fixed set of PDFs and streams answers."""
+    def __init__(self, inferencer : Inferencer , prompt_template):
+        """Keep the inferencer and template, and list the documents to index."""
+        super().__init__(inferencer, prompt_template)
+        self.inferencer = inferencer
+        self.prompt_template = prompt_template
+        # Relative paths; the commented-out entries are currently disabled.
+        self.file_paths = [
+            "../documents/bpjs.pdf",
+            # "../documents/pph21.pdf",
+            # "../documents/lembur.pdf",
+            # "../documents/uu13.pdf",
+            "../documents/file.pdf",
+        ]
+    async def load_documents(self):
+        """Add every path in ``self.file_paths`` to the retriever, in order."""
+        for document_path in self.file_paths:
+            await self.add_doc(document_path)
+    async def add_doc(self, file_path):
+        """Add one file through the inferencer's retriever and print the outcome."""
+        outcome = await self.inferencer.retriever.add_document_from_file(file_path)
+        if not outcome.success:
+            print(f"Failed to process: {outcome.error_message}")
+            return
+        print(f"Successfully processed: {outcome.document_metadata.file_name}")
+        print(f"Chunks created: {outcome.document_metadata.chunk_count}")
+    async def get_result(self, question):
+        """Stream answer items for ``question`` (top 3 results, no reranking)."""
+        # Re-install this agent's template before inferring.
+        self.inferencer.model.prompt_template = self.prompt_template
+        answer_stream = self.inferencer.infer_stream(
+            query=question,
+            enable_reranking=False,
+            k=3,
+        )
+        async for piece in answer_stream:
+            yield piece

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/gpt_customer_service_agent.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from rag.agents.agents import Agent
+from rag.pipeline.language_model import LM
+from rag.inference.inferencer import Inferencer
+class GPTCSAgent(Agent):
+    """Agent that answers a question with a single non-streaming inference call."""
+    def __init__(self, inferencer : Inferencer , prompt_template):
+        """Keep the inferencer and the prompt template used for each request."""
+        super().__init__(inferencer, prompt_template)
+        self.inferencer = inferencer
+        self.prompt_template = prompt_template
+    async def get_result(self, question : str):
+        """Install this agent's template, log the question and return the inference result."""
+        self.inferencer.model.prompt_template = self.prompt_template
+        print("Question received :", question)
+        answer = await self.inferencer.infer(query=question)
+        return answer

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/agents/query_maker_agent.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from rag.agents.agents import Agent
+from rag.pipeline.language_model import LM
+from rag.inference.inferencer import Inferencer
+class QueryMakerAgent(Agent):
+    """Agent that passes a question through the inferencer with its own prompt template."""
+    def __init__(self, inferencer : Inferencer , prompt_template):
+        """Keep the inferencer and the prompt template used for each request."""
+        super().__init__(inferencer, prompt_template)
+        self.inferencer = inferencer
+        self.prompt_template = prompt_template
+    async def get_result(self, question : str):
+        """Install this agent's template, log the question and return the inference result."""
+        self.inferencer.model.prompt_template = self.prompt_template
+        print("Question received :", question)
+        rewritten = await self.inferencer.infer(query=question)
+        return rewritten

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/chat_template/__init__.py ADDED Viewed

	@@ -0,0 +1,29 @@

+def read_template_txt(file_path):
+    """Read a plain-text template and return its contents.
+
+    ``file_path`` is the template name without extension; the file opened is
+    ``rag/chat_template/<file_path>.txt``, relative to the current working
+    directory, decoded as UTF-8.
+    """
+    template_file = f"rag/chat_template/{file_path}.txt"
+    with open(template_file, mode='r', encoding='utf-8') as handle:
+        contents = handle.read()
+    return contents
+def get_chat_template(file_name):
+    """Build a two-message chat template from a system-prompt text file.
+
+    The system prompt is loaded with ``read_template_txt(file_name)`` and
+    embedded in the first message. The second (user) message still contains
+    the literal ``{question}`` and ``{context}`` placeholders, to be filled
+    in later by whoever formats the template.
+
+    Returns:
+        A list of two ``{"role", "content"}`` dicts: system, then user.
+    """
+    sys_prompt = read_template_txt(file_name)
+    return [
+        {
+            "role" : "system",
+            "content" : f"""
+            {sys_prompt}
+            """
+        },
+        {
+            "role" : "user",
+            # Plain (non-f) string: the braces below stay as placeholders.
+            "content" : """
+            Please answer properly:
+            {question}
+            From given context :
+            {context}
+            """
+        }
+    ]

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/chat_template/customer_service.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+You are a friendly and professional Customer Service representative in the Human Resource Information System (HRIS) field,
+fluent in Indonesian. Your job is to assist customers with accurate information based on your company's basic knowledge. Follow these guidelines:
+- Always greet customers in a friendly and professional manner.
+- Your answers are contextual and objective.
+- Provide clear, easy-to-understand, and structured answers based on the context provided by the user.
+- If information is not available, offer alternative assistance or direct them to the appropriate channel.
+- Use polite language and empathize with the customer's needs.
+- Conclude by offering further assistance.
+- You are highly skilled in the area relevant to the given context.
+Please use the given context to answer accurately.

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/chat_template/query_maker.txt ADDED Viewed

	@@ -0,0 +1,35 @@

+Anda adalah agen AI yang tepat dan objektif,
+Anda bertugas mengubah pertanyaan atau pernyataan pengguna menjadi query yang eksplisit dan efisien untuk keperluan pencarian dokumen dalam sistem RAG (Retrieval-Augmented Generation).
+Ikuti langkah-langkah berikut:
+1. Ekstrak bagian-bagian penting dari input pengguna:
+   - **Intent**: Tujuan utama atau jenis permintaan (misalnya: apa itu, cara, syarat, apakah bisa, berapa).
+   - **Entity/Noun Phrase**: Objek utama yang dibahas (misalnya: BPJS, tokenizer truncation, RWKV, gaji).
+   - **Context**: Informasi pendukung yang menyempitkan fokus (misalnya: kecelakaan kerja, gaji 1 juta per bulan, perusahaan mitra BPJS).
+   - **Question**: Pertanyaan spesifik yang ingin dijawab (misalnya: bagaimana prosesnya, apa manfaatnya, berapa jumlahnya).
+2. Setelah semua elemen diidentifikasi, bentuk **Query RAG** dengan struktur: [INTENT] + [ENTITY] + [CONTEXT] + [QUESTION]
+3. Gunakan bahasa natural yang ringkas, namun informatif dan eksplisit.
+4. Generate hanya hasil akhirnya saja berupa satu buah kalimat
+Contoh 0 :
+User Input:
+> Apa itu BPJS
+Output : Pengertian BPJS
+Contoh 1 :
+User Input:
+> Di mana lokasi PT Sakura System Solution ?
+Output: Lokasi PT Sakura System Solution
+Contoh 2:
+User Input:
+> Saya mengalami kecelakaan di kantor dan ingin tahu apakah bisa klaim BPJS karena perusahaan saya adalah mitra.
+Output: apakah bisa klaim BPJS kecelakaan kerja di kantor jika perusahaan mitra dan apakah saya memenuhi syarat
+**Tugas Anda sekarang:**
+Lakukan proses di atas untuk setiap input pengguna yang diberikan. Hasilkan query RAG akhir yang siap digunakan dalam pencarian dokumen.

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/chat_template/query_maker_temp.txt ADDED Viewed

	@@ -0,0 +1,30 @@

+Anda adalah agen AI yang tepat dan objektif,
+Anda bertugas mengubah pertanyaan atau pernyataan pengguna menjadi query yang eksplisit dan efisien untuk keperluan pencarian dokumen dalam sistem RAG (Retrieval-Augmented Generation).
+Ikuti langkah-langkah berikut:
+1. Ekstrak bagian-bagian penting dari input pengguna:
+   - **Intent**: Tujuan utama atau jenis permintaan (misalnya: apa itu, cara, syarat, apakah bisa, berapa).
+   - **Entity/Noun Phrase**: Objek utama yang dibahas (misalnya: BPJS, tokenizer truncation, RWKV, gaji).
+   - **Context**: Informasi pendukung yang menyempitkan fokus (misalnya: kecelakaan kerja, gaji 1 juta per bulan, perusahaan mitra BPJS).
+   - **Question**: Pertanyaan spesifik yang ingin dijawab (misalnya: bagaimana prosesnya, apa manfaatnya, berapa jumlahnya).
+2. Setelah semua elemen diidentifikasi, bentuk **Query RAG** dengan struktur: [INTENT] + [ENTITY] + [CONTEXT] + [QUESTION]
+3. Gunakan bahasa natural yang ringkas, namun informatif dan eksplisit.
+4. Generate hanya hasil akhirnya saja berupa satu buah kalimat
+Contoh 1 :
+User Input:
+> Di mana lokasi PT Sakura System Solution ?
+Output: Lokasi PT Sakura System Solution
+Contoh 2:
+User Input:
+> Saya mengalami kecelakaan di kantor dan ingin tahu apakah bisa klaim BPJS karena perusahaan saya adalah mitra.
+Output: apakah bisa klaim BPJS kecelakaan kerja di kantor jika perusahaan mitra dan apakah saya memenuhi syarat
+**Tugas Anda sekarang:**
+Lakukan proses di atas untuk setiap input pengguna yang diberikan. Hasilkan query RAG akhir yang siap digunakan dalam pencarian dokumen.

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/inference/__init__.py ADDED Viewed

File without changes

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/pipeline/language_model.py ADDED Viewed

	@@ -0,0 +1,947 @@

+import torch
+import asyncio
+from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, TextIteratorStreamer, BitsAndBytesConfig
+import torch
+from typing import Optional, Dict, Any, List, Union, Callable, Awaitable, AsyncGenerator
+import logging
+from dataclasses import dataclass
+from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor
+from functools import partial
+from threading import Thread
+from rag.retriever.retriever_types import RetrievalResult
+from langchain_core.documents import Document
+import copy
+@dataclass
+class LMConfig:
+    """Settings for model loading, generation, RAG prompting and streaming."""
+    model_name: str = "Qwen/Qwen2.5-1.5B-Instruct"
+    device: str = "cuda"
+    torch_dtype: torch.dtype = torch.float16
+    max_length: int = 2048
+    temperature: float = 0.7
+    top_p: float = 0.8
+    top_k: int = 50
+    do_sample: bool = True
+    # Forwarded unchanged to the model loader. Typed with typing.Any: the
+    # builtin any() is a function, not a type.
+    quantization_config: Any = None
+    pad_token_id: Optional[int] = None
+    eos_token_id: Optional[int] = None
+    # RAG-specific configs
+    max_context_length: int = 1500  # budget, in characters, for the formatted context
+    context_separator: str = "\n---\n"
+    instruction_template: str = "system"  # "system", "instruction", "custom"
+    # Async-specific configs
+    max_workers: int = 2  # size of the thread pool used for blocking calls
+    generation_timeout: float = 30  # presumably seconds -- confirm against the caller
+    repetition_penalty: float = 1.0
+    # Streaming-specific configs
+    stream_timeout: float = 100  # timeout for a stream chunk
+    skip_prompt: bool = True     # skip the prompt in the streaming output
+class LM:
+    """
+    Async LLM Qwen 0.5B dengan interface yang mudah digunakan
+    Termasuk prompt formatting khusus untuk RAG (Retrieval-Augmented Generation)
+    Dan support untuk text streaming
+    """
+    def __init__(self, config: Optional[LMConfig] = None, prompt_template=None):
+        """
+        Initialise the LM wrapper without loading any weights.
+        Args:
+            config: Model configuration (optional; a default ``LMConfig`` is
+                used when None)
+            prompt_template: List of chat-message dicts. When omitted, a
+                fresh system + ``{question}`` user template is created for
+                this instance.
+        """
+        self.config = LMConfig() if config is None else config
+        self.tokenizer : AutoTokenizer = None
+        self.model = None
+        self.generation_config = None
+        self.is_loaded = False
+        self.executor = ThreadPoolExecutor(max_workers=self.config.max_workers)
+        self._lock = asyncio.Lock()
+        # Setup logging
+        logging.basicConfig(level=logging.INFO)
+        self.logger = logging.getLogger(__name__)
+        # RAG prompt templates
+        if prompt_template is None:
+            # Built per instance: a list literal in the signature would be a
+            # single object shared, and mutable, across every LM instance.
+            prompt_template = [
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": "{question}"},
+            ]
+        self.prompt_template = prompt_template
+    async def load_model(self) -> None:
+        """Load the tokenizer and the model once, then build the generation config.
+
+        Both ``from_pretrained`` calls run in ``self.executor``. The whole
+        routine holds ``self._lock`` and returns early when ``is_loaded`` is
+        already set. Any failure is logged and re-raised.
+        """
+        async with self._lock:
+            if self.is_loaded:
+                self.logger.info("Model already loaded")
+                return
+            def _build_tokenizer():
+                return AutoTokenizer.from_pretrained(
+                    self.config.model_name,
+                    trust_remote_code=True,
+                    torch_dtype="auto",
+                    device_map="auto",
+                )
+            def _build_model():
+                return AutoModelForCausalLM.from_pretrained(
+                    self.config.model_name,
+                    quantization_config=self.config.quantization_config,
+                    torch_dtype=self.config.torch_dtype,
+                    device_map=self.config.device,
+                    trust_remote_code=True
+                )
+            try:
+                self.logger.info(f"Loading model: {self.config.model_name}")
+                # Tokenizer first, then the model, each in the thread pool.
+                self.tokenizer = await asyncio.get_event_loop().run_in_executor(
+                    self.executor, _build_tokenizer
+                )
+                self.model = await asyncio.get_event_loop().run_in_executor(
+                    self.executor, _build_model
+                )
+                # Pad/EOS ids fall back to the tokenizer's EOS id when unset.
+                generation_settings = dict(
+                    max_length=self.config.max_length,
+                    temperature=self.config.temperature,
+                    top_p=self.config.top_p,
+                    top_k=self.config.top_k,
+                    do_sample=self.config.do_sample,
+                    pad_token_id=self.config.pad_token_id or self.tokenizer.eos_token_id,
+                    eos_token_id=self.config.eos_token_id or self.tokenizer.eos_token_id,
+                    repetition_penalty=self.config.repetition_penalty,
+                )
+                self.generation_config = GenerationConfig(**generation_settings)
+                self.is_loaded = True
+                self.logger.info("Model loaded successfully!")
+            except Exception as e:
+                self.logger.error(f"Error loading model: {e}")
+                raise
+    def get_available_templates(self) -> List[str]:
+        """
+        Return the items of ``self.prompt_template`` as a new list.
+        NOTE(review): with the default list-of-messages template the items
+        are the message dicts themselves, not template names, despite the
+        method name and the ``List[str]`` annotation -- confirm intent.
+        Returns:
+            Shallow copy of the iteration of ``self.prompt_template``
+        """
+        return [*self.prompt_template]
+    def preview_template(self, template_type: str, sample_question: str = "Apa itu AI?",
+                        sample_context: str = "Artificial Intelligence adalah teknologi...") -> str:
+        """
+        Preview one message of the prompt template, filled with sample data.
+        Args:
+            template_type: Role of the message to preview (e.g. "system", "user")
+            sample_question: Sample question substituted for ``{question}``
+            sample_context: Sample context substituted for ``{context}``
+        Returns:
+            The formatted content of the first message whose role matches, or
+            a "not available" notice when no message has that role.
+        """
+        # The template is a list of chat-message dicts, so the entry has to be
+        # found by its "role". The previous `template_type not in list` test
+        # compared a string against dicts and therefore never matched.
+        selected = next(
+            (
+                message
+                for message in self.prompt_template
+                if isinstance(message, dict) and message.get("role") == template_type
+            ),
+            None,
+        )
+        if selected is None:
+            return f"Template '{template_type}' tidak tersedia. Available: {self.get_available_templates()}"
+        # str.format returns a new string, so the stored template is untouched.
+        return selected["content"].format(
+            context=sample_context,
+            question=sample_question
+        )
+    def _format_context(self, contexts: Union[List[str], RetrievalResult], numbering: bool = True) -> str:
+        """
+        Join retrieved contexts into one string, each under a "[Dokumen ...]" header.
+        Args:
+            contexts: A RetrievalResult, or a list of strings / other objects
+                (non-strings are rendered with ``str()``)
+            numbering: Whether to number the headers; for a RetrievalResult a
+                numbered header also carries the score when one is present
+        Returns:
+            Formatted context string, blocks joined by ``config.context_separator``;
+            an empty string when ``contexts`` is falsy
+        """
+        if not contexts:
+            return ""
+        formatted_contexts = []
+        self.logger.info(f"Context : {contexts}")
+        self.logger.info(f"Is RetrievalResult Contexts =  {isinstance(contexts, RetrievalResult)}")
+        if isinstance(contexts, RetrievalResult):
+            # Scores may be missing or shorter than the document list (the
+            # prompt formatter passes [] when there are none); indexing them
+            # blindly raised IndexError.
+            scores = contexts.scores or []
+            for i, ctx in enumerate(contexts.documents, 1):
+                if numbering:
+                    header = f"[Dokumen {i}"
+                    score = scores[i - 1] if i <= len(scores) else None
+                    if score:
+                        header += f" (Skor: {score:.3f})"
+                    header += "]"
+                else:
+                    header = "[Dokumen]"
+                formatted_contexts.append(f"{header}\n{ctx.page_content}")
+        else:
+            for i, ctx in enumerate(contexts, 1):
+                header = f"[Dokumen {i}]" if numbering else "[Dokumen]"
+                # str() is a no-op for strings, so one branch serves both cases.
+                formatted_contexts.append(f"{header}\n{str(ctx)}")
+        return self.config.context_separator.join(formatted_contexts)
+    def _truncate_context(self, context: str, max_length: int) -> str:
+        """
+        Cut ``context`` so the result never exceeds ``max_length`` characters.
+        Args:
+            context: Context string
+            max_length: Maximum length in characters
+        Returns:
+            ``context`` unchanged when it already fits; otherwise a prefix of
+            it followed by a truncation marker. When ``max_length`` is too
+            small to hold the marker, a bare prefix of at most ``max_length``
+            characters.
+        """
+        if len(context) <= max_length:
+            return context
+        marker = "\n\n[... Context dipotong karena terlalu panjang ...]"
+        # Reserve exactly the marker's length. The previous fixed 50-character
+        # reserve was one short of the 51-character marker, and a max_length
+        # below 50 turned the slice bound negative.
+        keep = max_length - len(marker)
+        if keep <= 0:
+            return context[:max(max_length, 0)]
+        return context[:keep] + marker
+    async def format_rag_prompt(self,
+                                question: str,
+                                contexts: Union[List[str], RetrievalResult],
+                                template_type: Optional[str] = None,
+                                custom_template: Optional[str] = None,
+                                include_metadata: bool = True,
+                                context_numbering: bool = True,
+                                max_contexts: Optional[int] = None) -> str:
+        """
+        Format prompt untuk RAG dengan berbagai template options (async)
+        """
+        def _format_sync():
+            # Handle RetrievalResult secara eksplisit
+            if isinstance(contexts, RetrievalResult):
+                docs = contexts.documents
+                if max_contexts:
+                    docs = docs[:max_contexts]
+                processed_contexts = RetrievalResult(
+                    documents=docs,
+                    scores=contexts.scores[:len(docs)] if contexts.scores else [],
+                    query=contexts.query,
+                    retrieval_time=contexts.retrieval_time,
+                    metadata=contexts.metadata
+                )
+            else:
+                # contexts diasumsikan sebagai list biasa (list[str] atau list[Document])
+                processed_contexts = contexts[:max_contexts] if max_contexts and len(contexts) > max_contexts else contexts
+            # Format context menjadi string
+            formatted_context = self._format_context(processed_contexts, context_numbering)
+            # Truncate jika panjang melebihi batas
+            formatted_context = self._truncate_context(
+                formatted_context,
+                self.config.max_context_length
+            )
+            # Tambah metadata jika diizinkan dan konteks adalah RetrievalResult
+            if include_metadata and isinstance(processed_contexts, RetrievalResult):
+                metadata_info = []
+                for i, doc in enumerate(processed_contexts.documents, 1):
+                    if hasattr(doc, "metadata") and doc.metadata:
+                        metadata_info.append(f"Dokumen {i}: {doc.metadata}")
+                # if metadata_info:
+                #     formatted_context += f"\n\n[Metadata]\n" + "\n".join(metadata_info)
+            return formatted_context
+        # Jalankan _format_sync di thread pool
+        formatted_context = await asyncio.get_event_loop().run_in_executor(
+            self.executor, _format_sync
+        )
+        self.logger.info(f"Formatted Context {formatted_context}")
+        # Tentukan template yang akan dipakai
+        if(template_type == ""):
+            self.config.instruction_template = "system"
+        # Gunakan custom template jika disediakan
+        if custom_template:
+            return custom_template.format(
+                context=formatted_context,
+                question=question
+            )
+        elif self.prompt_template:
+            print("question", question)
+            template_data = copy.deepcopy(self.prompt_template)
+            print("template = ", template_type, "rag template = ", template_data)
+            # template_key = "user_template" if "user_template" in template_data else "template"
+            formatted_template = []
+            for cht in template_data:
+                    # Create a copy of the content to avoid modifying the original
+                content = cht["content"]
+                # Format both placeholders at once to avoid KeyError
+                if "{context}" in content or "{question}" in content:
+                    try:
+                        content = content.format(
+                            context=formatted_context,
+                            question=question
+                        )
+                    except KeyError as e:
+                        self.logger.error(f"Missing placeholder in template: {e}")
+                        # Fallback: format only available placeholders
+                        if "{context}" in content:
+                            content = content.replace("{context}", formatted_context)
+                        if "{question}" in content:
+                            content = content.replace("{question}", question)
+                # Create new dict with formatted content
+                formatted_chat = {
+                    "role": cht["role"],
+                    "content": content
+                }
+                # Copy other fields if they exist
+                if "description" in cht:
+                    formatted_chat["description"] = cht["description"]
+                formatted_template.append(formatted_chat)
+            # self.logger.info(f"Formatted Template {formatted_template}")
+            # print("Forrmatted Template", formatted_template)
+            return formatted_template
+        else:
+            # Fallback default template
+            return [
+                 {"role": "system", "content": "You are a helpful assistant."},
+                 {"role": "user", "content": question}
+            ]
+    async def generate_stream(self,
+                             prompt: List[Dict],
+                             max_new_tokens: Optional[int] = None,
+                             temperature: Optional[float] = None,
+                             top_p: Optional[float] = None,
+                             **kwargs) -> AsyncGenerator[str, None]:
+        """
+        Generate text dari prompt secara streaming async
+        Args:
+            prompt: Input text prompt
+            max_new_tokens: Maximum token baru yang akan di-generate
+            temperature: Temperature untuk generation (override config)
+            top_p: Top-p untuk generation (override config)
+            **kwargs: Parameter tambahan untuk generation
+        Yields:
+            Generated text chunks
+        """
+        await self._check_model_loaded()
+        # Setup streamer
+        streamer = TextIteratorStreamer(
+            self.tokenizer,
+            timeout=self.config.stream_timeout,
+            skip_prompt=self.config.skip_prompt,
+            skip_special_tokens=True
+        )
+        def _generate_sync():
+            try:
+                # Tokenize input
+                inputs = self.tokenizer.apply_chat_template(
+                    prompt,
+                    add_generation_prompt=True,
+                    return_tensors="pt"
+                )
+                # Override generation config jika diperlukan
+                gen_config = self.generation_config
+                if any([max_new_tokens, temperature, top_p]):
+                    gen_config = GenerationConfig(
+                        max_new_tokens=max_new_tokens or self.config.max_length,
+                        temperature=temperature or self.config.temperature,
+                        top_p=top_p or self.config.top_p,
+                        top_k=self.config.top_k,
+                        do_sample=self.config.do_sample,
+                        pad_token_id=self.config.pad_token_id or self.tokenizer.eos_token_id,
+                        eos_token_id=self.config.eos_token_id or self.tokenizer.eos_token_id,
+                        repetition_penalty=self.config.repetition_penalty,
+                        **kwargs
+                    )
+                # Move to GPU
+                self.model.to("cuda")
+                input_ids = inputs.to("cuda")
+                # Generate dalam thread terpisah
+                generation_kwargs = {
+                    "input_ids": input_ids,
+                    "generation_config": gen_config,
+                    "streamer": streamer,
+                    **kwargs
+                }
+                thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
+                thread.start()
+                return thread
+            except Exception as e:
+                self.logger.error(f"Error during stream generation setup: {e}")
+                raise
+        # Setup generation thread
+        generation_thread = await asyncio.get_event_loop().run_in_executor(
+            self.executor, _generate_sync
+        )
+        err = None
+        try:
+            # Stream tokens
+            for token in streamer:
+                if token:  # Skip empty tokens
+                    yield token
+            # Wait for generation thread to finish
+            err = await asyncio.get_event_loop().run_in_executor(
+                self.executor, generation_thread.join
+            )
+        except Exception as e:
+            self.logger.error(f"Error during streaming: {e}, {err}")
+            # Make sure thread is cleaned up
+            if generation_thread.is_alive():
+                generation_thread.join(timeout=1.0)
+            raise
+    async def rag_generate_stream(self,
+                                 question: str,
+                                 contexts: Union[List[str], RetrievalResult],
+                                 template_type: Optional[str] = None,
+                                 max_new_tokens: Optional[int] = None,
+                                 temperature: Optional[float] = None,
+                                 **kwargs) -> AsyncGenerator[str, None]:
+        """
+        Generate jawaban untuk RAG secara streaming async
+        Args:
+            question: User question
+            contexts: List of retrieved contexts
+            template_type: Template type untuk formatting
+            max_new_tokens: Maximum token baru yang akan di-generate
+            temperature: Temperature untuk generation
+            **kwargs: Parameter tambahan untuk generation
+        Yields:
+            Generated answer chunks
+        """
+        await self._check_model_loaded()
+        # Format prompt
+        prompt = await self.format_rag_prompt(question, contexts, template_type)
+        # Generate dengan temperature yang lebih rendah untuk RAG (lebih faktual)
+        temp = temperature if temperature is not None else 0.3
+        async for chunk in self.generate_stream(
+            prompt=prompt,
+            max_new_tokens=max_new_tokens,
+            temperature=temp,
+            **kwargs
+        ):
+            yield chunk
+    async def chat_stream(self,
+                         messages: List[Dict[str, str]],
+                         max_new_tokens: Optional[int] = None,
+                         **kwargs) -> AsyncGenerator[str, None]:
+        """
+        Chat dengan format conversation secara streaming async
+        Args:
+            messages: List of messages dengan format [{"role": "user", "content": "..."}]
+            max_new_tokens: Maximum token baru yang akan di-generate
+            **kwargs: Parameter tambahan untuk generation
+        Yields:
+            Response text chunks
+        """
+        await self._check_model_loaded()
+        def _format_chat():
+            try:
+                # Format messages untuk chat
+                formatted_prompt = self.tokenizer.apply_chat_template(
+                    messages,
+                    tokenize=False,
+                    add_generation_prompt=True
+                )
+                return formatted_prompt
+            except Exception as e:
+                self.logger.error(f"Error during chat formatting: {e}")
+                raise
+        # Format chat template dalam thread pool
+        formatted_prompt = await asyncio.get_event_loop().run_in_executor(
+            self.executor, _format_chat
+        )
+        async for chunk in self.generate_stream(
+            formatted_prompt,
+            max_new_tokens=max_new_tokens,
+            **kwargs
+        ):
+            yield chunk
+    async def rag_chat_stream(self,
+                             messages: List[Dict[str, str]],
+                             contexts: Union[List[str], RetrievalResult],
+                             template_type: Optional[str] = None,
+                             max_new_tokens: Optional[int] = None,
+                             **kwargs) -> AsyncGenerator[str, None]:
+        """
+        RAG Chat dengan format conversation secara streaming async
+        Args:
+            messages: List of messages dengan format [{"role": "user", "content": "..."}]
+            contexts: List of retrieved contexts
+            template_type: Template type untuk formatting
+            max_new_tokens: Maximum token baru yang akan di-generate
+            **kwargs: Parameter tambahan untuk generation
+        Yields:
+            Response text chunks
+        """
+        await self._check_model_loaded()
+        # Ambil last user message sebagai question
+        user_messages = [msg for msg in messages if msg.get("role") == "user"]
+        if not user_messages:
+            raise ValueError("No user message found in conversation")
+        last_question = user_messages[-1]["content"]
+        # Generate RAG response secara streaming
+        async for chunk in self.rag_generate_stream(
+            question=last_question,
+            contexts=contexts,
+            template_type=template_type,
+            max_new_tokens=max_new_tokens,
+            **kwargs
+        ):
+            yield chunk
+    # Utility method untuk collect full response dari stream
+    async def collect_stream(self, stream_generator: AsyncGenerator[str, None]) -> str:
+        """
+        Collect semua chunks dari stream generator menjadi full text
+        Args:
+            stream_generator: AsyncGenerator yang menghasilkan text chunks
+        Returns:
+            Complete generated text
+        """
+        chunks = []
+        async for chunk in stream_generator:
+            chunks.append(chunk)
+        return "".join(chunks)
+    async def multi_template_generate(self,
+                                    question: str,
+                                    contexts: Union[List[str], RetrievalResult],
+                                    template_types: List[str],
+                                    max_new_tokens: Optional[int] = None,
+                                    **kwargs) -> Dict[str, str]:
+        """
+        Generate jawaban menggunakan multiple templates secara concurrent
+        Args:
+            question: User question
+            contexts: List of retrieved contexts
+            template_types: List of template types to use
+            max_new_tokens: Maximum token baru yang akan di-generate
+            **kwargs: Parameter tambahan untuk generation
+        Returns:
+            Dictionary dengan template_type sebagai key dan response sebagai value
+        """
+        await self._check_model_loaded()
+        # Create tasks untuk concurrent generation
+        tasks = []
+        for template_type in template_types:
+            task = asyncio.create_task(
+                self._generate_single_template(
+                    question, contexts, template_type, max_new_tokens, **kwargs
+                )
+            )
+            tasks.append((template_type, task))
+        # Wait for all tasks
+        results = {}
+        for template_type, task in tasks:
+            try:
+                response = await task
+                results[template_type] = response
+            except Exception as e:
+                self.logger.error(f"Error generating with template {template_type}: {e}")
+                results[template_type] = f"Error: {str(e)}"
+        return results
+    async def _generate_single_template(self,
+                                      question: str,
+                                      contexts: Union[List[str], RetrievalResult],
+                                      template_type: str,
+                                      max_new_tokens: Optional[int] = None,
+                                      **kwargs) -> str:
+        """Helper method untuk single template generation"""
+        return await self.rag_generate(
+            question=question,
+            contexts=contexts,
+            template_type=template_type,
+            max_new_tokens=max_new_tokens,
+            **kwargs
+        )
+    async def rag_generate(self,
+                          question: str,
+                          contexts: Union[List[str], RetrievalResult],
+                          template_type: Optional[str] = None,
+                          max_new_tokens: Optional[int] = None,
+                          temperature: Optional[float] = None,
+                          **kwargs) -> str:
+        """
+        Generate jawaban untuk RAG secara async
+        Args:
+            question: User question
+            contexts: List of retrieved contexts
+            template_type: Template type untuk formatting
+            max_new_tokens: Maximum token baru yang akan di-generate
+            temperature: Temperature untuk generation
+            **kwargs: Parameter tambahan untuk generation
+        Returns:
+            Generated answer
+        """
+        await self._check_model_loaded()
+        # Format prompt
+        prompt = await self.format_rag_prompt(question, contexts, template_type)
+        # Generate dengan temperature yang lebih rendah untuk RAG (lebih faktual)
+        temp = temperature if temperature is not None else 0.3
+        return await self.generate(
+            prompt=prompt,
+            max_new_tokens=max_new_tokens,
+            temperature=temp,
+            **kwargs
+        )
+    async def rag_chat(self,
+                      messages: List[Dict[str, str]],
+                      contexts: Union[List[str], RetrievalResult],
+                      template_type: Optional[str] = None,
+                      max_new_tokens: Optional[int] = None,
+                      **kwargs) -> str:
+        """
+        RAG Chat dengan format conversation secara async
+        Args:
+            messages: List of messages dengan format [{"role": "user", "content": "..."}]
+            contexts: List of retrieved contexts
+            template_type: Template type untuk formatting
+            max_new_tokens: Maximum token baru yang akan di-generate
+            **kwargs: Parameter tambahan untuk generation
+        Returns:
+            Response text
+        """
+        await self._check_model_loaded()
+        # Ambil last user message sebagai question
+        user_messages = [msg for msg in messages if msg.get("role") == "user"]
+        if not user_messages:
+            raise ValueError("No user message found in conversation")
+        last_question = user_messages[-1]["content"]
+        # Generate RAG response
+        return await self.rag_generate(
+            question=last_question,
+            contexts=contexts,
+            template_type=template_type,
+            max_new_tokens=max_new_tokens,
+            **kwargs
+        )
+    async def _check_model_loaded(self) -> None:
+        """Cek apakah model sudah di-load secara async"""
+        if not self.is_loaded:
+            raise RuntimeError("Model belum di-load. Panggil await load_model() terlebih dahulu.")
+    async def generate(self,
+                      prompt: Union[List[Dict], str],
+                      max_new_tokens: Optional[int] = None,
+                      temperature: Optional[float] = None,
+                      top_p: Optional[float] = None,
+                      **kwargs) -> str:
+        """
+        Generate text dari prompt secara async
+        Args:
+            prompt: Input text prompt
+            max_new_tokens: Maximum token baru yang akan di-generate
+            temperature: Temperature untuk generation (override config)
+            top_p: Top-p untuk generation (override config)
+            **kwargs: Parameter tambahan untuk generation
+        Returns:
+            Generated text
+        """
+        await self._check_model_loaded()
+        def _generate_sync():
+            try:
+                # Tokenize input
+                inputs = self.tokenizer.apply_chat_template(
+                    prompt,
+                    add_generation_prompt=True,
+                    return_tensors="pt"
+                )
+                # Override generation config jika diperlukan
+                gen_config = self.generation_config
+                if any([max_new_tokens, temperature, top_p]):
+                    gen_config = GenerationConfig(
+                        max_new_tokens=max_new_tokens or self.config.max_length,
+                        temperature=temperature or self.config.temperature,
+                        top_p=top_p or self.config.top_p,
+                        top_k=self.config.top_k,
+                        do_sample=self.config.do_sample,
+                        pad_token_id=self.config.pad_token_id or self.tokenizer.eos_token_id,
+                        eos_token_id=self.config.eos_token_id or self.tokenizer.eos_token_id,
+                        repetition_penalty = self.config.repetition_penalty,
+                        **kwargs
+                    )
+                # Generate
+                with torch.no_grad():
+                    self.model.to("cuda")
+                    input_ids = inputs.to("cuda")
+                    prompt_length = input_ids.shape[-1]
+                    outputs = self.model.generate(
+                        input_ids,
+                        generation_config=gen_config,
+                        **kwargs
+                    )
+                # Decode output
+                generated_text = self.tokenizer.decode(
+                    outputs[0][prompt_length:],
+                    skip_special_tokens=True
+                )
+                print("Generated Text", generated_text)
+                # Remove input prompt dari output
+                return generated_text
+            except Exception as e:
+                self.logger.error(f"Error during generation: {e}")
+                raise
+        # Run generation in thread pool dengan timeout
+        try:
+            result = await asyncio.wait_for(
+                asyncio.get_event_loop().run_in_executor(self.executor, _generate_sync),
+                timeout=self.config.generation_timeout
+            )
+            return result
+        except asyncio.TimeoutError:
+            self.logger.error(f"Generation timeout after {self.config.generation_timeout} seconds")
+            raise TimeoutError(f"Generation timeout after {self.config.generation_timeout} seconds")
+    async def chat(self,
+                  messages: List[Dict[str, str]],
+                  max_new_tokens: Optional[int] = None,
+                  **kwargs) -> str:
+        """
+        Chat dengan format conversation secara async
+        Args:
+            messages: List of messages dengan format [{"role": "user", "content": "..."}]
+            max_new_tokens: Maximum token baru yang akan di-generate
+            **kwargs: Parameter tambahan untuk generation
+        Returns:
+            Response text
+        """
+        await self._check_model_loaded()
+        def _format_chat():
+            try:
+                # Format messages untuk chat
+                formatted_prompt = self.tokenizer.apply_chat_template(
+                    messages,
+                    chat_template="rag",
+                    return_tensors="pt"
+                )
+                return formatted_prompt
+            except Exception as e:
+                self.logger.error(f"Error during chat formatting: {e}")
+                raise
+        # Format chat template dalam thread pool
+        formatted_prompt = await asyncio.get_event_loop().run_in_executor(
+            self.executor, _format_chat
+        )
+        return await self.generate(
+            formatted_prompt,
+            max_new_tokens=max_new_tokens,
+            **kwargs
+        )
+    async def update_config(self, **kwargs) -> None:
+        """
+        Update konfigurasi model secara async
+        Args:
+            **kwargs: Parameter konfigurasi yang akan diupdate
+        """
+        async with self._lock:
+            for key, value in kwargs.items():
+                if hasattr(self.config, key):
+                    setattr(self.config, key, value)
+                    self.logger.info(f"Updated {key} to {value}")
+                else:
+                    self.logger.warning(f"Unknown config parameter: {key}")
+            # Update generation config jika model sudah loaded
+            if self.is_loaded:
+                self.generation_config = GenerationConfig(
+                    max_length=self.config.max_length,
+                    temperature=self.config.temperature,
+                    top_p=self.config.top_p,
+                    top_k=self.config.top_k,
+                    do_sample=self.config.do_sample,
+                    pad_token_id=self.config.pad_token_id or self.tokenizer.eos_token_id,
+                    eos_token_id=self.config.eos_token_id or self.tokenizer.eos_token_id,
+                    repetition_penalty = self.config.repetition_penalty,
+                )
+    async def get_model_info(self) -> Dict[str, Any]:
+        """
+        Dapatkan informasi model secara async
+        Returns:
+            Dictionary dengan informasi model
+        """
+        info = {
+            "model_name": self.config.model_name,
+            "is_loaded": self.is_loaded,
+            "config": self.config.__dict__
+        }
+        if self.is_loaded:
+            # Get model info dalam thread pool
+            def _get_info():
+                return {
+                    "vocab_size": self.tokenizer.vocab_size,
+                    "model_parameters": sum(p.numel() for p in self.model.parameters()),
+                    "device": str(next(self.model.parameters()).device)
+                }
+            model_info = await asyncio.get_event_loop().run_in_executor(
+                self.executor, _get_info
+            )
+            info.update(model_info)
+        return info
+    async def batch_generate(self,
+                           prompts: List[str],
+                           max_new_tokens: Optional[int] = None,
+                           **kwargs) -> List[str]:
+        """
+        Generate multiple prompts secara batch dan concurrent
+        Args:
+            prompts: List of prompts to generate
+            max_new_tokens: Maximum token baru yang akan di-generate
+            **kwargs: Parameter tambahan untuk generation
+        Returns:
+            List of generated texts
+        """
+        await self._check_model_loaded()
+        # Create tasks untuk concurrent generation
+        tasks = [
+            asyncio.create_task(
+                self.generate(prompt, max_new_tokens=max_new_tokens, **kwargs)
+            )
+            for prompt in prompts
+        ]
+        # Wait for all tasks
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+        # Process results
+        processed_results = []
+        for i, result in enumerate(results):
+            if isinstance(result, Exception):
+                self.logger.error(f"Error generating prompt {i}: {result}")
+                processed_results.append(f"Error: {str(result)}")
+            else:
+                processed_results.append(result)
+        return processed_results
+    async def close(self) -> None:
+        """
+        Cleanup resources secara async
+        """
+        self.logger.info("Closing LM...")
+        # Shutdown executor
+        self.executor.shutdown(wait=True)
+        # Clear GPU memory
+        if hasattr(self, 'model') and self.model is not None:
+            del self.model
+        if hasattr(self, 'tokenizer') and self.tokenizer is not None:
+            del self.tokenizer
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        self.is_loaded = False
+        self.logger.info("LM closed successfully")
+    async def __aenter__(self):
+        """Async context manager entry"""
+        await self.load_model()
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit"""
+        await self.close()

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/__init__.py ADDED Viewed

File without changes

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/retriever/langchain_retriever.py CHANGED Viewed

@@ -6,6 +6,7 @@ from langchain_openai import OpenAIEmbeddings
 # Vector stores
 from langchain_community.vectorstores import Chroma, FAISS, Pinecone
 # Retriever base
 from langchain_core.vectorstores import VectorStoreRetriever
@@ -24,7 +25,6 @@ from langchain_core.documents import Document
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class LangChainRetriever(BaseRetriever):
     """LangChain-based retriever with multiple format support"""
@@ -160,17 +160,34 @@ class LangChainRetriever(BaseRetriever):
         except Exception as e:
             logger.error(f"Error adding documents: {str(e)}")
             return False
     async def _update_bm25_retriever(self, documents: List[Document]):
         try:
             self.bm25_retriever = BM25Retriever.from_documents(documents)
-            self.retriever = ContextualCompressionRetriever(
-                base_compressor=None,  # Optional: add compressor like CohereRerank or LLM-based
-                base_retriever=self.bm25_retriever  # Example: use BM25 as base, can combine
             )
         except Exception as e:
             logger.error(f"Error updating BM25 retriever: {str(e)}")
     async def retrieve(self, query: str, k: int = 5) -> RetrievalResult:
         try:
             import time
@@ -181,6 +198,7 @@ class LangChainRetriever(BaseRetriever):
                 None, self.retriever.get_relevant_documents, query
             )
             retrieved_docs = retrieved_docs[:k]
             scores = [0.9 - (i * 0.1) for i in range(len(retrieved_docs))]
             retrieval_time = time.time() - start_time
@@ -222,4 +240,4 @@ class LangChainRetriever(BaseRetriever):
         return list(self.processed_documents.values())
     def get_supported_formats(self) -> List[str]:
-        return self.document_loader.get_supported_extensions()

 # Vector stores
 from langchain_community.vectorstores import Chroma, FAISS, Pinecone
+from langchain.retrievers import EnsembleRetriever
 # Retriever base
 from langchain_core.vectorstores import VectorStoreRetriever
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class LangChainRetriever(BaseRetriever):
     """LangChain-based retriever with multiple format support"""
         except Exception as e:
             logger.error(f"Error adding documents: {str(e)}")
             return False
     async def _update_bm25_retriever(self, documents: List[Document]):
         try:
+            # Create BM25 retriever from documents
             self.bm25_retriever = BM25Retriever.from_documents(documents)
+            self.bm25_retriever.k = 10  # Set number of documents to retrieve
+            # For hybrid search, you have several options:
+            # Option 1: Use only BM25 retriever (simplest fix)
+            self.retriever = self.bm25_retriever
+            vector_retriever = VectorStoreRetriever(
+                vectorstore=self.vectorstore,
+                search_kwargs={"k": 10}
+            )
+            self.retriever = EnsembleRetriever(
+                retrievers=[vector_retriever, self.bm25_retriever],
+                weights=[0.5, 0.5]  # Equal weight to both retrievers
             )
         except Exception as e:
             logger.error(f"Error updating BM25 retriever: {str(e)}")
+            # Fallback to vector retriever only
+            self.retriever = VectorStoreRetriever(
+                vectorstore=self.vectorstore,
+                search_kwargs={"k": 10}
+            )
     async def retrieve(self, query: str, k: int = 5) -> RetrievalResult:
         try:
             import time
                 None, self.retriever.get_relevant_documents, query
             )
             retrieved_docs = retrieved_docs[:k]
             scores = [0.9 - (i * 0.1) for i in range(len(retrieved_docs))]
             retrieval_time = time.time() - start_time
         return list(self.processed_documents.values())
     def get_supported_formats(self) -> List[str]:
+        return self.document_loader.get_supported_extensions()

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/web_search/__init__.py ADDED Viewed

File without changes

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rtc/rtc_call_gpt.py ADDED Viewed

	@@ -0,0 +1,364 @@

+import fastapi
+from fastapi.middleware.cors import CORSMiddleware
+from fastrtc import ReplyOnPause, Stream, AlgoOptions, SileroVadOptions, get_cloudflare_turn_credentials_async, get_cloudflare_turn_credentials
+from fastrtc.utils import audio_to_int16
+from openai import OpenAI
+from elevenlabs.client import ElevenLabs
+from dotenv import load_dotenv
+from tts.audio_edge_tts import EdgeTTS
+from rag import document_retriever
+import logging
+import time
+import platform
+import socket
+import os
+import numpy as np
+import io
+import wave
+import asyncio
+import librosa
+from pydub import AudioSegment
+# from stt.whisper_stt import WhisperSTT
+from collections import deque
+import torch
+import torchaudio.transforms as T
+import asyncio
+import concurrent.futures
+import threading
+from config.constant import HF_TOKEN
+import threading
+import re
+from openai import OpenAI
+from langchain_core.documents import Document
+from rag import ddgs
+# Load .env
+load_dotenv()
+logging.basicConfig(level=logging.INFO)
+class RTCHandler:
+    def __init__(self, openai_client: OpenAI,  whisper_stt = None, edge_tts : EdgeTTS = None):
+        """Store the handler's collaborators and start with an empty conversation.
+
+        Args:
+            openai_client: OpenAI client used by ``echo`` for transcription and chat completion.
+            whisper_stt: Optional speech-to-text backend; stored on the instance as given.
+            edge_tts: EdgeTTS instance used by ``echo`` to synthesize the spoken reply.
+        """
+        # External collaborators.
+        self.openai_client = openai_client
+        self.whisper_stt = whisper_stt
+        self.edge_tts = edge_tts
+        # System prompt (Indonesian): a polite but relaxed customer-service
+        # persona answering over a phone call, in no more than 50 words.
+        self.sys_prompt = """
+        Kamu adalah customer service yang berbahasa Indonesia dengan baik sopan, santun, tapi santai pembawaannya.
+        Kamu bisa menjelaskan sesuatu secara baik dan membimbing customer dalam menghadapi masalah yang ada!
+        Kamu akan menjawab customer dengan media call /telepon jadi anda harus memberikan respon seperlunya saja
+        Tidak kepanjanngan, dan sangat jelas,
+        Tidak lebih dari 50 kata.
+        """
+        # Conversation state: the history always opens with the system prompt.
+        system_message = {
+            "role": "system",
+            "content": self.sys_prompt
+        }
+        self.messages = [system_message]
+        self.prompt = ""
+        self.full_response = ""
+        # Filled in later by create_stream() / create_fastapi_app().
+        self.stream = None
+        self.app = None
+        self._setup_webrtc_ip()
+    def _setup_webrtc_ip(self):
+        """Export the local IPv4 address as ``WEBRTC_IP``; does nothing unless on Windows."""
+        if platform.system() != 'Windows':
+            return
+        # After connect(), getsockname() reports the local address selected
+        # for reaching 8.8.8.8; fall back to loopback if that fails.
+        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
+            try:
+                probe.connect(('8.8.8.8', 80))
+                address = probe.getsockname()[0]
+            except Exception:
+                address = '127.0.0.1'
+        os.environ['WEBRTC_IP'] = address
+    def audio_to_bytes(self, audio_tuple, sample_rate=24000) -> io.BytesIO:
+        """Encode an audio tuple as an in-memory mono 16-bit WAV file.
+
+        Args:
+            audio_tuple: ``(sample_rate, samples)`` pair; the samples are
+                converted with ``audio_to_int16`` before being written.
+            sample_rate: Unused; kept so existing callers keep working. The
+                frame rate written to the WAV header comes from ``audio_tuple``.
+
+        Returns:
+            A ``BytesIO`` rewound to the start, carrying ``name = "audio.wav"``
+            (presumably so the upload API can infer the format - TODO confirm).
+        """
+        source_rate, _ = audio_tuple
+        audio_int16 = audio_to_int16(audio_tuple)
+        buffer = io.BytesIO()
+        with wave.open(buffer, "wb") as wf:
+            wf.setnchannels(1)  # mono
+            wf.setsampwidth(2)  # 2 bytes per sample, matching int16
+            wf.setframerate(source_rate)
+            wf.writeframes(audio_int16.tobytes())
+        buffer.seek(0)
+        buffer.name = "audio.wav"
+        return buffer
+    def echo(self, audio):
+        """Transcribe one caller utterance and stream back a spoken reply.
+
+        Generator registered as the ``ReplyOnPause`` handler in ``create_stream``.
+        The utterance is transcribed with Whisper (language ``id``), answered by
+        the chat model with retrieved contexts, and synthesized phrase by phrase.
+
+        Args:
+            audio: Audio tuple accepted by ``audio_to_bytes``.
+
+        Yields:
+            ``(24000, samples)`` tuples with the reply audio. Nothing is yielded
+            when the transcription is empty; on any error one second of silence
+            is yielded instead.
+        """
+        try:
+            stt_time = time.time()
+            logging.info("Performing STT")
+            transcription = self.openai_client.audio.transcriptions.create(
+                model="whisper-1",
+                file=self.audio_to_bytes(audio),
+                language="id"
+            )
+            self.prompt = transcription.text
+            if self.prompt == "":
+                logging.info("STT returned empty string")
+                return
+            logging.info(f"STT response: {transcription}")
+            logging.info(f"STT took {time.time() - stt_time} seconds")
+            llm_time = time.time()
+            self.full_response = ""
+            # This handler is a plain generator, so the async pipeline is
+            # advanced one item at a time on a private event loop.
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            async_gen = self._stream_text_to_audio()
+            try:
+                while True:
+                    try:
+                        chunk = loop.run_until_complete(async_gen.__anext__())
+                    except StopAsyncIteration:
+                        break
+                    yield chunk
+            finally:
+                # Finalize the pipeline even if the consumer stops iterating early.
+                loop.run_until_complete(async_gen.aclose())
+                loop.close()
+            self.messages.append({"role": "assistant", "content": self.full_response + " "})
+            logging.info(f"LLM response: {self.full_response}")
+            logging.info(f"LLM took {time.time() - llm_time} seconds")
+        except Exception as e:
+            logging.error(f"Error in echo function: {e}")
+            error_audio = np.zeros(24000, dtype=np.float32)
+            yield (24000, error_audio)
+    async def _gather_contexts(self):
+        """Return numbered retrieved contexts for ``self.prompt`` and index fresh web results."""
+        # Retrieval runs before the web results are indexed, so those results
+        # only become retrievable on later queries.
+        retrieval_result = await document_retriever.retrieve(query = self.prompt)
+        search_results = []
+        async for result in ddgs.search(self.prompt, max_results=5):
+            search_results.append(
+                Document(
+                    page_content=result,
+                    metadata={"source": "internet_search", "query": self.prompt}
+                )
+            )
+        # Index every hit (not just the last one), and skip the call when the
+        # search returned nothing.
+        if search_results:
+            await document_retriever.add_documents(search_results)
+        contexts = "".join(
+            f"{number}. {ctx.page_content}" + "\n"
+            for number, ctx in enumerate(retrieval_result.documents, start=1)
+        )
+        logging.info(f"Retrieved Contexts : {contexts}")
+        return contexts
+    async def _text_to_audio_chunks(self, text, chunk_size=1024, target_rate=24000):
+        """Synthesize ``text`` with EdgeTTS and return ``(target_rate, samples)`` chunks."""
+        import torchaudio
+        audio_buffer_gen = await self.edge_tts.generate_audio_buffer(text)
+        audio_buffer = audio_buffer_gen[0]
+        audio_buffer.seek(0)
+        # Decode the MP3 and scale to float32 (assumes 16-bit samples - TODO confirm).
+        audio_segment = AudioSegment.from_file(audio_buffer, format="mp3")
+        samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32) / (2 ** 15)
+        # Interleaved stereo is averaged down to mono.
+        if audio_segment.channels == 2:
+            samples = samples.reshape((-1, 2)).mean(axis=1)
+        # Resample on the GPU when one is available, otherwise on the CPU.
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        audio_tensor = torch.from_numpy(samples).unsqueeze(0).to(device)
+        resampler = torchaudio.transforms.Resample(
+            orig_freq=audio_segment.frame_rate,
+            new_freq=target_rate
+        ).to(device)
+        resampled = resampler(audio_tensor).squeeze(0).cpu().numpy()
+        return [
+            (target_rate, resampled[start:start + chunk_size])
+            for start in range(0, len(resampled), chunk_size)
+        ]
+    async def _stream_text_to_audio(self):
+        """Stream the chat answer for ``self.prompt`` and yield it as audio chunks."""
+        contexts = await self._gather_contexts()
+        self.messages.append({"role": "user", "content": f"""
+                                        Dari Konteks yang diberikan (jika diperlukan) :
+                                        {contexts}
+                                        Berikan jawaban atas pertanyaan yang diberikan :
+                                        {self.prompt}
+                                          """})
+        response = self.openai_client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=self.messages,
+            max_tokens=200,
+            stream=True
+        )
+        text_buffer = ""
+        for stream_data in response:
+            if not stream_data.choices:
+                continue
+            choice = stream_data.choices[0]
+            piece = choice.delta.content
+            if piece:
+                self.full_response += piece
+                text_buffer += piece
+                # Speak as soon as a phrase boundary arrives to keep latency low.
+                if re.search(r'[.,?;!]', piece):
+                    try:
+                        audio_chunks = await self._text_to_audio_chunks(text_buffer)
+                    except Exception as e:
+                        # Keep the text buffered; it is retried with the next phrase.
+                        logging.error(f"TTS generation failed for chunk: {e}")
+                    else:
+                        text_buffer = ""
+                        for audio_chunk in audio_chunks:
+                            yield audio_chunk
+            if choice.finish_reason is not None:
+                break
+        # Speak whatever is left (no trailing punctuation, or the reply was cut
+        # off by max_tokens) as audio, so every yielded item is an audio tuple.
+        if text_buffer.strip():
+            try:
+                audio_chunks = await self._text_to_audio_chunks(text_buffer)
+            except Exception as e:
+                logging.error(f"TTS generation failed for chunk: {e}")
+            else:
+                for audio_chunk in audio_chunks:
+                    yield audio_chunk
+    def reset_conversation(self):
+        """Drop the dialogue so far, leaving only the system prompt in the history."""
+        logging.info("Resetting chat")
+        system_message = {"role": "system", "content": self.sys_prompt}
+        self.full_response = ""
+        self.messages = [system_message]
+    def create_stream(self):
+        """Build the audio ``Stream`` around ``echo``, store it on ``self.stream`` and return it.
+
+        Raises:
+            Exception: Any construction error is logged and re-raised unchanged.
+        """
+        async def get_credentials():
+            # Passed uncalled as ``rtc_configuration``; Stream is expected to
+            # invoke it when it needs credentials.
+            return await get_cloudflare_turn_credentials_async(hf_token=HF_TOKEN)
+        try:
+            server_credentials = get_cloudflare_turn_credentials(ttl=360_000)
+            pause_options = AlgoOptions(
+                audio_chunk_duration=0.5,
+                started_talking_threshold=0.1,
+                speech_threshold=0.03
+            )
+            vad_options = SileroVadOptions(
+                threshold=0.90,
+                min_speech_duration_ms=250,
+                min_silence_duration_ms=2000,
+                speech_pad_ms=400,
+                max_speech_duration_s=15
+            )
+            reply_handler = ReplyOnPause(
+                self.echo,
+                algo_options=pause_options,
+                model_options=vad_options
+            )
+            self.stream = Stream(
+                rtc_configuration=get_credentials,
+                server_rtc_configuration=server_credentials,
+                handler=reply_handler,
+                modality="audio",
+                mode="send-receive"
+            )
+        except Exception as e:
+            logging.error(f"Error creating stream: {e}")
+            raise
+        return self.stream
+    def create_fastapi_app(self):
+        """Create the FastAPI app, mount the stream on it and add ``/reset`` and ``/status``.
+
+        Returns:
+            The app, which is also stored on ``self.app``.
+
+        Raises:
+            Exception: Any setup error is logged and re-raised unchanged.
+        """
+        try:
+            app = fastapi.FastAPI()
+            self.app = app
+            # NOTE(review): every origin, method and header is allowed, with credentials.
+            cors_options = {
+                "allow_origins": ["*"],
+                "allow_credentials": True,
+                "allow_methods": ["*"],
+                "allow_headers": ["*"],
+            }
+            app.add_middleware(CORSMiddleware, **cors_options)
+            # The stream is created on demand so this method can be called first.
+            if not self.stream:
+                self.create_stream()
+            self.stream.mount(app)
+            @app.get("/reset")
+            async def reset():
+                # Clears the conversation; failures are reported in the payload.
+                try:
+                    self.reset_conversation()
+                except Exception as e:
+                    logging.error(f"Error in reset endpoint: {e}")
+                    return {"status": "error", "message": str(e)}
+                return {"status": "success"}
+            @app.get("/status")
+            async def status():
+                # Reports the history length and the most recent reply.
+                try:
+                    payload = {
+                        "status": "running",
+                        "messages_count": len(self.messages),
+                        "last_response": self.full_response
+                    }
+                except Exception as e:
+                    logging.error(f"Error in status endpoint: {e}")
+                    return {"status": "error", "message": str(e)}
+                return payload
+            return app
+        except Exception as e:
+            logging.error(f"Error creating FastAPI app: {e}")
+            raise
+    def start_server(self, host: str = "0.0.0.0", port: int = 7860):
+        """Run the FastAPI app under uvicorn, creating the app first if needed.
+
+        Args:
+            host: Interface to bind to.
+            port: TCP port to listen on.
+
+        Raises:
+            Exception: Any error from ``uvicorn.run`` is logged and re-raised.
+        """
+        import uvicorn
+        # create_fastapi_app() returns the app it stores on self.app.
+        app = self.app or self.create_fastapi_app()
+        logging.info(f"Starting server on {host}:{port}")
+        try:
+            uvicorn.run(app, host=host, port=port, log_level="info")
+        except Exception as e:
+            logging.error(f"Error starting server: {e}")
+            raise
+    def launch_ui(self, browser: bool = True):
+        """Launch the stream's built-in UI on 0.0.0.0:7860.
+
+        Args:
+            browser: NOTE(review): currently not used by this method.
+
+        Raises:
+            Exception: Any launch error is logged and re-raised unchanged.
+        """
+        try:
+            if not self.stream:
+                self.create_stream()
+            if not self.app:
+                self.create_fastapi_app()
+            logging.info("Launching RTC UI...")
+            launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
+            # NOTE(review): self.app is passed as the first positional argument
+            # of ui.launch(); confirm that is the parameter intended here.
+            self.stream.ui.launch(self.app, **launch_options)
+        except Exception as e:
+            logging.error(f"Error launching UI: {e}")
+            raise
+    def get_conversation_history(self):
+        """Return a shallow copy of the message history (the message dicts are shared)."""
+        history = self.messages
+        return history.copy()
+    def set_system_prompt(self, new_prompt: str):
+        """Replace the system prompt, both on the instance and in history slot 0."""
+        system_message = {"role": "system", "content": new_prompt}
+        self.sys_prompt = new_prompt
+        self.messages[0] = system_message
+    def get_last_response(self):
+        """Return the text of the most recent reply (empty string after a reset)."""
+        latest = self.full_response
+        return latest

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/tests/qwen_llm_test.py CHANGED Viewed

@@ -1,14 +1,14 @@
 from rag.retriever.retriever_types import *
-from rag.pipeline.qwen_llm import QwenLLM, QwenConfig
 import warnings
 warnings.filterwarnings("ignore")
-async def test_qwen_llm():
     print(" ===== Testing QWEN LLM ==== ")
-    """Example usage of async QwenLLM"""
-    config = QwenConfig(
         temperature=0.5,
         max_length=512,
         generation_timeout=30
@@ -23,20 +23,20 @@ async def test_qwen_llm():
     )
     # Using async context manager
-    async with QwenLLM(config) as llm:
           await test_qwen_single_generation(llm)
           await test_qwen_single_rag_generation(llm, contexts)
           await test_qwen_multiple_template_rag_generation(llm, contexts)
           await test_qwen_batch_generation(llm, contexts)
     print(" ===== Testing LLM DONE ==== ")
-async def test_qwen_single_generation(llm : QwenLLM):
     print(" * Test Single Generation * ")
     response = await llm.generate("Jelaskan tentang AI")
     print(f"Response: {response}")
     print(" * Test Single Generation Done * ")
-async def test_qwen_single_rag_generation(llm : QwenLLM, ctx : RetrievalResult):
     print(" * Test Single RAG Generation * ")
     rag_response = await llm.rag_generate(
             question="Apa itu AI dan machine learning?",
@@ -46,7 +46,7 @@ async def test_qwen_single_rag_generation(llm : QwenLLM, ctx : RetrievalResult):
     print(f"RAG Response: {rag_response}")
     print(" * Test Single RAG Generation Done * ")
-async def test_qwen_multiple_template_rag_generation(llm : QwenLLM,ctx : RetrievalResult):
         print(" * Test Multiple Template Generation * ")
         multi_responses = await llm.multi_template_generate(
              question="Apa itu AI?",
@@ -57,7 +57,7 @@ async def test_qwen_multiple_template_rag_generation(llm : QwenLLM,ctx : Retriev
         print(" * Test Multiple Template Generation Done* ")
-async def test_qwen_batch_generation(llm : QwenLLM, ctx : RetrievalResult):
         print(" * Test Batch Generation * ")
         batch_responses = await llm.batch_generate([
              "Jelaskan tentang Python",

 from rag.retriever.retriever_types import *
+from rag.pipeline.language_model import LM, LMConfig
 import warnings
 warnings.filterwarnings("ignore")
+async def test_language_model():
     print(" ===== Testing QWEN LLM ==== ")
+    """Example usage of async LM"""
+    config = LMConfig(
         temperature=0.5,
         max_length=512,
         generation_timeout=30
     )
     # Using async context manager
+    async with LM(config) as llm:
           await test_qwen_single_generation(llm)
           await test_qwen_single_rag_generation(llm, contexts)
           await test_qwen_multiple_template_rag_generation(llm, contexts)
           await test_qwen_batch_generation(llm, contexts)
     print(" ===== Testing LLM DONE ==== ")
+async def test_qwen_single_generation(llm : LM):
     print(" * Test Single Generation * ")
     response = await llm.generate("Jelaskan tentang AI")
     print(f"Response: {response}")
     print(" * Test Single Generation Done * ")
+async def test_qwen_single_rag_generation(llm : LM, ctx : RetrievalResult):
     print(" * Test Single RAG Generation * ")
     rag_response = await llm.rag_generate(
             question="Apa itu AI dan machine learning?",
     print(f"RAG Response: {rag_response}")
     print(" * Test Single RAG Generation Done * ")
+async def test_qwen_multiple_template_rag_generation(llm : LM,ctx : RetrievalResult):
         print(" * Test Multiple Template Generation * ")
         multi_responses = await llm.multi_template_generate(
              question="Apa itu AI?",
         print(" * Test Multiple Template Generation Done* ")
+async def test_qwen_batch_generation(llm : LM, ctx : RetrievalResult):
         print(" * Test Batch Generation * ")
         batch_responses = await llm.batch_generate([
              "Jelaskan tentang Python",

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/__chat__.py CHANGED Viewed

@@ -1,13 +1,14 @@
 from tests.inference_test import test_inference
 import warnings
 warnings.filterwarnings("ignore")
 import asyncio
 def run_test():
     try:
         # await test_document_retriever()
-        # await test_qwen_llm()
-        asyncio.run(test_inference())
     except Exception as e:
         print(e)

 from tests.inference_test import test_inference
+from huggingface_hub import login
+login(new_session=False)
 import warnings
 warnings.filterwarnings("ignore")
 import asyncio
 def run_test():
     try:
         # await test_document_retriever()
+        # await test_language_model()
+        test_inference()
     except Exception as e:
         print(e)

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/__test__.py CHANGED Viewed

@@ -1,8 +1,3 @@
-# from tests.document_retriever_test import test_document_retriever
-# from tests.document_retriever_test import test_document_retriever
-# from tests.qwen_llm_test import test_qwen_llm
-# from tests.inference_test import test_inference
 from tests.rtc_test import test_rtc
 import warnings
 warnings.filterwarnings("ignore")

 from tests.rtc_test import test_rtc
 import warnings
 warnings.filterwarnings("ignore")

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/app.log ADDED Viewed

File without changes

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/__init__.py CHANGED Viewed

@@ -1,17 +1,44 @@
-from rag.pipeline.qwen_llm import QwenLLM, QwenConfig
 from rag.retriever.langchain_retriever import LangChainRetriever
 from rag.inference.inferencer import Inferencer, InferencerConfig
-config = QwenConfig(
                 temperature=0.3,
                 max_length=512,
-                generation_timeout=30,
                 repetition_penalty=1.1,
-                max_workers = 1,
-                do_sample = True,
-        )
-llm = QwenLLM(
         config = config
 )
@@ -22,29 +49,42 @@ inferencer_config = InferencerConfig(
 )
 document_retriever = LangChainRetriever(
-        embedding_model="all-MiniLM-L6-v2",
         vectorstore_type="chroma",
-        vectorstore_path="./vectorstore",
         use_hybrid_search=True,
         chunk_size=1000,
         chunk_overlap=200
 )
-inferencer = Inferencer(
         model=llm,
         retriever=document_retriever,
         reranker=None,
         config=inferencer_config
 )
-async def get_response(question):
-    result = await inferencer.infer(question, "rag_response")
-    return result
-async def get_stream_response(question):
-    async for item in inferencer.infer_stream(query = question,
-                                             enable_reranking=False,
-                                             template_type="main_template",
-                                             k=3):
-            print("Stream Response :", item)
-            yield item

+from rag.pipeline.language_model import LM, LMConfig
 from rag.retriever.langchain_retriever import LangChainRetriever
 from rag.inference.inferencer import Inferencer, InferencerConfig
+from rag.agents.customer_service_agent import CSAgent
+from rag.agents.query_maker_agent import QueryMakerAgent
+from langchain_core.documents import Document
+from rag.web_search.duckduckgo_search import DuckDuckGoSearch
+from rag.chat_template import get_chat_template
+from transformers import BitsAndBytesConfig
+import torch
+import logging
+import sys
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(funcName)s() - %(message)s',
+    handlers=[
+        logging.FileHandler('app.log'),
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+bnb = BitsAndBytesConfig(
+                            load_in_4bit=True,                      # Enable 4-bit quantization
+                            bnb_4bit_use_double_quant=True,         # Use double quantization
+                            bnb_4bit_quant_type="nf4",              # Use NF4 quantization
+                            bnb_4bit_compute_dtype=torch.bfloat16,  # Compute dtype for 4bit base models
+        )
+config = LMConfig(
+                model_name = "Qwen/Qwen2.5-1.5B-Instruct",
                 temperature=0.3,
                 max_length=512,
+                generation_timeout=100,
                 repetition_penalty=1.1,
+                max_workers = 2,
+                quantization_config = bnb
+)
+llm = LM(
         config = config
 )
 )
 document_retriever = LangChainRetriever(
+        embedding_model="sentence-transformers/all-MiniLM-L6-v2",
         vectorstore_type="chroma",
+        vectorstore_path="vectorstore/",
         use_hybrid_search=True,
         chunk_size=1000,
         chunk_overlap=200
 )
+ddgs = DuckDuckGoSearch()
+cs_inferencer = Inferencer(
         model=llm,
         retriever=document_retriever,
+        # search_engine = ddgs,
         reranker=None,
         config=inferencer_config
 )
+query_maker_inferencer = Inferencer(
+        model=llm,
+        config=inferencer_config
+)
+cs_agent = CSAgent(
+    inferencer = cs_inferencer,
+    prompt_template = get_chat_template("customer_service")
+)
+query_maker_chat_template = get_chat_template("query_maker")
+query_maker_chat_template[1]["content"] = """{question}"""
+query_maker_agent = QueryMakerAgent(
+    inferencer = query_maker_inferencer,
+    prompt_template = query_maker_chat_template
+)

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/inference/inferencer.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from rag.retriever.langchain_retriever import LangChainRetriever
-from rag.pipeline.qwen_llm import QwenLLM, QwenConfig
 from rag.retriever.retriever_types import RetrievalResult
 # from rag.pipeline.reranker import BGEM3Reranker
 from typing import List, Union, Dict, Any, Optional, AsyncGenerator
 import asyncio
@@ -29,15 +31,16 @@ class Inferencer:
     """
     def __init__(self,
-                 model: QwenLLM,
-                 retriever: LangChainRetriever,
                  reranker=None,
                  config: Optional[InferencerConfig] = None):
         """
         Initialize Inferencer
         Args:
-            model: QwenLLM instance
             retriever: LangChainRetriever instance
             reranker: Reranker instance (optional)
             config: InferencerConfig (optional)
@@ -45,6 +48,7 @@ class Inferencer:
         self.model = model
         self.retriever = retriever
         self.reranker = reranker
         self.config = config or InferencerConfig()
         # Setup logging
@@ -85,6 +89,7 @@ class Inferencer:
         try:
             start_time = datetime.now()
             contexts = await self.retriever.retrieve(query, k=k)
             retrieval_time = (datetime.now() - start_time).total_seconds()
             self.logger.info(f"Retrieved {len(contexts.documents) if hasattr(contexts, 'documents') else len(contexts)} contexts in {retrieval_time:.2f}s")
@@ -292,7 +297,7 @@ class Inferencer:
             yield chunk
     async def infer(self,
-                   query: Union[str, List[str]],
                    response_type: Union[List[str], str] = None,
                    k: Optional[int] = None,
                    enable_reranking: Optional[bool] = None,
@@ -321,8 +326,12 @@ class Inferencer:
         try:
             # Step 1: Retrieve contexts
-            retrieved_contexts = await self.retrieve_context(main_query, k=k)
             # Step 2: Rerank contexts (if enabled)
             enable_rerank = enable_reranking if enable_reranking is not None else self.config.enable_reranking
             if enable_rerank:
@@ -363,7 +372,34 @@ class Inferencer:
         except Exception as e:
             self.logger.error(f"Error during inference: {e}")
             raise
     async def infer_stream(self,
                           query: str,
                           k: Optional[int] = None,
@@ -389,8 +425,14 @@ class Inferencer:
         try:
             # Step 1: Retrieve contexts
-            retrieved_contexts = await self.retrieve_context(query, k=k)
             # Step 2: Rerank contexts (if enabled)
             enable_rerank = enable_reranking if enable_reranking is not None else self.config.enable_reranking
             if enable_rerank:

 from rag.retriever.langchain_retriever import LangChainRetriever
+from rag.pipeline.language_model import LM, LMConfig
 from rag.retriever.retriever_types import RetrievalResult
+from rag.web_search.duckduckgo_search import DuckDuckGoSearch
+from langchain_core.documents import Document
 # from rag.pipeline.reranker import BGEM3Reranker
 from typing import List, Union, Dict, Any, Optional, AsyncGenerator
 import asyncio
     """
     def __init__(self,
+                 model: LM,
+                 retriever: LangChainRetriever = None,
+                 search_engine = None,
                  reranker=None,
                  config: Optional[InferencerConfig] = None):
         """
         Initialize Inferencer
         Args:
+            model: LM instance
             retriever: LangChainRetriever instance
             reranker: Reranker instance (optional)
             config: InferencerConfig (optional)
         self.model = model
         self.retriever = retriever
         self.reranker = reranker
+        self.search_engine = search_engine
         self.config = config or InferencerConfig()
         # Setup logging
         try:
             start_time = datetime.now()
             contexts = await self.retriever.retrieve(query, k=k)
+            self.logger.info(f"Retrieved Contexts : {contexts}")
             retrieval_time = (datetime.now() - start_time).total_seconds()
             self.logger.info(f"Retrieved {len(contexts.documents) if hasattr(contexts, 'documents') else len(contexts)} contexts in {retrieval_time:.2f}s")
             yield chunk
     async def infer(self,
+                   query: str,
                    response_type: Union[List[str], str] = None,
                    k: Optional[int] = None,
                    enable_reranking: Optional[bool] = None,
         try:
             # Step 1: Retrieve contexts
+            if(self.search_engine):
+                await self.retrieve_from_search_engine(query, k = k)
+            if(self.retriever):
+                retrieved_contexts = await self.retrieve_context(main_query, k=k)
+            else:
+                retrieved_contexts  = ""
             # Step 2: Rerank contexts (if enabled)
             enable_rerank = enable_reranking if enable_reranking is not None else self.config.enable_reranking
             if enable_rerank:
         except Exception as e:
             self.logger.error(f"Error during inference: {e}")
             raise
+    async def retrieve_from_search_engine(self, query: str, k: int = 3):
+        """
+        Alternative method: Process results as they come
+        """
+        from langchain_core.documents import Document
+        search_results = []
+        try:
+            # Process results one by one as they come
+            async for result in self.search_engine.search(query, max_results=k):
+                self.logger.info(f"Processing SEO Result: {result[:100]}...")
+                doc = Document(
+                    page_content=result,
+                    metadata={"source": "internet_search", "query": query}
+                )
+                search_results.append(doc)
+                # Optionally add to retriever immediately
+                await self.retriever.add_documents([doc])
+            self.logger.info(f"Processed {len(search_results)} search results")
+            return search_results
+        except Exception as e:
+            self.logger.error(f"Error in retrieve_from_search_engine_alternative: {e}", exc_info=True)
+            raise
     async def infer_stream(self,
                           query: str,
                           k: Optional[int] = None,
         try:
             # Step 1: Retrieve contexts
+            if(self.search_engine):
+                await self.retrieve_from_search_engine(query, k = k)
+            if(self.retriever is not None):
+                retrieved_contexts = await self.retrieve_context(query, k=k)
+            else:
+                retrieved_contexts = ""
             # Step 2: Rerank contexts (if enabled)
             enable_rerank = enable_reranking if enable_reranking is not None else self.config.enable_reranking
             if enable_rerank:

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/pipeline/qwen_llm.py CHANGED Viewed

@@ -17,7 +17,7 @@ import copy
 @dataclass
 class QwenConfig:
     """Konfigurasi untuk model Qwen 0.5B"""
-    model_name: str = "Qwen/Qwen2.5-0.5B-Instruct"
     device: str = "cuda"
     torch_dtype: torch.dtype = torch.float16
     max_length: int = 2048
@@ -286,14 +286,35 @@ class QwenLLM:
             formatted_template = []
             for cht in template_data:
-                # print("question for template = ", question)
-                if("{context}" in cht["content"]):
-                    cht["content"] = cht["content"].format(context=formatted_context)
-                if("{question}" in cht["content"]):
-                    cht["content"] = cht["content"].format(question=question)
-                formatted_template.append(cht)
             self.logger.info("Formatted Template", formatted_template)
             print("Forrmatted Template", formatted_template)

 @dataclass
 class QwenConfig:
     """Konfigurasi untuk model Qwen 0.5B"""
+    model_name: str = "Qwen/Qwen2.5-1.5B-Instruct"
     device: str = "cuda"
     torch_dtype: torch.dtype = torch.float16
     max_length: int = 2048
             formatted_template = []
             for cht in template_data:
+                    # Create a copy of the content to avoid modifying the original
+                content = cht["content"]
+                # Format both placeholders at once to avoid KeyError
+                if "{context}" in content or "{question}" in content:
+                    try:
+                        content = content.format(
+                            context=formatted_context,
+                            question=question
+                        )
+                    except KeyError as e:
+                        self.logger.error(f"Missing placeholder in template: {e}")
+                        # Fallback: format only available placeholders
+                        if "{context}" in content:
+                            content = content.replace("{context}", formatted_context)
+                        if "{question}" in content:
+                            content = content.replace("{question}", question)
+                # Create new dict with formatted content
+                formatted_chat = {
+                    "role": cht["role"],
+                    "content": content
+                }
+                # Copy other fields if they exist
+                if "description" in cht:
+                    formatted_chat["description"] = cht["description"]
+                formatted_template.append(formatted_chat)
             self.logger.info("Formatted Template", formatted_template)
             print("Forrmatted Template", formatted_template)

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/prompt_tuner/chat_template.py CHANGED Viewed

@@ -8,18 +8,20 @@ def RAG_TEMPLATES():
             1. Selalu berikan sapaan yang ramah dan profesional
             2. Gunakan HANYA informasi dari knowledge base yang tersedia
-            3. Berikan jawaban yang jelas, mudah dipahami, dan terstruktur semuanya berdasarkan konteks yang diberikan yaitu :
-            {context}
             4. Jika informasi tidak tersedia, tawarkan alternatif bantuan atau arahkan ke channel yang tepat
             5. Gunakan bahasa yang sopan dan empati terhadap kebutuhan pelanggan
             6. Akhiri dengan penawaran bantuan lebih lanjut
             """,
             "description": "Template dengan system prompt untuk customer service professional"
             },
             {
             "role" : "user",
-            "content" : """
-            Dari konteks yang diberikan context berikan jawaban atas pertanyaan saya yaitu : {question}
             """
             },
         ],

             1. Selalu berikan sapaan yang ramah dan profesional
             2. Gunakan HANYA informasi dari knowledge base yang tersedia
+            3. Berikan jawaban yang jelas, mudah dipahami, dan terstruktur semuanya berdasarkan konteks yang diberikan user.
             4. Jika informasi tidak tersedia, tawarkan alternatif bantuan atau arahkan ke channel yang tepat
             5. Gunakan bahasa yang sopan dan empati terhadap kebutuhan pelanggan
             6. Akhiri dengan penawaran bantuan lebih lanjut
             """,
             "description": "Template dengan system prompt untuk customer service professional"
             },
             {
             "role" : "user",
+            "content" : """Dari konteks yang diberikan : {context}
+            berikan jawaban atas pertanyaan saya yaitu : {question}
             """
             },
         ],

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rag/web_search/duckduckgo_search.py ADDED Viewed

	@@ -0,0 +1,142 @@

+from ddgs import DDGS
+from langchain_community.document_loaders import AsyncChromiumLoader
+from langchain_community.document_transformers import BeautifulSoupTransformer
+import re
+import logging
+from typing import AsyncGenerator, List
+class DuckDuckGoSearch:
+    def __init__(self, html_loader: AsyncChromiumLoader = None, html_parser = None):
+        # Initialize dengan default values jika tidak diberikan
+        self.html_loader = html_loader or AsyncChromiumLoader([])
+        self.html_parser = html_parser or BeautifulSoupTransformer()
+        self.logger = logging.getLogger("ddgs_logger")
+    async def get_page(self, urls: List[str]):
+        """Get page content from URLs - returns list of documents"""
+        try:
+            self.html_loader.urls = urls
+            html = await self.html_loader.aload()  # This returns a LIST
+            self.logger.info(f"search engine aload result: {len(html)} documents loaded")
+            docs_transformed = self.html_parser.transform_documents(
+                html,
+                tags_to_extract=["p"],
+                remove_unwanted_tags=["a"]
+            )
+            return docs_transformed  # Returns LIST of documents
+        except Exception as e:
+            self.logger.error(f"Error loading pages: {e}", exc_info=True)
+            return []  # Return empty list on error
+    def truncate(self, text: str, max_words: int = 400) -> str:
+        """Truncate text to specified number of words"""
+        if not text:
+            return ""
+        words = text.split()
+        if len(words) <= max_words:
+            return text
+        truncated = " ".join(words[:max_words])
+        return truncated + "..." if len(words) > max_words else truncated
+    async def search(self, query: str, max_results: int = 5) -> AsyncGenerator[str, None]:
+        """
+        Search and yield page contents one by one
+        FIXED VERSION: Properly handle async iteration
+        """
+        try:
+            self.logger.info(f"Searching for: {query} (max_results: {max_results})")
+            # Step 1: Get search results from DDGS (regular iterator)
+            results = DDGS().text(query, max_results=max_results)
+            urls = []
+            # Step 2: Extract URLs using regular for loop (NOT async for)
+            for result in results:  # ← FIXED: Regular for loop
+                url = result.get('href')
+                if url:
+                    urls.append(url)
+            self.logger.info(f"Found {len(urls)} URLs to process")
+            if not urls:
+                self.logger.warning("No URLs found from search results")
+                return
+            # Step 3: Get page content (await the coroutine first)
+            docs = await self.get_page(urls)  # ← FIXED: Await first, get list
+            # Step 4: Process documents using regular for loop (NOT async for)
+            for doc in docs:  # ← FIXED: Regular for loop on list
+                try:
+                    if hasattr(doc, 'page_content') and doc.page_content:
+                        # Clean up text
+                        page_text = re.sub(r"\n\n+", "\n", doc.page_content)
+                        page_text = page_text.strip()
+                        if page_text:  # Only yield if there's actual content
+                            text = self.truncate(page_text)
+                            yield text  # Yield makes this an async generator
+                except Exception as e:
+                    self.logger.error(f"Error processing document: {e}")
+                    continue
+        except Exception as e:
+            self.logger.error(f"Error in search method: {e}", exc_info=True)
+            # Don't re-raise, just log and return (generator will be empty)
+    async def search_with_metadata(self, query: str, max_results: int = 5) -> AsyncGenerator[dict, None]:
+        """
+        Alternative method that yields dictionaries with metadata
+        """
+        try:
+            results = DDGS().text(query, max_results=max_results)
+            urls_and_titles = []
+            # Collect URLs and titles
+            for result in results:
+                url = result.get('href')
+                title = result.get('title', 'No title')
+                if url:
+                    urls_and_titles.append({'url': url, 'title': title})
+            if not urls_and_titles:
+                return
+            # Get page content
+            urls = [item['url'] for item in urls_and_titles]
+            docs = await self.get_page(urls)
+            # Process and yield with metadata
+            for i, doc in enumerate(docs):
+                try:
+                    if hasattr(doc, 'page_content') and doc.page_content:
+                        page_text = re.sub(r"\n\n+", "\n", doc.page_content)
+                        page_text = page_text.strip()
+                        if page_text:
+                            text = self.truncate(page_text)
+                            # Get metadata if available
+                            metadata = {}
+                            if i < len(urls_and_titles):
+                                metadata = urls_and_titles[i]
+                            yield {
+                                'content': text,
+                                'url': metadata.get('url', 'Unknown'),
+                                'title': metadata.get('title', 'No title'),
+                                'word_count': len(text.split())
+                            }
+                except Exception as e:
+                    self.logger.error(f"Error processing document {i}: {e}")
+                    continue
+        except Exception as e:
+            self.logger.error(f"Error in search_with_metadata: {e}", exc_info=True)

space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/space/app/rtc/__init__.py CHANGED Viewed

@@ -2,11 +2,13 @@ from openai import OpenAI
 from elevenlabs.client import ElevenLabs
 from tts.audio_edge_tts import EdgeTTS
 from config.constant import OPENAI_API_KEY, ELEVENLABS_API_KEY
 from rtc.rtc_call import RTCHandler
 from stt.whisper_stt import WhisperSTT
-whisper_stt = WhisperSTT("turbo")
 edge_tts = EdgeTTS("id-ID-ArdiNeural",  "+0%", "+0%")
 rtc_handler = RTCHandler(whisper_stt, edge_tts)
 def handle_rtc():

 from elevenlabs.client import ElevenLabs
 from tts.audio_edge_tts import EdgeTTS
 from config.constant import OPENAI_API_KEY, ELEVENLABS_API_KEY
+# from rtc.rtc_call import RTCHandler
 from rtc.rtc_call import RTCHandler
 from stt.whisper_stt import WhisperSTT
+whisper_stt = WhisperSTT(model_size = "base", device = "cuda")
 edge_tts = EdgeTTS("id-ID-ArdiNeural",  "+0%", "+0%")
+openai_client = OpenAI(api_key = OPENAI_API_KEY)
 rtc_handler = RTCHandler(whisper_stt, edge_tts)
 def handle_rtc():