minh9972t12 committed
Commit eb10851 · verified · 1 Parent(s): 7ab804d

Upload 18 files

advanced_rag.py ADDED
@@ -0,0 +1,410 @@
+ """
+ Advanced RAG techniques for improved retrieval and generation (Best Case 2025)
+ Includes: LLM-Based Query Expansion, Cross-Encoder Reranking, Contextual Compression, Hybrid Search
+ """
+
+ from typing import List, Dict, Optional, Tuple
+ import numpy as np
+ from dataclasses import dataclass
+ import re
+ from sentence_transformers import CrossEncoder
+
+
+ @dataclass
+ class RetrievedDocument:
+     """Document retrieved from vector database"""
+     id: str
+     text: str
+     confidence: float
+     metadata: Dict
+
+
+ class AdvancedRAG:
+     """Advanced RAG system with 2025 best practices"""
+
+     def __init__(self, embedding_service, qdrant_service):
+         self.embedding_service = embedding_service
+         self.qdrant_service = qdrant_service
+
+         # Initialize Cross-Encoder for reranking (multilingual for Vietnamese support)
+         print("Loading Cross-Encoder model for reranking...")
+         # Use a multilingual model instead of the English-only ms-marco variant
+         self.cross_encoder = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1')
+         print("✓ Cross-Encoder loaded (multilingual)")
+
+     def expand_query_llm(
+         self,
+         query: str,
+         hf_client=None
+     ) -> List[str]:
+         """
+         Expand query using an LLM (Best Case 2025)
+         Generates query variations and sub-questions
+
+         Args:
+             query: Original user query
+             hf_client: HuggingFace InferenceClient (optional)
+
+         Returns:
+             List of expanded queries
+         """
+         queries = [query]
+
+         # Fall back to rule-based expansion if no LLM client is available
+         if not hf_client:
+             return self._expand_query_rule_based(query)
+
+         try:
+             # LLM-based expansion prompt
+             expansion_prompt = f"""Given this user question, generate 2-3 alternative phrasings or sub-questions that would help retrieve relevant information.
+
+ User Question: {query}
+
+ Alternative queries (one per line):"""
+
+             # Generate expansions
+             response = ""
+             for msg in hf_client.chat_completion(
+                 messages=[{"role": "user", "content": expansion_prompt}],
+                 max_tokens=256,
+                 stream=True,
+                 temperature=0.7,
+                 model="openai/gpt-oss-20b"
+             ):
+                 if msg.choices and msg.choices[0].delta.content:
+                     response += msg.choices[0].delta.content
+
+             # Parse expansions
+             lines = [line.strip() for line in response.split('\n') if line.strip()]
+             # Filter out numbered lists, dashes, etc.
+             clean_lines = []
+             for line in lines:
+                 # Remove common list markers
+                 cleaned = re.sub(r'^[\d\-\*\•]+[\.\)]\s*', '', line)
+                 if cleaned and len(cleaned) > 5:
+                     clean_lines.append(cleaned)
+
+             queries.extend(clean_lines[:3])  # Add top 3 expansions
+
+         except Exception as e:
+             print(f"LLM expansion failed, using rule-based: {e}")
+             return self._expand_query_rule_based(query)
+
+         return queries[:4]  # Original + 3 expansions
+
+     def _expand_query_rule_based(self, query: str) -> List[str]:
+         """
+         Fallback rule-based query expansion
+         Simple but effective Vietnamese-aware expansion
+         """
+         queries = [query]
+
+         # Vietnamese question words
+         question_words = ['ai', 'gì', 'nào', 'đâu', 'khi nào', 'như thế nào',
+                           'sao', 'tại sao', 'có', 'là', 'được', 'không', 'làm sao']
+
+         query_lower = query.lower()
+         for qw in question_words:
+             if qw in query_lower:
+                 variant = query_lower.replace(qw, '').strip()
+                 if variant and variant != query_lower:
+                     queries.append(variant)
+                 break  # One variation is enough
+
+         # Extract key phrases
+         words = query.split()
+         if len(words) > 3:
+             key_phrases = ' '.join(words[1:]) if words[0].lower() in question_words else ' '.join(words[:3])
+             if key_phrases not in queries:
+                 queries.append(key_phrases)
+
+         return queries[:3]
+
+     def multi_query_retrieval(
+         self,
+         query: str,
+         top_k: int = 5,
+         score_threshold: float = 0.5,
+         expanded_queries: Optional[List[str]] = None
+     ) -> List[RetrievedDocument]:
+         """
+         Retrieve documents using multiple query variations
+         Combines results from all query variations with deduplication
+         """
+         if expanded_queries is None:
+             expanded_queries = [query]
+
+         all_results = {}  # Deduplicate by doc_id
+
+         for q in expanded_queries:
+             # Generate embedding for each query variant
+             query_embedding = self.embedding_service.encode_text(q)
+
+             # Search in Qdrant
+             results = self.qdrant_service.search(
+                 query_embedding=query_embedding,
+                 limit=top_k,
+                 score_threshold=score_threshold
+             )
+
+             # Add to results (keep highest score for duplicates)
+             for result in results:
+                 doc_id = result["id"]
+                 if doc_id not in all_results or result["confidence"] > all_results[doc_id].confidence:
+                     # Get text from metadata - supports both "text" (string) and "texts" (array)
+                     metadata = result["metadata"]
+                     doc_text = metadata.get("text", "")
+                     if not doc_text and "texts" in metadata:
+                         # If it is an array, join it into a single string
+                         texts_arr = metadata.get("texts", [])
+                         if isinstance(texts_arr, list):
+                             doc_text = "\n".join(texts_arr)
+                         else:
+                             doc_text = str(texts_arr)
+
+                     all_results[doc_id] = RetrievedDocument(
+                         id=doc_id,
+                         text=doc_text,
+                         confidence=result["confidence"],
+                         metadata=metadata
+                     )
+
+         # Sort by confidence and return the top candidates
+         sorted_results = sorted(all_results.values(), key=lambda x: x.confidence, reverse=True)
+         return sorted_results[:top_k * 2]  # Return extra candidates for reranking
+
+     def rerank_documents_cross_encoder(
+         self,
+         query: str,
+         documents: List[RetrievedDocument],
+         top_k: int = 5
+     ) -> List[RetrievedDocument]:
+         """
+         Rerank documents using a Cross-Encoder (Best Case 2025)
+         Cross-Encoders provide superior relevance scoring compared to bi-encoders
+
+         Args:
+             query: Original user query
+             documents: Retrieved documents to rerank
+             top_k: Number of top documents to return
+
+         Returns:
+             Reranked documents
+         """
+         if not documents:
+             return documents
+
+         # Prepare query-document pairs for the Cross-Encoder
+         pairs = [[query, doc.text] for doc in documents]
+
+         # Get Cross-Encoder scores (raw logits)
+         ce_scores = self.cross_encoder.predict(pairs)
+         ce_scores = [float(s) for s in ce_scores]
+
+         # Min-max normalization to scale scores to 0-1
+         # (instead of sigmoid, which yields very low scores for negative logits)
+         min_score = min(ce_scores)
+         max_score = max(ce_scores)
+
+         if max_score - min_score > 0.001:  # The scores actually differ
+             ce_scores_normalized = [
+                 (score - min_score) / (max_score - min_score)
+                 for score in ce_scores
+             ]
+         else:
+             # All scores are nearly equal -> keep the original confidence
+             ce_scores_normalized = [doc.confidence for doc in documents]
+
+         # Combine: 70% Cross-Encoder ranking + 30% original cosine similarity,
+         # to retain part of the semantic similarity signal from the embedding
+         reranked = []
+         for doc, ce_norm in zip(documents, ce_scores_normalized):
+             combined_score = 0.7 * ce_norm + 0.3 * doc.confidence
+             reranked.append(RetrievedDocument(
+                 id=doc.id,
+                 text=doc.text,
+                 confidence=float(combined_score),
+                 metadata=doc.metadata
+             ))
+
+         # Sort by combined score
+         reranked.sort(key=lambda x: x.confidence, reverse=True)
+         return reranked[:top_k]
+
+     def compress_context(
+         self,
+         query: str,
+         documents: List[RetrievedDocument],
+         max_tokens: int = 500
+     ) -> List[RetrievedDocument]:
+         """
+         Compress context - keep the important content intact and only truncate when too long.
+         Does NOT use word overlap, since that wrongly removes important information.
+         """
+         compressed_docs = []
+
+         for doc in documents:
+             text = doc.text.strip()
+
+             # Only truncate if the text is too long (estimate ~4 chars/token)
+             max_chars = max_tokens * 4
+             if len(text) > max_chars:
+                 # Cut intelligently at the nearest sentence boundary
+                 truncated = text[:max_chars]
+                 last_period = max(
+                     truncated.rfind('.'),
+                     truncated.rfind('!'),
+                     truncated.rfind('?'),
+                     truncated.rfind('\n')
+                 )
+                 if last_period > max_chars * 0.5:  # Punctuation found in the second half
+                     truncated = truncated[:last_period + 1]
+                 text = truncated.strip()
+
+             compressed_docs.append(RetrievedDocument(
+                 id=doc.id,
+                 text=text,
+                 confidence=doc.confidence,
+                 metadata=doc.metadata
+             ))
+
+         return compressed_docs
+
+     def _split_sentences(self, text: str) -> List[str]:
+         """Split text into sentences (Vietnamese-aware)"""
+         sentences = re.split(r'[.!?]+', text)
+         return [s.strip() for s in sentences if s.strip()]
+
+     def hybrid_rag_pipeline(
+         self,
+         query: str,
+         top_k: int = 5,
+         score_threshold: float = 0.5,
+         use_reranking: bool = True,
+         use_compression: bool = True,
+         use_query_expansion: bool = True,
+         max_context_tokens: int = 500,
+         hf_client=None
+     ) -> Tuple[List[RetrievedDocument], Dict]:
+         """
+         Complete advanced RAG pipeline (Best Case 2025)
+         1. LLM-based query expansion
+         2. Multi-query retrieval
+         3. Cross-Encoder reranking
+         4. Contextual compression
+
+         Args:
+             query: User query
+             top_k: Number of documents to return
+             score_threshold: Minimum relevance score
+             use_reranking: Enable Cross-Encoder reranking
+             use_compression: Enable context compression
+             use_query_expansion: Enable LLM-based query expansion
+             max_context_tokens: Max tokens for compression
+             hf_client: HuggingFace InferenceClient for expansion
+
+         Returns:
+             (documents, stats)
+         """
+         stats = {
+             "original_query": query,
+             "expanded_queries": [],
+             "initial_results": 0,
+             "after_rerank": 0,
+             "after_compression": 0,
+             "used_cross_encoder": use_reranking,
+             "used_llm_expansion": use_query_expansion and hf_client is not None
+         }
+
+         # Step 1: Query expansion (LLM-based or rule-based)
+         if use_query_expansion:
+             expanded_queries = self.expand_query_llm(query, hf_client)
+         else:
+             expanded_queries = [query]
+
+         stats["expanded_queries"] = expanded_queries
+
+         # Step 2: Multi-query retrieval
+         documents = self.multi_query_retrieval(
+             query=query,
+             top_k=top_k * 2,  # Get more candidates for reranking
+             score_threshold=score_threshold,
+             expanded_queries=expanded_queries
+         )
+         stats["initial_results"] = len(documents)
+
+         # Step 3: Cross-Encoder reranking (Best Case 2025)
+         if use_reranking and documents:
+             documents = self.rerank_documents_cross_encoder(
+                 query=query,
+                 documents=documents,
+                 top_k=top_k
+             )
+         else:
+             documents = documents[:top_k]
+         stats["after_rerank"] = len(documents)
+
+         # Step 4: Contextual compression (optional)
+         if use_compression and documents:
+             documents = self.compress_context(
+                 query=query,
+                 documents=documents,
+                 max_tokens=max_context_tokens
+             )
+         stats["after_compression"] = len(documents)
+
+         return documents, stats
+
+     def format_context_for_llm(
+         self,
+         documents: List[RetrievedDocument],
+         include_metadata: bool = True
+     ) -> str:
+         """
+         Format retrieved documents into a context string for the LLM
+         Uses a clear structure for improved LLM understanding
+         """
+         if not documents:
+             return ""
+
+         context_parts = ["RELEVANT CONTEXT:\n"]
+
+         for i, doc in enumerate(documents, 1):
+             context_parts.append(f"\n--- Document {i} (Relevance: {doc.confidence:.2%}) ---")
+             context_parts.append(doc.text)
+
+             if include_metadata and doc.metadata:
+                 # Add useful metadata
+                 meta_str = []
+                 for key, value in doc.metadata.items():
+                     if key not in ['text', 'texts'] and value:
+                         meta_str.append(f"{key}: {value}")
+                 if meta_str:
+                     context_parts.append(f"[Metadata: {', '.join(meta_str)}]")
+
+         context_parts.append("\n--- End of Context ---\n")
+         return "\n".join(context_parts)
+
+     def build_rag_prompt(
+         self,
+         query: str,
+         context: str,
+         system_message: str = "You are a helpful AI assistant."
+     ) -> str:
+         """
+         Build an optimized RAG system prompt for the LLM
+         The query itself is sent separately in the user message
+         """
+         prompt_template = f"""{system_message}
+
+ {context}
+
+ HƯỚNG DẪN TRẢ LỜI:
+ 1. Đóng vai trò là một trợ lý ảo thân thiện, trả lời tự nhiên bằng tiếng Việt.
+ 2. Dựa vào CONTEXT được cung cấp để trả lời câu hỏi.
+ 3. KHÔNG copy nguyên văn text từ context. Hãy tổng hợp lại thông tin một cách mạch lạc.
+ 4. Bắt đầu câu trả lời bằng các cụm từ tự nhiên như: "Dựa trên dữ liệu tôi tìm thấy...", "Tôi có thông tin về các sự kiện sau...", "Có vẻ như đây là những gì bạn đang tìm...".
+ 5. Nếu có nhiều kết quả, hãy liệt kê ngắn gọn các điểm chính (Tên, Thời gian, Địa điểm).
+ 6. Nếu context không liên quan, hãy lịch sự nói rằng bạn chưa tìm thấy thông tin phù hợp trong hệ thống."""
+
+         return prompt_template
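For orientation, a minimal usage sketch of the pipeline above (not part of this commit): `embedding_service` and `qdrant_service` are placeholders for the project's own service objects, and every call below uses only signatures defined in advanced_rag.py itself.

```python
# Hypothetical wiring of AdvancedRAG; embedding_service and qdrant_service are
# stand-ins for the real services constructed elsewhere in this repo.
from advanced_rag import AdvancedRAG

rag = AdvancedRAG(embedding_service, qdrant_service)

# Full pipeline: expansion -> multi-query retrieval -> rerank -> compression.
query = "Sự kiện nhạc rock cuối tuần này?"
docs, stats = rag.hybrid_rag_pipeline(query=query, top_k=5, hf_client=None)  # no client -> rule-based expansion

# Turn the retrieved documents into an LLM-ready system prompt.
context = rag.format_context_for_llm(docs)
system_prompt = rag.build_rag_prompt(query=query, context=context)
print(stats["expanded_queries"], stats["after_rerank"])
```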
agent_chat_stream.py ADDED
@@ -0,0 +1,111 @@
+ """
+ Agent Chat Streaming Endpoint
+ SSE-based real-time streaming for Sales & Feedback agents
+ """
+ import asyncio
+ from typing import AsyncGenerator
+ from datetime import datetime
+
+ from stream_utils import format_sse, EVENT_STATUS, EVENT_TOKEN, EVENT_DONE, EVENT_ERROR, EVENT_METADATA
+
+
+ async def agent_chat_stream(
+     request,
+     agent_service,
+     conversation_service
+ ) -> AsyncGenerator[str, None]:
+     """
+     Stream agent responses in real-time (SSE format)
+
+     Args:
+         request: ChatRequest with message, session_id, mode, user_id
+         agent_service: AgentService instance
+         conversation_service: ConversationService instance
+
+     Yields SSE events:
+         - status: Processing updates
+         - token: Text chunks
+         - metadata: Session info
+         - done: Completion signal
+         - error: Error messages
+     """
+     try:
+         # === SESSION MANAGEMENT ===
+         session_id = request.session_id
+         if not session_id:
+             session_id = conversation_service.create_session(
+                 metadata={"user_agent": "api", "created_via": "agent_stream"},
+                 user_id=request.user_id
+             )
+         yield format_sse(EVENT_METADATA, {"session_id": session_id})
+
+         # Get conversation history
+         history = conversation_service.get_conversation_history(session_id)
+
+         # Convert to messages format
+         messages = []
+         for h in history:
+             messages.append({"role": h["role"], "content": h["content"]})
+
+         # Determine mode
+         mode = getattr(request, 'mode', 'sales')  # Default to sales
+         user_id = getattr(request, 'user_id', None)
+         access_token = getattr(request, 'access_token', None)
+
+         # Debug logging
+         print("📋 Request Info:")
+         print(f"   - Mode: {mode}")
+         print(f"   - User ID: {user_id}")
+         print(f"   - Access Token: {'✅ Present' if access_token else '❌ Missing'}")
+         if access_token:
+             print(f"   - Token preview: {access_token[:20]}...")
+
+         # === STATUS UPDATE ===
+         if mode == 'feedback':
+             yield format_sse(EVENT_STATUS, "Đang kiểm tra lịch sử sự kiện của bạn...")
+         else:
+             yield format_sse(EVENT_STATUS, "Đang tư vấn...")
+
+         # === CALL AGENT ===
+         result = await agent_service.chat(
+             user_message=request.message,
+             conversation_history=messages,
+             mode=mode,
+             user_id=user_id,
+             access_token=access_token
+         )
+
+         agent_response = result["message"]
+
+         # === STREAM RESPONSE IN SMALL CHUNKS ===
+         # Simple fixed-size chunk streaming over the completed response
+         chunk_size = 5  # Characters per chunk
+         for i in range(0, len(agent_response), chunk_size):
+             chunk = agent_response[i:i + chunk_size]
+             yield format_sse(EVENT_TOKEN, chunk)
+             # Small delay for smoother streaming
+             await asyncio.sleep(0.02)
+
+         # === SAVE HISTORY ===
+         conversation_service.add_message(
+             session_id=session_id,
+             role="user",
+             content=request.message
+         )
+         conversation_service.add_message(
+             session_id=session_id,
+             role="assistant",
+             content=agent_response
+         )
+
+         # === DONE ===
+         yield format_sse(EVENT_DONE, {
+             "session_id": session_id,
+             "timestamp": datetime.utcnow().isoformat(),
+             "mode": mode,
+             "tool_calls": len(result.get("tool_calls", []))
+         })
+
+     except Exception as e:
+         print(f"⚠️ Agent Stream Error: {e}")
+         yield format_sse(EVENT_ERROR, str(e))
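A sketch of how this generator might be exposed over HTTP; the route path, the ChatRequest model, and the service instances are assumptions, not part of this commit — only the generator itself is.

```python
# Hypothetical FastAPI endpoint around agent_chat_stream; agent_service and
# conversation_service are assumed to be created at application startup.
from typing import Optional
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from agent_chat_stream import agent_chat_stream

app = FastAPI()

class ChatRequest(BaseModel):
    message: str
    session_id: Optional[str] = None
    mode: str = "sales"
    user_id: Optional[str] = None
    access_token: Optional[str] = None

@app.post("/agent/chat/stream")
async def chat_stream(request: ChatRequest):
    # SSE responses must use the text/event-stream media type
    return StreamingResponse(
        agent_chat_stream(request, agent_service, conversation_service),
        media_type="text/event-stream",
    )
```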
agent_service.py ADDED
@@ -0,0 +1,503 @@
+ """
+ Agent Service - Central Brain for Sales & Feedback Agents
+ Manages the LLM conversation loop with tool calling
+ """
+ import json
+ import os
+ import re
+ from typing import Dict, Any, List, Optional
+
+ from tools_service import ToolsService
+
+
+ class AgentService:
+     """
+     Manages the conversation loop between User -> LLM -> Tools -> Response
+     """
+
+     def __init__(
+         self,
+         tools_service: ToolsService,
+         embedding_service,
+         qdrant_service,
+         advanced_rag,
+         hf_token: str,
+         feedback_tracking=None  # NEW: Optional feedback tracking
+     ):
+         self.tools_service = tools_service
+         self.embedding_service = embedding_service
+         self.qdrant_service = qdrant_service
+         self.advanced_rag = advanced_rag
+         self.hf_token = hf_token
+         self.feedback_tracking = feedback_tracking
+
+         # Load system prompts
+         self.prompts = self._load_prompts()
+
+     def _load_prompts(self) -> Dict[str, str]:
+         """Load system prompts from files"""
+         prompts = {}
+         prompts_dir = "prompts"
+
+         for mode in ["sales_agent", "feedback_agent"]:
+             filepath = os.path.join(prompts_dir, f"{mode}.txt")
+             try:
+                 with open(filepath, 'r', encoding='utf-8') as f:
+                     prompts[mode] = f.read()
+                 print(f"✓ Loaded prompt: {mode}")
+             except Exception as e:
+                 print(f"⚠️ Error loading {mode} prompt: {e}")
+                 prompts[mode] = ""
+
+         return prompts
+
+     async def chat(
+         self,
+         user_message: str,
+         conversation_history: List[Dict],
+         mode: str = "sales",  # "sales" or "feedback"
+         user_id: Optional[str] = None,
+         access_token: Optional[str] = None,  # NEW: For authenticated API calls
+         max_iterations: int = 3
+     ) -> Dict[str, Any]:
+         """
+         Main conversation loop
+
+         Args:
+             user_message: User's input
+             conversation_history: Previous messages [{"role": "user", "content": ...}, ...]
+             mode: "sales" or "feedback"
+             user_id: User ID (for feedback mode to check purchase history)
+             access_token: JWT token for authenticated API calls
+             max_iterations: Maximum tool call iterations to prevent infinite loops
+
+         Returns:
+             {
+                 "message": "Bot response",
+                 "tool_calls": [...],  # List of tools called (for debugging)
+                 "mode": mode
+             }
+         """
+         print(f"\n🤖 Agent Mode: {mode}")
+         print(f"👤 User Message: {user_message}")
+         print("🔑 Auth Info:")
+         print(f"   - User ID: {user_id}")
+         print(f"   - Access Token: {'✅ Received' if access_token else '❌ None'}")
+
+         # Store user_id and access_token for tool calls
+         self.current_user_id = user_id
+         self.current_access_token = access_token
+         if access_token:
+             print(f"   - Stored access_token for tools: {access_token[:20]}...")
+         if user_id:
+             print(f"   - Stored user_id for tools: {user_id}")
+
+         # Select system prompt
+         system_prompt = self._get_system_prompt(mode)
+
+         # Build conversation context
+         messages = self._build_messages(system_prompt, conversation_history, user_message)
+
+         # Agentic loop: the LLM may call tools multiple times
+         tool_calls_made = []
+         current_response = None
+
+         for iteration in range(max_iterations):
+             print(f"\n🔄 Iteration {iteration + 1}")
+
+             # Call LLM
+             llm_response = await self._call_llm(messages)
+             print(f"🧠 LLM Response: {llm_response[:200]}...")
+
+             # Check if the LLM wants to call a tool
+             tool_call = self._parse_tool_call(llm_response)
+
+             if not tool_call:
+                 # No tool call -> this is the final response
+                 current_response = llm_response
+                 break
+
+             # Execute tool
+             print(f"🔧 Tool Called: {tool_call['tool_name']}")
+
+             # Auto-inject the real user_id for get_purchased_events
+             if tool_call['tool_name'] == 'get_purchased_events' and self.current_user_id:
+                 print(f"🔄 Auto-injecting real user_id: {self.current_user_id}")
+                 tool_call['arguments']['user_id'] = self.current_user_id
+
+             tool_result = await self.tools_service.execute_tool(
+                 tool_call['tool_name'],
+                 tool_call['arguments'],
+                 access_token=self.current_access_token  # Pass access_token
+             )
+
+             # Record tool call
+             tool_calls_made.append({
+                 "function": tool_call['tool_name'],
+                 "arguments": tool_call['arguments'],
+                 "result": tool_result
+             })
+
+             # Add the tool result to the conversation
+             messages.append({
+                 "role": "assistant",
+                 "content": llm_response
+             })
+             messages.append({
+                 "role": "system",
+                 "content": f"Tool Result:\n{self._format_tool_result({'result': tool_result})}"
+             })
+
+             # If the tool returns "run_rag_search", handle it specially
+             if isinstance(tool_result, dict) and tool_result.get("action") == "run_rag_search":
+                 rag_results = await self._execute_rag_search(tool_result["query"])
+                 messages[-1]["content"] = f"RAG Search Results:\n{rag_results}"
+
+         # Clean up the response
+         final_response = current_response or llm_response
+         final_response = self._clean_response(final_response)
+
+         return {
+             "message": final_response,
+             "tool_calls": tool_calls_made,
+             "mode": mode
+         }
+
+     def _get_system_prompt(self, mode: str) -> str:
+         """Get system prompt for the selected mode with tools definition"""
+         prompt_key = f"{mode}_agent" if mode in ["sales", "feedback"] else "sales_agent"
+         base_prompt = self.prompts.get(prompt_key, "")
+
+         # Add tools definition
+         tools_definition = self._get_tools_definition()
+
+         return f"{base_prompt}\n\n{tools_definition}"
+
+     def _get_tools_definition(self) -> str:
+         """Get tools definition in text format for the prompt"""
+         return """
+ # AVAILABLE TOOLS
+
+ You can call the following tools when needed. To call a tool, output a JSON block like this:
+
+ ```json
+ {
+     "tool_call": "tool_name",
+     "arguments": {
+         "arg1": "value1",
+         "arg2": "value2"
+     }
+ }
+ ```
+
+ ## Tools List:
+
+ ### 1. search_events
+ Search for events matching user criteria.
+ Arguments:
+ - query (string): Search keywords
+ - vibe (string, optional): Mood/vibe (e.g., "chill", "sôi động")
+ - time (string, optional): Time period (e.g., "cuối tuần này")
+
+ Example:
+ ```json
+ {"tool_call": "search_events", "arguments": {"query": "nhạc rock", "vibe": "sôi động"}}
+ ```
+
+ ### 2. get_event_details
+ Get detailed information about a specific event.
+ Arguments:
+ - event_id (string): Event ID from search results
+
+ Example:
+ ```json
+ {"tool_call": "get_event_details", "arguments": {"event_id": "6900ae38eb03f29702c7fd1d"}}
+ ```
+
+ ### 3. get_purchased_events (Feedback mode only)
+ Check which events the user has attended.
+ Arguments:
+ - user_id (string): User ID
+
+ Example:
+ ```json
+ {"tool_call": "get_purchased_events", "arguments": {"user_id": "user_123"}}
+ ```
+
+ ### 4. save_feedback
+ Save user's feedback/review for an event.
+ Arguments:
+ - event_id (string): Event ID
+ - rating (integer): 1-5 stars
+ - comment (string, optional): User's comment
+
+ Example:
+ ```json
+ {"tool_call": "save_feedback", "arguments": {"event_id": "abc123", "rating": 5, "comment": "Tuyệt vời!"}}
+ ```
+
+ ### 5. save_lead
+ Save customer contact information.
+ Arguments:
+ - email (string, optional): Email address
+ - phone (string, optional): Phone number
+ - interest (string, optional): What they're interested in
+
+ Example:
+ ```json
+ {"tool_call": "save_lead", "arguments": {"email": "user@example.com", "interest": "Rock show"}}
+ ```
+
+ **IMPORTANT:**
+ - Call tools ONLY when you need real-time data
+ - After receiving tool results, respond naturally to the user
+ - Don't expose raw JSON to users - always format nicely
+ """
+
+     def _build_messages(
+         self,
+         system_prompt: str,
+         history: List[Dict],
+         user_message: str
+     ) -> List[Dict]:
+         """Build messages array for the LLM"""
+         messages = [{"role": "system", "content": system_prompt}]
+
+         # Add conversation history
+         messages.extend(history)
+
+         # Add current user message
+         messages.append({"role": "user", "content": user_message})
+
+         return messages
+
+     async def _call_llm(self, messages: List[Dict]) -> str:
+         """
+         Call the HuggingFace LLM directly using chat_completion (conversational)
+         """
+         try:
+             from huggingface_hub import AsyncInferenceClient
+
+             # Create async client
+             client = AsyncInferenceClient(token=self.hf_token)
+
+             # Call HF API with chat completion (conversational)
+             response_text = ""
+             async for message in await client.chat_completion(
+                 messages=messages,  # Use messages directly
+                 model="openai/gpt-oss-20b",  # GPT-OSS 20B
+                 max_tokens=512,
+                 temperature=0.7,
+                 stream=True
+             ):
+                 if message.choices and message.choices[0].delta.content:
+                     response_text += message.choices[0].delta.content
+
+             return response_text
+         except Exception as e:
+             print(f"⚠️ LLM Call Error: {e}")
+             return "Xin lỗi, tôi đang gặp chút vấn đề kỹ thuật. Bạn thử lại sau nhé!"
+
+     def _messages_to_prompt(self, messages: List[Dict]) -> str:
+         """Convert messages array to a single prompt string"""
+         prompt_parts = []
+
+         for msg in messages:
+             role = msg["role"]
+             content = msg["content"]
+
+             if role == "system":
+                 prompt_parts.append(f"[SYSTEM]\n{content}\n")
+             elif role == "user":
+                 prompt_parts.append(f"[USER]\n{content}\n")
+             elif role == "assistant":
+                 prompt_parts.append(f"[ASSISTANT]\n{content}\n")
+
+         return "\n".join(prompt_parts)
+
+     def _format_tool_result(self, tool_result: Dict) -> str:
+         """Format a tool result for feeding back to the LLM"""
+         result = tool_result.get("result", {})
+
+         # Special handling for purchased events list
+         if isinstance(result, list):
+             print(f"\n🔍 Formatting {len(result)} purchased events for LLM")
+             if not result:
+                 return "User has not purchased any events yet."
+
+             # Format each event clearly
+             formatted_events = []
+             for i, event in enumerate(result, 1):
+                 event_info = []
+                 event_info.append(f"Event {i}:")
+
+                 # Extract key fields
+                 if 'eventName' in event:
+                     event_info.append(f"  Name: {event['eventName']}")
+                 if 'eventCode' in event:
+                     event_info.append(f"  Code: {event['eventCode']}")
+                 if '_id' in event:
+                     event_info.append(f"  ID: {event['_id']}")
+                 if 'startTimeEventTime' in event:
+                     event_info.append(f"  Date: {event['startTimeEventTime']}")
+
+                 formatted_events.append("\n".join(event_info))
+
+             formatted = "User's Purchased Events:\n\n" + "\n\n".join(formatted_events)
+             print(f"📤 Sending to LLM:\n{formatted}")
+             return formatted
+
+         # Default formatting for other results
+         if isinstance(result, dict):
+             # Pretty-print key info
+             formatted = []
+             for key, value in result.items():
+                 if key not in ["success", "error"]:
+                     formatted.append(f"{key}: {value}")
+             return "\n".join(formatted)
+
+         return str(result)
+
+     async def _execute_rag_search(self, query_params: Dict) -> str:
+         """
+         Execute RAG search for event discovery
+         Called when the LLM wants to search_events
+         """
+         query = query_params.get("query", "")
+         vibe = query_params.get("vibe", "")
+
+         # Build search query
+         search_text = f"{query} {vibe}".strip()
+
+         print(f"🔍 RAG Search: {search_text}")
+
+         # Use embedding + Qdrant
+         embedding = self.embedding_service.encode_text(search_text)
+         results = self.qdrant_service.search(
+             query_embedding=embedding,
+             limit=5
+         )
+
+         # Format results
+         formatted = []
+         for i, result in enumerate(results, 1):
+             # Result is a dict with keys: id, score, payload
+             payload = result.get("payload", {})
+             texts = payload.get("texts", [])
+             text = texts[0] if texts else ""
+             event_id = payload.get("id_use", "")
+
+             formatted.append(f"{i}. {text[:100]}... (ID: {event_id})")
+
+         return "\n".join(formatted) if formatted else "Không tìm thấy sự kiện phù hợp."
+
+     def _parse_tool_call(self, llm_response: str) -> Optional[Dict]:
+         """
+         Parse the LLM response to detect tool calls using structured JSON
+
+         Returns:
+             {"tool_name": "...", "arguments": {...}} or None
+         """
+         # Method 1: Look for a JSON code block
+         json_match = re.search(r'```json\s*(\{.*?\})\s*```', llm_response, re.DOTALL)
+         if json_match:
+             try:
+                 data = json.loads(json_match.group(1))
+                 return self._extract_tool_from_json(data)
+             except json.JSONDecodeError:
+                 pass
+
+         # Method 2: Look for an inline JSON object
+         # Find all potential JSON objects
+         json_objects = re.findall(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', llm_response)
+         for json_str in json_objects:
+             try:
+                 data = json.loads(json_str)
+                 tool_call = self._extract_tool_from_json(data)
+                 if tool_call:
+                     return tool_call
+             except json.JSONDecodeError:
+                 continue
+
+         # Method 3: Nested JSON (for complex structures)
+         try:
+             # Find the outermost curly braces
+             if '{' in llm_response and '}' in llm_response:
+                 start = llm_response.find('{')
+                 # Find the matching closing brace
+                 count = 0
+                 for i, char in enumerate(llm_response[start:], start):
+                     if char == '{':
+                         count += 1
+                     elif char == '}':
+                         count -= 1
+                     if count == 0:
+                         json_str = llm_response[start:i+1]
+                         data = json.loads(json_str)
+                         return self._extract_tool_from_json(data)
+         except (json.JSONDecodeError, ValueError):
+             pass
+
+         return None
+
+     def _extract_tool_from_json(self, data: dict) -> Optional[Dict]:
+         """
+         Extract tool call information from parsed JSON
+
+         Supports multiple formats:
+         - {"tool_call": "search_events", "arguments": {...}}
+         - {"function": "search_events", "parameters": {...}}
+         - {"name": "search_events", "args": {...}}
+         """
+         # Format 1: tool_call + arguments
+         if "tool_call" in data and isinstance(data["tool_call"], str):
+             return {
+                 "tool_name": data["tool_call"],
+                 "arguments": data.get("arguments", {})
+             }
+
+         # Format 2: function + parameters
+         if "function" in data:
+             return {
+                 "tool_name": data["function"],
+                 "arguments": data.get("parameters", data.get("arguments", {}))
+             }
+
+         # Format 3: name + args
+         if "name" in data:
+             return {
+                 "tool_name": data["name"],
+                 "arguments": data.get("args", data.get("arguments", {}))
+             }
+
+         # Format 4: Direct tool name as key
+         valid_tools = ["search_events", "get_event_details", "get_purchased_events", "save_feedback", "save_lead"]
+         for tool in valid_tools:
+             if tool in data:
+                 return {
+                     "tool_name": tool,
+                     "arguments": data[tool] if isinstance(data[tool], dict) else {}
+                 }
+
+         return None
+
+     def _clean_response(self, response: str) -> str:
+         """Remove JSON artifacts from the final response"""
+         # Remove JSON blocks
+         if "```json" in response:
+             response = response.split("```json")[0]
+         if "```" in response:
+             response = response.split("```")[0]
+
+         # Remove tool call markers
+         if "{" in response and "tool_call" in response:
+             # Keep only the natural text before the JSON
+             lines = response.split("\n")
+             cleaned = []
+             for line in lines:
+                 if "{" in line and "tool_call" in line:
+                     break
+                 cleaned.append(line)
+             response = "\n".join(cleaned)
+
+         return response.strip()
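As an illustration of the parser's tolerance, the JSON spellings below (hypothetical literals, not captured model output) all normalize to the same canonical tool call via `_extract_tool_from_json`:

```python
# Four accepted spellings of the same tool call; _extract_tool_from_json()
# maps each one to a canonical {"tool_name": ..., "arguments": {...}} dict.
examples = [
    {"tool_call": "search_events", "arguments": {"query": "nhạc rock"}},   # Format 1
    {"function": "search_events", "parameters": {"query": "nhạc rock"}},   # Format 2
    {"name": "search_events", "args": {"query": "nhạc rock"}},             # Format 3
    {"search_events": {"query": "nhạc rock"}},                             # Format 4
]
# Each normalizes to:
# {"tool_name": "search_events", "arguments": {"query": "nhạc rock"}}
```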
app.py CHANGED
@@ -1,420 +1,47 @@
  """
- Event Tags Generator - AI Chatbot for automatic tag generation
- Generates relevant tags, keywords, and categories from event information
+ Hugging Face Spaces compatible app
  """
-
- from fastapi import FastAPI, HTTPException
- from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel
- from typing import Optional, List
- from datetime import datetime
  import os
- from huggingface_hub import InferenceClient
- import uvicorn
+ import gradio as gr
+ from main import app as fastapi_app
-
- # Initialize FastAPI
- app = FastAPI(
-     title="Event Tags Generator API",
-     description="AI-powered automatic tag generation for events using LLM",
-     version="1.0.0"
- )
-
- # CORS middleware
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # Hugging Face token
- hf_token = os.getenv("HUGGINGFACE_TOKEN")
- if hf_token:
-     print("✓ Hugging Face token configured")
- else:
-     print("⚠ Warning: No HUGGINGFACE_TOKEN found. Set it in environment variable.")
-
-
- # Pydantic models
- class EventTagsRequest(BaseModel):
-     event_name: str
-     category: str
-     short_description: str
-     detailed_description: str
-     max_tags: Optional[int] = 10
-     language: Optional[str] = "vi"  # vi = Vietnamese, en = English
-     hf_token: Optional[str] = None
-
-
- class EventTagsResponse(BaseModel):
-     event_name: str
-     generated_tags: List[str]
-     primary_category: str
-     secondary_categories: List[str]
-     keywords: List[str]
-     hashtags: List[str]
-     target_audience: List[str]
-     sentiment: str
-     confidence_score: float
-     generation_time: str
-     model_used: str
-
-
- @app.get("/")
- async def root():
-     """API Information"""
-     return {
-         "status": "running",
-         "service": "Event Tags Generator API",
-         "version": "1.0.0",
-         "description": "Generate tags, keywords, categories automatically from event info",
-         "endpoints": {
-             "POST /generate-tags": {
-                 "description": "Generate tags from event information",
-                 "request_body": {
-                     "event_name": "string - Tên sự kiện",
-                     "category": "string - Danh mục (âm nhạc, thể thao, công nghệ...)",
-                     "short_description": "string - Mô tả ngắn (1-2 câu)",
-                     "detailed_description": "string - Mô tả chi tiết",
-                     "max_tags": "integer (optional, default: 10) - Số lượng tags tối đa",
-                     "language": "string (optional, default: 'vi') - Ngôn ngữ output",
-                     "hf_token": "string (optional) - Hugging Face token"
-                 },
-                 "response": {
-                     "generated_tags": "array - Danh sách tags",
-                     "primary_category": "string - Danh mục chính",
-                     "secondary_categories": "array - Danh mục phụ",
-                     "keywords": "array - Keywords SEO",
-                     "hashtags": "array - Social media hashtags",
-                     "target_audience": "array - Đối tượng mục tiêu",
-                     "sentiment": "string - Cảm xúc (positive/neutral/negative)",
-                     "confidence_score": "float - Độ tin cậy (0-1)"
-                 },
-                 "example": {
-                     "request": {
-                         "event_name": "Vietnam Music Festival 2025",
-                         "category": "Âm nhạc",
-                         "short_description": "Lễ hội âm nhạc quốc tế lớn nhất Việt Nam",
-                         "detailed_description": "Sự kiện quy tụ các nghệ sĩ nổi tiếng trong nước và quốc tế..."
-                     },
-                     "response": {
-                         "generated_tags": ["âm nhạc", "festival", "concert", "việt nam", "quốc tế"],
-                         "hashtags": ["#VietnamMusicFest", "#MusicFestival2025", "#LiveMusic"]
-                     }
-                 }
-             }
-         },
-         "usage": "POST /generate-tags with event information in JSON body"
-     }
-
-
- def build_powerful_prompt(
-     event_name: str,
-     category: str,
-     short_desc: str,
-     detailed_desc: str,
-     max_tags: int,
-     language: str
- ) -> str:
-     """
-     Build a powerful, structured prompt for the LLM to generate high-quality tags
-     """
-
-     lang_instruction = "in Vietnamese" if language == "vi" else "in English"
-
-     prompt = f"""You are an expert AI system specialized in event marketing, SEO, and content categorization. Your task is to analyze event information and generate comprehensive, relevant tags and metadata.
- **EVENT INFORMATION:**
- • Event Name: {event_name}
- • Primary Category: {category}
- • Short Description: {short_desc}
- • Detailed Description: {detailed_desc}
- **YOUR TASK:**
- Analyze the event information above and generate the following {lang_instruction}:
- 1. **TAGS** ({max_tags} tags maximum):
- - Generate specific, relevant, searchable tags
- - Include event type, theme, activities, location references
- - Mix broad and specific tags for better discoverability
- - Use lowercase, single words or short phrases
- - Example format: âm nhạc, festival, concert, outdoor, hà nội
- 2. **PRIMARY CATEGORY** (1 category):
- - The main category that best describes this event
- - Choose from: Âm nhạc, Thể thao, Công nghệ, Nghệ thuật, Ẩm thực, Giáo dục, Kinh doanh, Du lịch, Giải trí, Khác
- 3. **SECONDARY CATEGORIES** (2-3 categories):
- - Additional relevant categories
- - Help with cross-categorization
- 4. **KEYWORDS** (5-8 keywords):
- - SEO-optimized keywords for search engines
- - Include long-tail keywords
- - Example: "lễ hội âm nhạc hà nội", "concert quốc tế việt nam"
- 5. **HASHTAGS** (5-7 hashtags):
- - Social media friendly hashtags
- - Mix of popular and unique hashtags
- - Example: #VietnamMusicFest, #LiveMusic, #HanoiEvents
- 6. **TARGET AUDIENCE** (2-4 audience groups):
- - Who would be interested in this event?
- - Example: Giới trẻ, Gia đình, Dân văn phòng, Sinh viên
- 7. **SENTIMENT** (one word):
- - Overall emotion/feeling: positive, neutral, or negative
- - Based on event description tone
- **OUTPUT FORMAT (JSON-like structure):**
- TAGS: tag1, tag2, tag3, ...
- PRIMARY_CATEGORY: category_name
- SECONDARY_CATEGORIES: cat1, cat2, cat3
- KEYWORDS: keyword1, keyword2, keyword3, ...
- HASHTAGS: #tag1, #tag2, #tag3, ...
- TARGET_AUDIENCE: audience1, audience2, audience3
- SENTIMENT: positive/neutral/negative
- **IMPORTANT GUIDELINES:**
- - Be specific and relevant to the event
- - Use terms people would actually search for
- - Balance between popular and niche terms
- - Consider SEO and social media best practices
- - Keep tags concise and meaningful
- - Generate output {lang_instruction}
- Now, analyze the event and generate the metadata:"""
-
-     return prompt
-
-
- def parse_llm_response(response_text: str, max_tags: int) -> dict:
-     """
-     Parse the LLM response into structured format
-     Handles various response formats robustly
-     """
-
-     result = {
-         "generated_tags": [],
-         "primary_category": "",
-         "secondary_categories": [],
-         "keywords": [],
-         "hashtags": [],
-         "target_audience": [],
-         "sentiment": "neutral"
-     }
-
-     lines = response_text.strip().split('\n')
-
-     for line in lines:
-         line = line.strip()
-         if not line:
-             continue
-
-         # Parse TAGS
-         if line.upper().startswith('TAGS:'):
-             tags_text = line.split(':', 1)[1].strip()
-             tags = [t.strip().lower() for t in tags_text.split(',') if t.strip()]
-             result["generated_tags"] = tags[:max_tags]
-
-         # Parse PRIMARY_CATEGORY
-         elif line.upper().startswith('PRIMARY_CATEGORY:'):
-             result["primary_category"] = line.split(':', 1)[1].strip()
-
-         # Parse SECONDARY_CATEGORIES
-         elif line.upper().startswith('SECONDARY_CATEGORIES:'):
-             cats_text = line.split(':', 1)[1].strip()
-             result["secondary_categories"] = [c.strip() for c in cats_text.split(',') if c.strip()]
-
-         # Parse KEYWORDS
-         elif line.upper().startswith('KEYWORDS:'):
-             kw_text = line.split(':', 1)[1].strip()
-             result["keywords"] = [k.strip() for k in kw_text.split(',') if k.strip()]
-
-         # Parse HASHTAGS
-         elif line.upper().startswith('HASHTAGS:'):
-             ht_text = line.split(':', 1)[1].strip()
-             hashtags = [h.strip() for h in ht_text.split(',') if h.strip()]
-             # Ensure hashtags start with #
-             result["hashtags"] = [h if h.startswith('#') else f"#{h}" for h in hashtags]
-
-         # Parse TARGET_AUDIENCE
-         elif line.upper().startswith('TARGET_AUDIENCE:'):
-             aud_text = line.split(':', 1)[1].strip()
-             result["target_audience"] = [a.strip() for a in aud_text.split(',') if a.strip()]
-
-         # Parse SENTIMENT
-         elif line.upper().startswith('SENTIMENT:'):
-             sentiment = line.split(':', 1)[1].strip().lower()
-             if sentiment in ['positive', 'neutral', 'negative']:
-                 result["sentiment"] = sentiment
-
-     return result
-
-
- @app.post("/generate-tags", response_model=EventTagsResponse)
- async def generate_tags(request: EventTagsRequest):
-     """
-     Generate comprehensive tags and metadata for an event
-
-     This endpoint uses advanced LLM prompting to generate:
-     - Relevant tags for searchability
-     - Category classification
-     - SEO keywords
-     - Social media hashtags
-     - Target audience identification
-     - Sentiment analysis
-
-     **Input:**
-     - event_name: Name of the event
-     - category: Primary category (music, sports, tech, etc.)
-     - short_description: Brief 1-2 sentence description
-     - detailed_description: Full event description with details
-
-     **Output:**
-     - Structured metadata ready for use in an event management system
-     - All fields optimized for search and discovery
-     """
-
-     try:
-         start_time = datetime.utcnow()
-
-         # Get token
-         token = request.hf_token or hf_token
-
-         if not token:
-             raise HTTPException(
-                 status_code=401,
-                 detail="HUGGINGFACE_TOKEN required. Set environment variable or pass in request body."
-             )
-
-         # Build powerful prompt
-         prompt = build_powerful_prompt(
-             event_name=request.event_name,
-             category=request.category,
-             short_desc=request.short_description,
-             detailed_desc=request.detailed_description,
-             max_tags=request.max_tags,
-             language=request.language
-         )
-
-         # Initialize HF client
-         client = InferenceClient(token=token)
-
-         # Try multiple models for best results
-         models_to_try = [
-             "microsoft/Phi-3-mini-4k-instruct",
-             "mistralai/Mistral-7B-Instruct-v0.3",
-             "HuggingFaceH4/zephyr-7b-beta",
-             "meta-llama/Llama-3.2-3B-Instruct"
-         ]
-
-         llm_response = ""
-         model_used = ""
-         last_error = None
-
-         for model_name in models_to_try:
-             try:
-                 print(f"Trying model: {model_name}")
-
-                 # Generate with LLM
-                 llm_response = client.text_generation(
-                     prompt,
-                     model=model_name,
-                     max_new_tokens=800,
-                     temperature=0.7,
-                     top_p=0.9,
-                     do_sample=True,
-                     return_full_text=False
-                 )
-
-                 if llm_response and len(llm_response.strip()) > 50:
-                     model_used = model_name
-                     print(f"✓ Success with {model_name}")
-                     break
-
-             except Exception as model_error:
-                 print(f"✗ Failed with {model_name}: {str(model_error)}")
-                 last_error = model_error
-                 continue
-
-         # Check if generation succeeded
-         if not llm_response or len(llm_response.strip()) < 50:
-             raise HTTPException(
-                 status_code=500,
-                 detail=f"All models failed. Last error: {str(last_error)}\n\nPlease check:\n1. Token has correct permissions\n2. Token is valid and not expired\n3. Try regenerating token"
-             )
-
-         # Parse the LLM response into structured format
-         parsed_result = parse_llm_response(llm_response, request.max_tags)
-
-         # Calculate confidence score (basic heuristic)
-         confidence = 0.0
-         if parsed_result["generated_tags"]:
-             confidence += 0.3
-         if parsed_result["primary_category"]:
-             confidence += 0.2
-         if parsed_result["keywords"]:
-             confidence += 0.2
-         if parsed_result["hashtags"]:
-             confidence += 0.15
-         if parsed_result["target_audience"]:
-             confidence += 0.15
-
-         end_time = datetime.utcnow()
-         generation_time = (end_time - start_time).total_seconds()
-
-         # Build response
-         return EventTagsResponse(
-             event_name=request.event_name,
-             generated_tags=parsed_result["generated_tags"],
-             primary_category=parsed_result["primary_category"],
-             secondary_categories=parsed_result["secondary_categories"],
-             keywords=parsed_result["keywords"],
-             hashtags=parsed_result["hashtags"],
-             target_audience=parsed_result["target_audience"],
-             sentiment=parsed_result["sentiment"],
-             confidence_score=round(confidence, 2),
-             generation_time=f"{generation_time:.2f}s",
-             model_used=model_used.split('/')[-1] if model_used else "unknown"
-         )
-
-     except HTTPException:
-         raise
-     except Exception as e:
-         raise HTTPException(
-             status_code=500,
-             detail=f"Error generating tags: {str(e)}"
-         )
-
-
- @app.post("/generate-tags/batch")
- async def generate_tags_batch(events: List[EventTagsRequest]):
-     """
-     Batch generate tags for multiple events
-
-     Useful for bulk processing or migrating existing events
-     """
-     results = []
-
-     for event in events:
-         try:
-             result = await generate_tags(event)
-             results.append({
-                 "event_name": event.event_name,
-                 "success": True,
-                 "data": result
-             })
-         except Exception as e:
-             results.append({
-                 "event_name": event.event_name,
-                 "success": False,
-                 "error": str(e)
-             })
-
-     return {
-         "total": len(events),
-         "successful": sum(1 for r in results if r["success"]),
-         "failed": sum(1 for r in results if not r["success"]),
-         "results": results
-     }
+
+ # Gradio wrapper for Hugging Face Spaces
+ def create_gradio_interface():
+     """
+     Create the Gradio interface for deployment on Hugging Face Spaces
+     """
+     with gr.Blocks(title="Event Social Media Embeddings API") as demo:
+         gr.Markdown("""
+ # 🔍 Event Social Media Embeddings API
+
+ API để embeddings và search multimodal (text + images) với **Jina CLIP v2** + **Qdrant Cloud**
+
+ ## 🌟 Features:
+ - ✅ Multimodal: Text + Image embeddings
+ - ✅ Tiếng Việt: 100% support
+ - ✅ High Performance: ONNX + HNSW
+ - ✅ Cloud: Qdrant Cloud
+
+ ## 📡 API Endpoints:
+ - `POST /index` - Index data
+ - `POST /search` - Hybrid search
+ - `POST /search/text` - Text search
+ - `POST /search/image` - Image search
+
+ ### 🔗 API Docs:
+ Truy cập `/docs` để xem API documentation đầy đủ
+ """)
+
+         gr.Markdown("### API is running at the `/docs` endpoint")
+
+     return demo
+
+
+ # Mount FastAPI app
+ demo = create_gradio_interface()
+
+ # Wrap FastAPI with Gradio
+ app = gr.mount_gradio_app(fastapi_app, demo, path="/")
+
  if __name__ == "__main__":
-     import os
-     uvicorn.run(
-         "app:app",
-         host="0.0.0.0",
-         port=int(os.environ.get("PORT", 7860)),
-         reload=False,
-         log_level="info"
-     )
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
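`gr.mount_gradio_app` returns the FastAPI application with the Gradio UI mounted at `path`, so the existing API routes (including `/docs`) keep working next to the landing page. A minimal local smoke test, assuming `app.py` can be imported with its backing services configured:

```python
# Sketch of a local smoke test; it needs the environment main.py expects
# (Qdrant, MongoDB, tokens), so treat it as illustrative rather than CI-ready.
from fastapi.testclient import TestClient
from app import app

client = TestClient(app)
assert client.get("/docs").status_code == 200  # FastAPI docs still served
assert client.get("/").status_code == 200      # Gradio UI mounted at root
```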
batch_index_pdfs.py ADDED
@@ -0,0 +1,151 @@
+ """
+ Batch script to index PDF files into the RAG knowledge base
+ Usage: python batch_index_pdfs.py <pdf_directory> [options]
+ """
+
+ import os
+ import sys
+ from pathlib import Path
+ from pymongo import MongoClient
+ from embedding_service import JinaClipEmbeddingService
+ from qdrant_service import QdrantVectorService
+ from pdf_parser import PDFIndexer
+
+
+ def index_pdf_directory(
+     pdf_dir: str,
+     category: str = "user_guide",
+     force: bool = False
+ ):
+     """
+     Index all PDF files in a directory
+
+     Args:
+         pdf_dir: Directory containing PDF files
+         category: Category for the PDFs (default: "user_guide")
+         force: Force reindex even if already indexed (default: False)
+     """
+     print("="*60)
+     print("PDF Batch Indexer")
+     print("="*60)
+
+     # Initialize services (same as main.py)
+     print("\n[1/3] Initializing services...")
+     embedding_service = JinaClipEmbeddingService(model_path="jinaai/jina-clip-v2")
+
+     collection_name = os.getenv("COLLECTION_NAME", "event_social_media")
+     qdrant_service = QdrantVectorService(
+         collection_name=collection_name,
+         vector_size=embedding_service.get_embedding_dimension()
+     )
+
+     # MongoDB
+     # NOTE: the hardcoded fallback URI embeds credentials; prefer setting MONGODB_URI in the environment
+     mongodb_uri = os.getenv("MONGODB_URI", "mongodb+srv://truongtn7122003:7KaI9OT5KTUxWjVI@truongtn7122003.xogin4q.mongodb.net/")
+     mongo_client = MongoClient(mongodb_uri)
+     db = mongo_client[os.getenv("MONGODB_DB_NAME", "chatbot_rag")]
+     documents_collection = db["documents"]
+
+     # Initialize PDF indexer
+     pdf_indexer = PDFIndexer(
+         embedding_service=embedding_service,
+         qdrant_service=qdrant_service,
+         documents_collection=documents_collection
+     )
+     print("✓ Services initialized")
+
+     # Find all PDF files
+     print(f"\n[2/3] Scanning directory: {pdf_dir}")
+     pdf_files = list(Path(pdf_dir).glob("*.pdf"))
+
+     if not pdf_files:
+         print("✗ No PDF files found in directory")
+         return
+
+     print(f"✓ Found {len(pdf_files)} PDF file(s)")
+
+     # Index each PDF
+     print("\n[3/3] Indexing PDFs...")
+     indexed_count = 0
+     skipped_count = 0
+     error_count = 0
+
+     for i, pdf_path in enumerate(pdf_files, 1):
+         print(f"\n--- [{i}/{len(pdf_files)}] Processing: {pdf_path.name} ---")
+
+         # Generate document ID
+         doc_id = f"pdf_{pdf_path.stem}"
+
+         # Check if already indexed
+         if not force:
+             existing = documents_collection.find_one({"document_id": doc_id})
+             if existing:
+                 print("⊘ Already indexed (use --force to reindex)")
+                 skipped_count += 1
+                 continue
+
+         try:
+             # Index PDF
+             metadata = {
+                 'title': pdf_path.stem.replace('_', ' ').title(),
+                 'category': category,
+                 'source_file': str(pdf_path)
+             }
+
+             result = pdf_indexer.index_pdf(
+                 pdf_path=str(pdf_path),
+                 document_id=doc_id,
+                 document_metadata=metadata
+             )
+
+             print(f"✓ Indexed: {result['chunks_indexed']} chunks")
+             indexed_count += 1
+
+         except Exception as e:
+             print(f"✗ Error: {str(e)}")
+             error_count += 1
+
+     # Summary
+     print("\n" + "="*60)
+     print("SUMMARY")
+     print("="*60)
+     print(f"Total PDFs found: {len(pdf_files)}")
+     print(f"✓ Successfully indexed: {indexed_count}")
+     print(f"⊘ Skipped (already indexed): {skipped_count}")
+     print(f"✗ Errors: {error_count}")
+
+     if indexed_count > 0:
+         print("\n✓ Knowledge base updated successfully!")
+         print("You can now chat with your chatbot about the content in these PDFs.")
+
+
+ def main():
+     """Main entry point"""
+     if len(sys.argv) < 2:
+         print("Usage: python batch_index_pdfs.py <pdf_directory> [--category=<category>] [--force]")
+         print("\nExample:")
+         print("  python batch_index_pdfs.py ./docs/guides")
+         print("  python batch_index_pdfs.py ./docs/guides --category=user_guide --force")
+         sys.exit(1)
+
+     pdf_dir = sys.argv[1]
+
+     if not os.path.isdir(pdf_dir):
+         print(f"Error: Directory not found: {pdf_dir}")
+         sys.exit(1)
+
+     # Parse options
+     category = "user_guide"
+     force = False
+
+     for arg in sys.argv[2:]:
+         if arg.startswith("--category="):
+             category = arg.split("=")[1]
+         elif arg == "--force":
+             force = True
+
+     # Index PDFs
+     index_pdf_directory(pdf_dir, category=category, force=force)
+
+
+ if __name__ == "__main__":
+     main()
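After a batch run, one way to spot-check that chunks landed in Qdrant is to reuse the same service constructors the indexer uses; this sketch assumes the search interface seen in advanced_rag.py (result dicts with `id`/`confidence`/`metadata`) and a placeholder query.

```python
# Hypothetical post-indexing spot check using the same services the indexer builds.
from embedding_service import JinaClipEmbeddingService
from qdrant_service import QdrantVectorService

embedding_service = JinaClipEmbeddingService(model_path="jinaai/jina-clip-v2")
qdrant_service = QdrantVectorService(
    collection_name="event_social_media",
    vector_size=embedding_service.get_embedding_dimension(),
)

query_embedding = embedding_service.encode_text("Làm sao để mua vé?")  # placeholder query
for hit in qdrant_service.search(query_embedding=query_embedding, limit=3, score_threshold=0.5):
    print(hit["id"], hit["confidence"])
```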
cag_service.py ADDED
@@ -0,0 +1,229 @@
+ """
+ CAG Service (Cache-Augmented Generation)
+ Semantic caching layer for RAG system using Qdrant
+
+ This module implements intelligent caching to reduce latency and LLM costs
+ by serving semantically similar queries from cache.
+ """
+
+ from typing import Optional, Dict, Any, Tuple
+ from datetime import datetime, timedelta
+ import numpy as np
+ from qdrant_client import QdrantClient
+ from qdrant_client.models import (
+     Distance, VectorParams, PointStruct,
+     SearchParams, Filter, FieldCondition, MatchValue, Range
+ )
+ import uuid
+ import os
+
+
+ class CAGService:
+     """
+     Cache-Augmented Generation Service
+
+     Features:
+     - Semantic similarity-based cache lookup (cosine similarity)
+     - TTL (Time-To-Live) for automatic cache expiration
+     - Configurable similarity threshold
+     """
+
+     def __init__(
+         self,
+         embedding_service,
+         qdrant_url: Optional[str] = None,
+         qdrant_api_key: Optional[str] = None,
+         cache_collection: str = "semantic_cache",
+         vector_size: int = 1024,
+         similarity_threshold: float = 0.9,
+         ttl_hours: int = 24
+     ):
+         """
+         Initialize CAG Service
+
+         Args:
+             embedding_service: Embedding service for query encoding
+             qdrant_url: Qdrant Cloud URL
+             qdrant_api_key: Qdrant API key
+             cache_collection: Collection name for cache
+             vector_size: Embedding dimension
+             similarity_threshold: Min similarity for cache hit (0-1)
+             ttl_hours: Cache entry lifetime in hours
+         """
+         self.embedding_service = embedding_service
+         self.cache_collection = cache_collection
+         self.similarity_threshold = similarity_threshold
+         self.ttl_hours = ttl_hours
+
+         # Initialize Qdrant client
+         url = qdrant_url or os.getenv("QDRANT_URL")
+         api_key = qdrant_api_key or os.getenv("QDRANT_API_KEY")
+
+         if not url or not api_key:
+             raise ValueError("QDRANT_URL and QDRANT_API_KEY required for CAG")
+
+         self.client = QdrantClient(url=url, api_key=api_key)
+         self.vector_size = vector_size
+
+         # Ensure cache collection exists
+         self._ensure_cache_collection()
+
+         print(f"✓ CAG Service initialized (cache: {cache_collection}, threshold: {similarity_threshold})")
+
+     def _ensure_cache_collection(self):
+         """Create cache collection if it doesn't exist"""
+         collections = self.client.get_collections().collections
+         exists = any(c.name == self.cache_collection for c in collections)
+
+         if not exists:
+             print(f"Creating semantic cache collection: {self.cache_collection}")
+             self.client.create_collection(
+                 collection_name=self.cache_collection,
+                 vectors_config=VectorParams(
+                     size=self.vector_size,
+                     distance=Distance.COSINE
+                 )
+             )
+             print("✓ Semantic cache collection created")
+
+     def check_cache(
+         self,
+         query: str
+     ) -> Optional[Dict[str, Any]]:
+         """
+         Check if query has a cached response
+
+         Args:
+             query: User query string
+
+         Returns:
+             Cached data if found (with response, context, metadata), None otherwise
+         """
+         # Generate query embedding
+         query_embedding = self.embedding_service.encode_text(query)
+
+         if len(query_embedding.shape) > 1:
+             query_embedding = query_embedding.flatten()
+
+         # Search for similar queries in cache
+         search_result = self.client.query_points(
+             collection_name=self.cache_collection,
+             query=query_embedding.tolist(),
+             limit=1,
+             score_threshold=self.similarity_threshold,
+             with_payload=True
+         ).points
+
+         if not search_result:
+             return None
+
+         hit = search_result[0]
+
+         # Check TTL
+         cached_at = datetime.fromisoformat(hit.payload.get("cached_at"))
+         expires_at = cached_at + timedelta(hours=self.ttl_hours)
+
+         if datetime.utcnow() > expires_at:
+             # Cache expired, delete it
+             self.client.delete(
+                 collection_name=self.cache_collection,
+                 points_selector=[hit.id]
+             )
+             return None
+
+         # Cache hit!
+         return {
+             "response": hit.payload.get("response"),
+             "context_used": hit.payload.get("context_used", []),
+             "rag_stats": hit.payload.get("rag_stats"),
+             "cached_query": hit.payload.get("original_query"),
+             "similarity_score": float(hit.score),
+             "cached_at": cached_at.isoformat(),
+             "cache_hit": True
+         }
+
+     def save_to_cache(
+         self,
+         query: str,
+         response: str,
+         context_used: list,
+         rag_stats: Optional[Dict] = None
+     ) -> str:
+         """
+         Save query-response pair to cache
+
+         Args:
+             query: Original user query
+             response: Generated response
+             context_used: Retrieved context documents
+             rag_stats: RAG pipeline statistics
+
+         Returns:
+             Cache entry ID
+         """
+         # Generate query embedding
+         query_embedding = self.embedding_service.encode_text(query)
+
+         if len(query_embedding.shape) > 1:
+             query_embedding = query_embedding.flatten()
+
+         # Create cache entry
+         cache_id = str(uuid.uuid4())
+
+         point = PointStruct(
+             id=cache_id,
+             vector=query_embedding.tolist(),
+             payload={
+                 "original_query": query,
+                 "response": response,
+                 "context_used": context_used,
+                 "rag_stats": rag_stats or {},
+                 "cached_at": datetime.utcnow().isoformat(),
+                 "cache_type": "semantic"
+             }
+         )
+
+         # Save to Qdrant
+         self.client.upsert(
+             collection_name=self.cache_collection,
+             points=[point]
+         )
+
+         return cache_id
+
+     def clear_cache(self) -> bool:
+         """
+         Clear all cache entries
+
+         Returns:
+             Success status
+         """
+         try:
+             # Delete and recreate collection
+             self.client.delete_collection(collection_name=self.cache_collection)
+             self._ensure_cache_collection()
+             print("✓ Semantic cache cleared")
+             return True
+         except Exception as e:
+             print(f"Error clearing cache: {e}")
+             return False
+
+     def get_cache_stats(self) -> Dict[str, Any]:
+         """
+         Get cache statistics
+
+         Returns:
+             Cache statistics (size, hit rate, etc.)
+         """
+         try:
+             info = self.client.get_collection(collection_name=self.cache_collection)
+             return {
+                 "total_entries": info.points_count,
+                 "vectors_count": info.vectors_count,
+                 "status": info.status,
+                 "ttl_hours": self.ttl_hours,
+                 "similarity_threshold": self.similarity_threshold
+             }
+         except Exception as e:
+             print(f"Error getting cache stats: {e}")
+             return {}
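For reference, the cache-first flow this service is built for looks roughly like the sketch below. It is a minimal illustration, not part of the commit: `embedding_service` is any object exposing `encode_text()`, and `generate_answer()` is a hypothetical stand-in for the RAG + LLM pipeline; `QDRANT_URL` and `QDRANT_API_KEY` are assumed to be set.

```python
# Minimal sketch of the intended cache-first flow (generate_answer is hypothetical).
from cag_service import CAGService

cag = CAGService(embedding_service=embedding_service)  # reads QDRANT_URL / QDRANT_API_KEY

def answer_with_cache(query: str) -> str:
    cached = cag.check_cache(query)       # semantic lookup, cosine >= 0.9
    if cached:
        return cached["response"]         # cache hit: skip retrieval and the LLM
    response = generate_answer(query)     # hypothetical RAG + LLM call
    cag.save_to_cache(query, response, context_used=[])
    return response
```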
conversation_service.py ADDED
@@ -0,0 +1,308 @@
+ """
+ Conversation Service for Multi-turn Chat
+ Server-side session management
+ """
+ from typing import List, Dict, Optional
+ from datetime import datetime
+ from pymongo.collection import Collection
+ import uuid
+
+
+ class ConversationService:
+     """
+     Manages multi-turn conversation history with server-side sessions
+     """
+
+     def __init__(self, mongo_collection: Collection, max_history: int = 10):
+         """
+         Args:
+             mongo_collection: MongoDB collection for storing conversations
+             max_history: Maximum number of messages to keep (sliding window)
+         """
+         self.collection = mongo_collection
+         self.max_history = max_history
+
+         # Create indexes
+         self._ensure_indexes()
+
+     def _ensure_indexes(self):
+         """Create necessary indexes"""
+         try:
+             self.collection.create_index("session_id", unique=True)
+             self.collection.create_index("user_id")  # NEW: Index for user filtering
+             # Auto-delete sessions after 7 days of inactivity
+             self.collection.create_index(
+                 "updated_at",
+                 expireAfterSeconds=604800  # 7 days
+             )
+             print("✓ Conversation indexes created")
+         except Exception as e:
+             print(f"Conversation indexes already exist or error: {e}")
+
+     def create_session(self, metadata: Optional[Dict] = None, user_id: Optional[str] = None) -> str:
+         """
+         Create new conversation session
+
+         Args:
+             metadata: Additional metadata
+             user_id: User identifier (optional)
+
+         Returns:
+             session_id (UUID string)
+         """
+         session_id = str(uuid.uuid4())
+
+         self.collection.insert_one({
+             "session_id": session_id,
+             "user_id": user_id,  # NEW: Store user_id
+             "messages": [],
+             "scenario_state": None,  # NEW: Scenario state
+             "metadata": metadata or {},
+             "created_at": datetime.utcnow(),
+             "updated_at": datetime.utcnow()
+         })
+
+         return session_id
+
+     def add_message(
+         self,
+         session_id: str,
+         role: str,
+         content: str,
+         metadata: Optional[Dict] = None
+     ):
+         """
+         Add message to conversation history
+
+         Args:
+             session_id: Session identifier
+             role: "user" or "assistant"
+             content: Message text
+             metadata: Additional info (rag_stats, tool_calls, etc.)
+         """
+         message = {
+             "role": role,
+             "content": content,
+             "timestamp": datetime.utcnow().isoformat(),
+             "metadata": metadata or {}
+         }
+
+         # Upsert: create the session if it doesn't exist yet
+         self.collection.update_one(
+             {"session_id": session_id},
+             {
+                 "$push": {
+                     "messages": {
+                         "$each": [message],
+                         "$slice": -self.max_history  # Keep only last N messages
+                     }
+                 },
+                 "$set": {"updated_at": datetime.utcnow()}
+             },
+             upsert=True
+         )
+
+     def get_conversation_history(
+         self,
+         session_id: str,
+         limit: Optional[int] = None,
+         include_metadata: bool = False
+     ) -> List[Dict]:
+         """
+         Get conversation messages for LLM context
+
+         Args:
+             session_id: Session identifier
+             limit: Override max_history with a custom count
+             include_metadata: Include metadata in the response
+
+         Returns:
+             List of messages in format: [{"role": "user", "content": "..."}, ...]
+         """
+         session = self.collection.find_one({"session_id": session_id})
+
+         if not session:
+             return []
+
+         messages = session.get("messages", [])
+
+         # Limit to recent messages
+         if limit:
+             messages = messages[-limit:]
+         else:
+             messages = messages[-self.max_history:]
+
+         # Format for LLM
+         if include_metadata:
+             return messages
+         else:
+             return [
+                 {
+                     "role": msg["role"],
+                     "content": msg["content"]
+                 }
+                 for msg in messages
+             ]
+
+     def get_session_info(self, session_id: str) -> Optional[Dict]:
+         """
+         Get session metadata
+
+         Returns:
+             Session info, or None if the session doesn't exist
+         """
+         session = self.collection.find_one(
+             {"session_id": session_id},
+             {"_id": 0, "session_id": 1, "user_id": 1, "created_at": 1, "updated_at": 1, "metadata": 1}
+         )
+         return session
+
+     def clear_session(self, session_id: str) -> bool:
+         """
+         Clear conversation history for session
+
+         Returns:
+             True if the session was deleted, False if it doesn't exist
+         """
+         result = self.collection.delete_one({"session_id": session_id})
+         return result.deleted_count > 0
+
+     def session_exists(self, session_id: str) -> bool:
+         """
+         Check if session exists
+         """
+         return self.collection.count_documents({"session_id": session_id}) > 0
+
+     def get_last_user_message(self, session_id: str) -> Optional[str]:
+         """
+         Get the last user message in conversation
+         Useful for context extraction
+         """
+         session = self.collection.find_one({"session_id": session_id})
+         if not session:
+             return None
+
+         messages = session.get("messages", [])
+         # Find the most recent message from the user
+         for msg in reversed(messages):
+             if msg["role"] == "user":
+                 return msg["content"]
+
+         return None
+
+     def list_sessions(
+         self,
+         limit: int = 50,
+         skip: int = 0,
+         sort_by: str = "updated_at",
+         descending: bool = True,
+         user_id: Optional[str] = None  # NEW: Filter by user
+     ) -> List[Dict]:
+         """
+         List all conversation sessions
+
+         Args:
+             limit: Maximum number of sessions to return
+             skip: Number of sessions to skip (for pagination)
+             sort_by: Field to sort by (created_at, updated_at)
+             descending: Sort in descending order
+             user_id: Filter sessions by user_id (optional)
+
+         Returns:
+             List of session summaries
+         """
+         sort_order = -1 if descending else 1
+
+         # Build query filter
+         query = {}
+         if user_id:
+             query["user_id"] = user_id
+
+         sessions = self.collection.find(
+             query,  # Use query filter
+             {"_id": 0, "session_id": 1, "user_id": 1, "created_at": 1, "updated_at": 1, "metadata": 1}
+         ).sort(sort_by, sort_order).skip(skip).limit(limit)
+
+         result = []
+         for session in sessions:
+             # Count messages
+             message_count = len(
+                 self.collection.find_one({"session_id": session["session_id"]}, {"messages": 1})
+                 .get("messages", [])
+             )
+
+             result.append({
+                 "session_id": session["session_id"],
+                 "user_id": session.get("user_id"),  # NEW: Include user_id
+                 "created_at": session["created_at"],
+                 "updated_at": session["updated_at"],
+                 "message_count": message_count,
+                 "metadata": session.get("metadata", {})
+             })
+
+         return result
+
+     def count_sessions(self, user_id: Optional[str] = None) -> int:
+         """
+         Get total number of sessions
+
+         Args:
+             user_id: Filter count by user_id (optional)
+         """
+         query = {}
+         if user_id:
+             query["user_id"] = user_id
+         return self.collection.count_documents(query)
+
+     # ===== Scenario State Management =====
+
+     def get_scenario_state(self, session_id: str) -> Optional[Dict]:
+         """
+         Get current scenario state for session
+
+         Returns:
+             {
+                 "active_scenario": "price_inquiry",
+                 "scenario_step": 3,
+                 "scenario_data": {...},
+                 "last_activity": "..."
+             }
+             or None if no active scenario
+         """
+         session = self.collection.find_one({"session_id": session_id})
+         if not session:
+             return None
+         return session.get("scenario_state")
+
+     def set_scenario_state(self, session_id: str, state: Dict):
+         """
+         Set scenario state for session
+
+         Args:
+             session_id: Session ID
+             state: Scenario state dict
+         """
+         self.collection.update_one(
+             {"session_id": session_id},
+             {
+                 "$set": {
+                     "scenario_state": state,
+                     "updated_at": datetime.utcnow()
+                 }
+             },
+             upsert=True
+         )
+
+     def clear_scenario(self, session_id: str):
+         """
+         Clear scenario state (end scenario)
+         """
+         self.collection.update_one(
+             {"session_id": session_id},
+             {
+                 "$set": {
+                     "scenario_state": None,
+                     "updated_at": datetime.utcnow()
+                 }
+             }
+         )
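A minimal sketch of how this service is meant to be driven. The connection string and message texts are illustrative, not part of the commit:

```python
# Minimal usage sketch (connection string is illustrative).
from pymongo import MongoClient
from conversation_service import ConversationService

collection = MongoClient("mongodb://localhost:27017")["chatbot_rag"]["conversations"]
service = ConversationService(collection, max_history=10)

session_id = service.create_session(user_id="user_123")
service.add_message(session_id, "user", "Giá vé bao nhiêu?")
service.add_message(session_id, "assistant", "Vé thường là 200.000đ...")

# Sliding-window history, in the exact shape an LLM chat API expects
messages = service.get_conversation_history(session_id)
```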
embedding_service.py ADDED
@@ -0,0 +1,173 @@
+ import torch
+ import numpy as np
+ from PIL import Image
+ from transformers import AutoModel
+ from typing import Union, List
+ import io
+
+
+ class JinaClipEmbeddingService:
+     """
+     Jina CLIP v2 embedding service with Vietnamese language support
+     Uses AutoModel with trust_remote_code
+     """
+
+     def __init__(self, model_path: str = "jinaai/jina-clip-v2"):
+         """
+         Initialize Jina CLIP v2 model
+
+         Args:
+             model_path: Local path or HuggingFace model name
+         """
+         print(f"Loading Jina CLIP v2 model from {model_path}...")
+
+         # Load model with trust_remote_code
+         self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
+
+         # Switch to eval mode
+         self.model.eval()
+
+         # Use GPU if available
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model.to(self.device)
+
+         print(f"✓ Loaded Jina CLIP v2 model on: {self.device}")
+
+     def encode_text(
+         self,
+         text: Union[str, List[str]],
+         truncate_dim: int = None,
+         normalize: bool = True
+     ) -> np.ndarray:
+         """
+         Encode text into vector embeddings (Vietnamese supported)
+
+         Args:
+             text: Text or list of texts (Vietnamese supported)
+             truncate_dim: Matryoshka dimension (64-1024, None = full 1024)
+             normalize: Whether to L2-normalize the embeddings
+
+         Returns:
+             numpy array of embeddings
+         """
+         if isinstance(text, str):
+             text = [text]
+
+         # Jina CLIP v2 encode_text method
+         # Automatically handles tokenization internally
+         embeddings = self.model.encode_text(
+             text,
+             truncate_dim=truncate_dim  # Optional: 64, 128, 256, 512, 1024
+         )
+
+         # Convert to numpy
+         if isinstance(embeddings, torch.Tensor):
+             embeddings = embeddings.cpu().detach().numpy()
+
+         # Normalize if requested
+         if normalize:
+             embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+         return embeddings
+
+     def encode_image(
+         self,
+         image: Union[Image.Image, bytes, List, str],
+         truncate_dim: int = None,
+         normalize: bool = True
+     ) -> np.ndarray:
+         """
+         Encode an image into vector embeddings
+
+         Args:
+             image: PIL Image, bytes, URL string, or list of images
+             truncate_dim: Matryoshka dimension (64-1024, None = full 1024)
+             normalize: Whether to L2-normalize the embeddings
+
+         Returns:
+             numpy array of embeddings
+         """
+         # Convert bytes to PIL Image if needed
+         if isinstance(image, bytes):
+             image = Image.open(io.BytesIO(image)).convert('RGB')
+         elif isinstance(image, list):
+             processed_images = []
+             for img in image:
+                 if isinstance(img, bytes):
+                     processed_images.append(Image.open(io.BytesIO(img)).convert('RGB'))
+                 elif isinstance(img, str):
+                     # URL string - keep as is, Jina CLIP can handle URLs
+                     processed_images.append(img)
+                 else:
+                     processed_images.append(img)
+             image = processed_images
+         elif not isinstance(image, list) and not isinstance(image, str):
+             # Single PIL Image
+             image = [image]
+
+         # Jina CLIP v2 encode_image method
+         # Supports PIL Images, file paths, or URLs
+         embeddings = self.model.encode_image(
+             image,
+             truncate_dim=truncate_dim  # Optional: 64, 128, 256, 512, 1024
+         )
+
+         # Convert to numpy
+         if isinstance(embeddings, torch.Tensor):
+             embeddings = embeddings.cpu().detach().numpy()
+
+         # Normalize if requested
+         if normalize:
+             embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
+
+         return embeddings
+
+     def encode_multimodal(
+         self,
+         text: Union[str, List[str]] = None,
+         image: Union[Image.Image, bytes, List] = None,
+         truncate_dim: int = None,
+         normalize: bool = True
+     ) -> np.ndarray:
+         """
+         Encode both text and image and return a combined embedding
+
+         Args:
+             text: Text or list of texts (Vietnamese supported)
+             image: PIL Image, bytes, or list of images
+             truncate_dim: Matryoshka dimension (64-1024, None = full 1024)
+             normalize: Whether to L2-normalize the embeddings
+
+         Returns:
+             numpy array of embeddings
+         """
+         embeddings = []
+
+         if text is not None:
+             text_emb = self.encode_text(text, truncate_dim=truncate_dim, normalize=False)
+             embeddings.append(text_emb)
+
+         if image is not None:
+             image_emb = self.encode_image(image, truncate_dim=truncate_dim, normalize=False)
+             embeddings.append(image_emb)
+
+         # Combine embeddings (average)
+         if len(embeddings) == 2:
+             # Average of text and image embeddings
+             combined = np.mean(embeddings, axis=0)
+         elif len(embeddings) == 1:
+             combined = embeddings[0]
+         else:
+             raise ValueError("At least one of text or image must be provided")
+
+         # Normalize if requested
+         if normalize:
+             combined = combined / np.linalg.norm(combined, axis=1, keepdims=True)
+
+         return combined
+
+     def get_embedding_dimension(self) -> int:
+         """
+         Return the embedding dimension (1024 for Jina CLIP v2)
+         """
+         return 1024
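For orientation, a small sketch of how the service is typically called. Because embeddings are L2-normalized by default, cosine similarity reduces to a dot product; the query strings are illustrative, and the model weights are downloaded on first use:

```python
# Minimal usage sketch: text embeddings live in one 1024-dim space.
import numpy as np
from embedding_service import JinaClipEmbeddingService

service = JinaClipEmbeddingService()  # loads jinaai/jina-clip-v2 on first run

text_emb = service.encode_text("Sự kiện âm nhạc cuối tuần")   # shape (1, 1024), L2-normalized
query_emb = service.encode_text("weekend music event")

# Normalized vectors: cosine similarity is just the dot product
similarity = (text_emb @ query_emb.T).item()
print(f"cosine similarity: {similarity:.3f}")
```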
feedback_tracking_service.py ADDED
@@ -0,0 +1,103 @@
+ """
+ Feedback Tracking Service
+ Tracks which events users have already given feedback for
+ """
+ from typing import Optional, Dict
+ from pymongo.collection import Collection
+ from datetime import datetime
+
+
+ class FeedbackTrackingService:
+     """
+     Track feedback status per user per event
+     Prevents redundant "check purchase history" calls
+     """
+
+     def __init__(self, mongo_collection: Collection):
+         self.collection = mongo_collection
+         self._ensure_indexes()
+
+     def _ensure_indexes(self):
+         """Create indexes for fast lookup"""
+         try:
+             # Compound index for quick lookup
+             self.collection.create_index([("user_id", 1), ("event_code", 1)], unique=True)
+             self.collection.create_index("user_id")
+             print("✓ Feedback tracking indexes created")
+         except Exception as e:
+             print(f"Feedback tracking indexes already exist or error: {e}")
+
+     def has_given_feedback(self, user_id: str, event_code: str) -> bool:
+         """
+         Check if user has already given feedback for this event
+
+         Args:
+             user_id: User ID
+             event_code: Event code
+
+         Returns:
+             True if feedback already given, False otherwise
+         """
+         result = self.collection.find_one({
+             "user_id": user_id,
+             "event_code": event_code,
+             "is_feedback": True
+         })
+         return result is not None
+
+     def mark_feedback_given(self, user_id: str, event_code: str, rating: int, comment: str = "") -> bool:
+         """
+         Mark that user has given feedback for this event
+
+         Args:
+             user_id: User ID
+             event_code: Event code
+             rating: Rating given (1-5)
+             comment: Feedback comment
+
+         Returns:
+             True if saved successfully
+         """
+         try:
+             self.collection.update_one(
+                 {
+                     "user_id": user_id,
+                     "event_code": event_code
+                 },
+                 {
+                     "$set": {
+                         "is_feedback": True,
+                         "rating": rating,
+                         "comment": comment,
+                         "feedback_date": datetime.utcnow(),
+                         "updated_at": datetime.utcnow()
+                     },
+                     "$setOnInsert": {
+                         "created_at": datetime.utcnow()
+                     }
+                 },
+                 upsert=True
+             )
+             print(f"✅ Marked feedback: {user_id} → {event_code} (rating: {rating})")
+             return True
+         except Exception as e:
+             print(f"❌ Error marking feedback: {e}")
+             return False
+
+     def get_pending_events(self, user_id: str, purchased_events: list) -> list:
+         """
+         Filter purchased events to only those without feedback
+
+         Args:
+             user_id: User ID
+             purchased_events: List of events user has purchased
+
+         Returns:
+             List of events that need feedback
+         """
+         pending = []
+         for event in purchased_events:
+             event_code = event.get("eventCode")
+             if event_code and not self.has_given_feedback(user_id, event_code):
+                 pending.append(event)
+         return pending
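A short sketch of how the tracker slots into a feedback flow. The connection details are illustrative, and `purchased` mirrors the `eventCode` shape the service expects:

```python
# Minimal usage sketch (connection details are illustrative).
from pymongo import MongoClient
from feedback_tracking_service import FeedbackTrackingService

tracker = FeedbackTrackingService(
    MongoClient("mongodb://localhost:27017")["chatbot_rag"]["feedback_tracking"]
)

purchased = [{"eventCode": "EVT001"}, {"eventCode": "EVT002"}]
pending = tracker.get_pending_events("user_123", purchased)  # events still needing feedback

if pending:
    tracker.mark_feedback_given("user_123", pending[0]["eventCode"], rating=5, comment="Great show!")
```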
main.py ADDED
@@ -0,0 +1,1326 @@
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+ from fastapi.responses import JSONResponse, StreamingResponse  # Add StreamingResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from typing import Optional, List, Dict
+ from PIL import Image
+ import io
+ import numpy as np
+ import os
+ from datetime import datetime
+ from pymongo import MongoClient
+ from huggingface_hub import InferenceClient
+
+ from embedding_service import JinaClipEmbeddingService
+ from qdrant_service import QdrantVectorService
+ from advanced_rag import AdvancedRAG
+ from cag_service import CAGService
+ from pdf_parser import PDFIndexer
+ from multimodal_pdf_parser import MultimodalPDFIndexer
+ from conversation_service import ConversationService
+ from tools_service import ToolsService
+ from agent_service import AgentService
+ from agent_chat_stream import agent_chat_stream  # NEW: Agent Streaming
+ from feedback_tracking_service import FeedbackTrackingService  # NEW: Feedback tracking
+
+ # Initialize FastAPI app
+ app = FastAPI(
+     title="Event Social Media Embeddings & ChatbotRAG API",
+     description="API for embeddings, search, and ChatbotRAG with Jina CLIP v2 + Qdrant + MongoDB + LLM",
+     version="2.0.0"
+ )
+
+ # CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Initialize services
+ print("Initializing services...")
+ embedding_service = JinaClipEmbeddingService(model_path="jinaai/jina-clip-v2")
+
+ collection_name = os.getenv("COLLECTION_NAME", "event_social_media")
+ qdrant_service = QdrantVectorService(
+     collection_name=collection_name,
+     vector_size=embedding_service.get_embedding_dimension()
+ )
+ print(f"✓ Qdrant collection: {collection_name}")
+
+ # MongoDB connection
+ mongodb_uri = os.getenv("MONGODB_URI", "mongodb+srv://truongtn7122003:7KaI9OT5KTUxWjVI@truongtn7122003.xogin4q.mongodb.net/")
+ mongo_client = MongoClient(mongodb_uri)
+ db = mongo_client[os.getenv("MONGODB_DB_NAME", "chatbot_rag")]
+ documents_collection = db["documents"]
+ chat_history_collection = db["chat_history"]
+ print("✓ MongoDB connected")
+
+ # Hugging Face token
+ hf_token = os.getenv("HUGGINGFACE_TOKEN")
+ if hf_token:
+     print("✓ Hugging Face token configured")
+
+ # Initialize Advanced RAG (Best Case 2025)
+ advanced_rag = AdvancedRAG(
+     embedding_service=embedding_service,
+     qdrant_service=qdrant_service
+ )
+ print("✓ Advanced RAG pipeline initialized (with Cross-Encoder)")
+
+ # Initialize CAG Service (Semantic Cache)
+ try:
+     cag_service = CAGService(
+         embedding_service=embedding_service,
+         cache_collection="semantic_cache",
+         vector_size=embedding_service.get_embedding_dimension(),
+         similarity_threshold=0.9,
+         ttl_hours=24
+     )
+     print("✓ CAG Service initialized (Semantic Caching enabled)")
+ except Exception as e:
+     print(f"Warning: CAG Service initialization failed: {e}")
+     print("Continuing without semantic caching...")
+     cag_service = None
+
+ # Initialize PDF Indexer
+ pdf_indexer = PDFIndexer(
+     embedding_service=embedding_service,
+     qdrant_service=qdrant_service,
+     documents_collection=documents_collection
+ )
+ print("✓ PDF Indexer initialized")
+
+ # Initialize Multimodal PDF Indexer
+ multimodal_pdf_indexer = MultimodalPDFIndexer(
+     embedding_service=embedding_service,
+     qdrant_service=qdrant_service,
+     documents_collection=documents_collection
+ )
+ print("✓ Multimodal PDF Indexer initialized")
+
+ # Initialize Conversation Service
+ conversations_collection = db["conversations"]
+ conversation_service = ConversationService(conversations_collection, max_history=10)
+ print("✓ Conversation Service initialized")
+
+ # Initialize Feedback Tracking Service
+ feedback_tracking_collection = db["feedback_tracking"]
+ feedback_tracking = FeedbackTrackingService(feedback_tracking_collection)
+ print("✓ Feedback Tracking Service initialized")
+
+ # Initialize Tools Service
+ tools_service = ToolsService(
+     base_url="https://hoalacrent.io.vn/api/v0",
+     feedback_tracking=feedback_tracking
+ )
+ print("✓ Tools Service initialized (Function Calling enabled)")
+
+ # Initialize Agent Service (Agentic Workflow)
+ agent_service = AgentService(
+     tools_service=tools_service,
+     embedding_service=embedding_service,
+     qdrant_service=qdrant_service,
+     advanced_rag=advanced_rag,
+     hf_token=hf_token,
+     feedback_tracking=feedback_tracking  # Pass feedback tracking
+ )
+ print("✓ Agent Service initialized (Agentic Workflow enabled)")
+
+ print("✓ Services initialized successfully")
+
+
+ # Pydantic models for embeddings
+ class SearchRequest(BaseModel):
+     text: Optional[str] = None
+     limit: int = 10
+     score_threshold: Optional[float] = None
+     text_weight: float = 0.5
+     image_weight: float = 0.5
+
+
+ class SearchResponse(BaseModel):
+     id: str
+     confidence: float
+     metadata: dict
+
+
+ class IndexResponse(BaseModel):
+     success: bool
+     id: str
+     message: str
+
+
+ # Pydantic models for ChatbotRAG
+ class ChatRequest(BaseModel):
+     message: str
+     session_id: Optional[str] = None  # Multi-turn conversation
+     user_id: Optional[str] = None  # User identifier for session tracking
+     access_token: Optional[str] = None  # NEW: For authenticated API calls (feedback mode)
+     mode: str = "sales"  # NEW: "sales" or "feedback" for agent selection
+     event_code: Optional[str] = None  # NEW: For targeted feedback on specific event
+     use_rag: bool = True
+     top_k: int = 3
+     system_message: Optional[str] = """Bạn là trợ lý AI chuyên biệt cho hệ thống quản lý sự kiện và bán vé.
+ Vai trò của bạn là trả lời các câu hỏi CHÍNH XÁC dựa trên dữ liệu được cung cấp từ hệ thống.
+
+ Quy tắc tuyệt đối:
+ - CHỈ trả lời câu hỏi liên quan đến: events, social media posts, PDFs đã upload, và dữ liệu trong knowledge base
+ - KHÔNG trả lời câu hỏi ngoài phạm vi (tin tức, thời tiết, toán học, lập trình, tư vấn cá nhân, v.v.)
+ - Nếu câu hỏi nằm ngoài phạm vi: BẮT BUỘC trả lời "Chúng tôi không thể trả lời câu hỏi này vì nó nằm ngoài vùng application xử lí."
+ - Luôn ưu tiên thông tin từ context được cung cấp"""
+     max_tokens: int = 512
+     temperature: float = 0.7
+     top_p: float = 0.95
+     hf_token: Optional[str] = None
+     # Advanced RAG options
+     use_advanced_rag: bool = True
+     use_query_expansion: bool = True
+     use_reranking: bool = False  # Disabled - Cross-Encoder not good for Vietnamese
+     use_compression: bool = True
+     score_threshold: float = 0.5
+     # Function calling
+     enable_tools: bool = True  # Enable API tool calling
+
+
+ class ChatResponse(BaseModel):
+     response: str
+     context_used: List[Dict]
+     timestamp: str
+     rag_stats: Optional[Dict] = None  # Stats from advanced RAG pipeline
+     session_id: Optional[str] = None  # Session identifier for multi-turn (auto-generated if not provided)
+     tool_calls: Optional[List[Dict]] = None  # Track API calls made
+
+
+ class AddDocumentRequest(BaseModel):
+     text: str
+     metadata: Optional[Dict] = None
+
+
+ class AddDocumentResponse(BaseModel):
+     success: bool
+     doc_id: str
+     message: str
+
+
+ @app.get("/")
+ async def root():
+     """Health check endpoint with comprehensive API documentation"""
+     return {
+         "status": "running",
+         "service": "ChatbotRAG API",
+         "version": "2.0.0",
+         "vector_db": "Qdrant",
+         "document_db": "MongoDB",
+         "endpoints": {
+             "chatbot_rag": {
+                 "API endpoint": "https://minhvtt-ChatbotRAG.hf.space/",
+                 "POST /chat": {
+                     "description": "Chat with the AI using RAG (Retrieval-Augmented Generation)",
+                     "request": {
+                         "method": "POST",
+                         "content_type": "application/json",
+                         "body": {
+                             "message": "string (required) - User message/question",
+                             "use_rag": "boolean (optional, default: true) - Enable RAG context retrieval",
+                             "top_k": "integer (optional, default: 3) - Number of context documents to retrieve",
+                             "system_message": "string (optional) - Custom system prompt",
+                             "max_tokens": "integer (optional, default: 512) - Max response length",
+                             "temperature": "float (optional, default: 0.7, range: 0-1) - Creativity level",
+                             "top_p": "float (optional, default: 0.95) - Nucleus sampling",
+                             "hf_token": "string (optional) - Hugging Face token (fallback to env)"
+                         }
+                     },
+                     "response": {
+                         "response": "string - AI generated response",
+                         "context_used": [
+                             {
+                                 "id": "string - Document ID",
+                                 "confidence": "float - Relevance score",
+                                 "metadata": {
+                                     "text": "string - Retrieved context"
+                                 }
+                             }
+                         ],
+                         "timestamp": "string - ISO 8601 timestamp"
+                     },
+                     "example_request": {
+                         "message": "Dao có nguy hiểm không?",
+                         "use_rag": True,
+                         "top_k": 3,
+                         "temperature": 0.7
+                     },
+                     "example_response": {
+                         "response": "Dựa trên thông tin trong database, dao được phân loại là vũ khí nguy hiểm. Dao sắc có thể gây thương tích nghiêm trọng nếu không sử dụng đúng cách. Cần tuân thủ các quy định an toàn khi sử dụng.",
+                         "context_used": [
+                             {
+                                 "id": "68a3fc14c853d7621e8977b5",
+                                 "confidence": 0.92,
+                                 "metadata": {
+                                     "text": "Vũ khí"
+                                 }
+                             },
+                             {
+                                 "id": "68a3fc4cc853d7621e8977b6",
+                                 "confidence": 0.85,
+                                 "metadata": {
+                                     "text": "Con dao sắc"
+                                 }
+                             }
+                         ],
+                         "timestamp": "2025-10-13T10:30:45.123456"
+                     },
+                     "notes": [
+                         "RAG retrieves relevant context from vector DB before generating response",
+                         "LLM uses context to provide accurate, grounded answers",
+                         "Requires HUGGINGFACE_TOKEN environment variable or hf_token in request"
+                     ]
+                 },
+                 "POST /documents": {
+                     "description": "Add document to knowledge base for RAG",
+                     "request": {
+                         "method": "POST",
+                         "content_type": "application/json",
+                         "body": {
+                             "text": "string (required) - Document text content",
+                             "metadata": "object (optional) - Additional metadata (source, category, etc.)"
+                         }
+                     },
+                     "response": {
+                         "success": "boolean",
+                         "doc_id": "string - MongoDB ObjectId",
+                         "message": "string - Status message"
+                     },
+                     "example_request": {
+                         "text": "Để tạo event mới: Click nút 'Tạo Event' ở góc trên bên phải màn hình. Điền thông tin sự kiện bao gồm tên, ngày giờ, địa điểm. Click Lưu để hoàn tất.",
+                         "metadata": {
+                             "source": "user_guide.pdf",
+                             "section": "create_event",
+                             "page": 5,
+                             "category": "tutorial"
+                         }
+                     },
+                     "example_response": {
+                         "success": True,
+                         "doc_id": "67a9876543210fedcba98765",
+                         "message": "Document added successfully with ID: 67a9876543210fedcba98765"
+                     }
+                 },
+                 "POST /rag/search": {
+                     "description": "Search in knowledge base (similar to /search/text but for RAG documents)",
+                     "request": {
+                         "method": "POST",
+                         "content_type": "multipart/form-data",
+                         "body": {
+                             "query": "string (required) - Search query",
+                             "top_k": "integer (optional, default: 5) - Number of results",
+                             "score_threshold": "float (optional, default: 0.5) - Minimum relevance score"
+                         }
+                     },
+                     "response": [
+                         {
+                             "id": "string",
+                             "confidence": "float",
+                             "metadata": {
+                                 "text": "string",
+                                 "source": "string"
+                             }
+                         }
+                     ],
+                     "example_request": {
+                         "query": "cách tạo sự kiện mới",
+                         "top_k": 3,
+                         "score_threshold": 0.6
+                     }
+                 },
+                 "GET /history": {
+                     "description": "Get chat conversation history",
+                     "request": {
+                         "method": "GET",
+                         "query_params": {
+                             "limit": "integer (optional, default: 10) - Number of messages",
+                             "skip": "integer (optional, default: 0) - Pagination offset"
+                         }
+                     },
+                     "response": {
+                         "history": [
+                             {
+                                 "user_message": "string",
+                                 "assistant_response": "string",
+                                 "context_used": "array",
+                                 "timestamp": "string - ISO 8601"
+                             }
+                         ],
+                         "total": "integer - Total messages count"
+                     },
+                     "example_request": "GET /history?limit=5&skip=0",
+                     "example_response": {
+                         "history": [
+                             {
+                                 "user_message": "Dao có nguy hiểm không?",
+                                 "assistant_response": "Dao được phân loại là vũ khí...",
+                                 "context_used": [],
+                                 "timestamp": "2025-10-13T10:30:45.123456"
+                             }
+                         ],
+                         "total": 15
+                     }
+                 },
+                 "DELETE /documents/{doc_id}": {
+                     "description": "Delete document from knowledge base",
+                     "request": {
+                         "method": "DELETE",
+                         "path_params": {
+                             "doc_id": "string - MongoDB ObjectId"
+                         }
+                     },
+                     "response": {
+                         "success": "boolean",
+                         "message": "string"
+                     }
+                 }
+             }
+         },
+         "usage_examples": {
+             "curl_chat": "curl -X POST 'http://localhost:8000/chat' -H 'Content-Type: application/json' -d '{\"message\": \"Dao có nguy hiểm không?\", \"use_rag\": true}'",
+             "python_chat": """
+ import requests
+
+ response = requests.post(
+     'http://localhost:8000/chat',
+     json={
+         'message': 'Nút tạo event ở đâu?',
+         'use_rag': True,
+         'top_k': 3
+     }
+ )
+ print(response.json()['response'])
+ """
+         },
+         "authentication": {
+             "embeddings_apis": "No authentication required",
+             "chat_api": "Requires HUGGINGFACE_TOKEN (env variable or request body)"
+         },
+         "rate_limits": {
+             "embeddings": "No limit",
+             "chat_with_llm": "Limited by Hugging Face API (free tier: ~1000 requests/hour)"
+         },
+         "error_codes": {
+             "400": "Bad Request - Missing required fields or invalid input",
+             "401": "Unauthorized - Invalid Hugging Face token",
+             "404": "Not Found - Document ID not found",
+             "500": "Internal Server Error - Server or database error"
+         },
+         "links": {
+             "docs": "http://localhost:8000/docs",
+             "redoc": "http://localhost:8000/redoc",
+             "openapi": "http://localhost:8000/openapi.json"
+         }
+     }
+
+ @app.post("/index", response_model=IndexResponse)
+ async def index_data(
+     id: str = Form(...),
+     text: str = Form(...),
+     image: Optional[UploadFile] = File(None)
+ ):
+     """
+     Index data into the vector database
+
+     Body:
+     - id: Document ID (event ID, post ID, etc.)
+     - text: Text content (Vietnamese supported)
+     - image: Image file (optional)
+
+     Returns:
+     - success: True/False
+     - id: Document ID
+     - message: Status message
+     """
+     try:
+         # Prepare embeddings
+         text_embedding = None
+         image_embedding = None
+
+         # Encode text (Vietnamese supported)
+         if text and text.strip():
+             text_embedding = embedding_service.encode_text(text)
+
+         # Encode image if provided
+         if image:
+             image_bytes = await image.read()
+             pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+             image_embedding = embedding_service.encode_image(pil_image)
+
+         # Combine embeddings
+         if text_embedding is not None and image_embedding is not None:
+             # Average of text and image embeddings
+             combined_embedding = np.mean([text_embedding, image_embedding], axis=0)
+         elif text_embedding is not None:
+             combined_embedding = text_embedding
+         elif image_embedding is not None:
+             combined_embedding = image_embedding
+         else:
+             raise HTTPException(status_code=400, detail="At least text or image must be provided")
+
+         # Normalize
+         combined_embedding = combined_embedding / np.linalg.norm(combined_embedding, axis=1, keepdims=True)
+
+         # Index into Qdrant
+         metadata = {
+             "text": text,
+             "has_image": image is not None,
+             "image_filename": image.filename if image else None
+         }
+
+         result = qdrant_service.index_data(
+             doc_id=id,
+             embedding=combined_embedding,
+             metadata=metadata
+         )
+
+         return IndexResponse(
+             success=True,
+             id=result["original_id"],  # Return the MongoDB ObjectId
+             message=f"Successfully indexed document {result['original_id']} (Qdrant UUID: {result['qdrant_id']})"
+         )
+
+     except HTTPException:
+         # Re-raise the 400 above instead of masking it as a 500
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error while indexing: {str(e)}")
+
+
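A client-side sketch of this endpoint for reference (the host and file path are illustrative; omit `files` for text-only indexing):

```python
# Minimal client sketch for POST /index.
import requests

resp = requests.post(
    "http://localhost:8000/index",
    data={"id": "event_001", "text": "Đêm nhạc acoustic tại Hà Nội"},
    files={"image": open("poster.jpg", "rb")},  # optional image upload
)
print(resp.json())  # {"success": true, "id": "event_001", "message": "..."}
```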
+ @app.post("/search", response_model=List[SearchResponse])
+ async def search(
+     text: Optional[str] = Form(None),
+     image: Optional[UploadFile] = File(None),
+     limit: int = Form(10),
+     score_threshold: Optional[float] = Form(None),
+     text_weight: float = Form(0.5),
+     image_weight: float = Form(0.5)
+ ):
+     """
+     Search similar documents by text and/or image
+
+     Body:
+     - text: Query text (Vietnamese supported)
+     - image: Query image (optional)
+     - limit: Number of results (default: 10)
+     - score_threshold: Minimum confidence score (0-1)
+     - text_weight: Weight for text search (default: 0.5)
+     - image_weight: Weight for image search (default: 0.5)
+
+     Returns:
+     - List of results with id, confidence, and metadata
+     """
+     try:
+         # Prepare query embeddings
+         text_embedding = None
+         image_embedding = None
+
+         # Encode text query
+         if text and text.strip():
+             text_embedding = embedding_service.encode_text(text)
+
+         # Encode image query
+         if image:
+             image_bytes = await image.read()
+             pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+             image_embedding = embedding_service.encode_image(pil_image)
+
+         # Validate input
+         if text_embedding is None and image_embedding is None:
+             raise HTTPException(status_code=400, detail="At least text or image must be provided to search")
+
+         # Hybrid search with Qdrant
+         results = qdrant_service.hybrid_search(
+             text_embedding=text_embedding,
+             image_embedding=image_embedding,
+             text_weight=text_weight,
+             image_weight=image_weight,
+             limit=limit,
+             score_threshold=score_threshold,
+             ef=256  # High accuracy search
+         )
+
+         # Format response
+         return [
+             SearchResponse(
+                 id=result["id"],
+                 confidence=result["confidence"],
+                 metadata=result["metadata"]
+             )
+             for result in results
+         ]
+
+     except HTTPException:
+         # Re-raise the 400 above instead of masking it as a 500
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error while searching: {str(e)}")
+
+
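And the matching hybrid-search call: shifting `text_weight`/`image_weight` biases the fused score toward one modality. Again, the host and file path are illustrative:

```python
# Minimal client sketch for POST /search with modality weighting.
import requests

resp = requests.post(
    "http://localhost:8000/search",
    data={
        "text": "nhạc acoustic",
        "limit": 5,
        "text_weight": 0.7,   # favor the text query...
        "image_weight": 0.3,  # ...over the (optional) image query
    },
    files={"image": open("query.jpg", "rb")},
)
for hit in resp.json():
    print(hit["id"], hit["confidence"])
```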
+ @app.post("/search/text", response_model=List[SearchResponse])
+ async def search_by_text(
+     text: str = Form(...),
+     limit: int = Form(10),
+     score_threshold: Optional[float] = Form(None)
+ ):
+     """
+     Search by text only (Vietnamese supported)
+
+     Body:
+     - text: Query text (Vietnamese)
+     - limit: Number of results
+     - score_threshold: Minimum confidence score
+
+     Returns:
+     - List of results
+     """
+     try:
+         # Encode text
+         text_embedding = embedding_service.encode_text(text)
+
+         # Search
+         results = qdrant_service.search(
+             query_embedding=text_embedding,
+             limit=limit,
+             score_threshold=score_threshold,
+             ef=256
+         )
+
+         return [
+             SearchResponse(
+                 id=result["id"],
+                 confidence=result["confidence"],
+                 metadata=result["metadata"]
+             )
+             for result in results
+         ]
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error while searching: {str(e)}")
+
+
+ @app.post("/search/image", response_model=List[SearchResponse])
+ async def search_by_image(
+     image: UploadFile = File(...),
+     limit: int = Form(10),
+     score_threshold: Optional[float] = Form(None)
+ ):
+     """
+     Search by image only
+
+     Body:
+     - image: Query image
+     - limit: Number of results
+     - score_threshold: Minimum confidence score
+
+     Returns:
+     - List of results
+     """
+     try:
+         # Encode image
+         image_bytes = await image.read()
+         pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+         image_embedding = embedding_service.encode_image(pil_image)
+
+         # Search
+         results = qdrant_service.search(
+             query_embedding=image_embedding,
+             limit=limit,
+             score_threshold=score_threshold,
+             ef=256
+         )
+
+         return [
+             SearchResponse(
+                 id=result["id"],
+                 confidence=result["confidence"],
+                 metadata=result["metadata"]
+             )
+             for result in results
+         ]
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error while searching: {str(e)}")
+
+
+ @app.delete("/delete/{doc_id}")
+ async def delete_document(doc_id: str):
+     """
+     Delete document by ID (MongoDB ObjectId or UUID)
+
+     Args:
+     - doc_id: Document ID to delete
+
+     Returns:
+     - Success message
+     """
+     try:
+         qdrant_service.delete_by_id(doc_id)
+         return {"success": True, "message": f"Deleted document {doc_id}"}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error while deleting: {str(e)}")
+
+
+ @app.get("/document/{doc_id}")
+ async def get_document(doc_id: str):
+     """
+     Get document by ID (MongoDB ObjectId or UUID)
+
+     Args:
+     - doc_id: Document ID (MongoDB ObjectId)
+
+     Returns:
+     - Document data
+     """
+     try:
+         doc = qdrant_service.get_by_id(doc_id)
+         if doc:
+             return {
+                 "success": True,
+                 "data": doc
+             }
+         raise HTTPException(status_code=404, detail=f"Document {doc_id} not found")
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error while getting document: {str(e)}")
+
+
+ @app.get("/stats")
+ async def get_stats():
+     """
+     Get collection statistics
+
+     Returns:
+     - Collection statistics
+     """
+     try:
+         info = qdrant_service.get_collection_info()
+         return info
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error while getting stats: {str(e)}")
+
+
+ # ============================================
+ # ChatbotRAG Endpoints - DEPRECATED
+ # USE /agent/chat INSTEAD
+ # ============================================
+ # Old endpoints removed - now using Agentic Workflow via /agent/chat
+
+
+
+ @app.get("/chat/history/{session_id}")
+ async def get_conversation_history(session_id: str, include_metadata: bool = False):
+     """
+     Get conversation history for a session
+
+     Args:
+         session_id: Session identifier
+         include_metadata: Include metadata (rag_stats, tool_calls) in response
+
+     Returns:
+         List of messages with role and content
+
+     Example:
+     ```
+     GET /chat/history/abc-123?include_metadata=true
+     ```
+     """
+     if not conversation_service.session_exists(session_id):
+         raise HTTPException(
+             status_code=404,
+             detail=f"Session {session_id} not found or has expired"
+         )
+
+     history = conversation_service.get_conversation_history(
+         session_id,
+         include_metadata=include_metadata
+     )
+
+     session_info = conversation_service.get_session_info(session_id)
+
+     return {
+         "session_id": session_id,
+         "message_count": len(history),
+         "messages": history,
+         "created_at": session_info.get("created_at") if session_info else None,
+         "updated_at": session_info.get("updated_at") if session_info else None
+     }
+
+
+ @app.get("/chat/sessions")
+ async def list_sessions(
+     limit: int = 50,
+     skip: int = 0,
+     sort_by: str = "updated_at",
+     user_id: Optional[str] = None  # NEW: Filter by user
+ ):
+     """
+     List all conversation sessions
+
+     Query Parameters:
+         limit: Maximum sessions to return (default: 50, max: 100)
+         skip: Number of sessions to skip for pagination (default: 0)
+         sort_by: Field to sort by - 'created_at' or 'updated_at' (default: updated_at)
+         user_id: Filter sessions by user_id (optional)
+
+     Returns:
+         List of sessions with metadata and message counts
+
+     Examples:
+     ```
+     GET /chat/sessions                              # All sessions
+     GET /chat/sessions?user_id=user_123             # Only user_123's sessions
+     GET /chat/sessions?limit=20&skip=0&sort_by=updated_at
+     ```
+     """
+     # Validate limit
+     if limit > 100:
+         limit = 100
+     if limit < 1:
+         limit = 1
+
+     # Validate sort_by
+     if sort_by not in ["created_at", "updated_at"]:
+         raise HTTPException(
+             status_code=400,
+             detail="sort_by must be 'created_at' or 'updated_at'"
+         )
+
+     sessions = conversation_service.list_sessions(
+         limit=limit,
+         skip=skip,
+         sort_by=sort_by,
+         descending=True,
+         user_id=user_id  # NEW: Pass user_id filter
+     )
+
+     total_sessions = conversation_service.count_sessions(user_id=user_id)  # NEW: Count with filter
+
+     return {
+         "total": total_sessions,
+         "limit": limit,
+         "skip": skip,
+         "count": len(sessions),
+         "user_id": user_id,  # NEW: Include filter in response
+         "sessions": sessions
+     }
+
+
+ @app.get("/scenarios")
+ async def list_scenarios():
+     """
+     Get list of all available scenarios for proactive chat
+
+     FE use cases:
+     - Randomly pick a scenario to start a proactive chat
+     - Display a menu of the available scenarios
+
+     Returns:
+         List of scenarios with metadata
+
+     Example:
+     ```
+     GET /scenarios
+
+     Response:
+     {
+         "scenarios": [
+             {
+                 "scenario_id": "price_inquiry",
+                 "name": "Hỏi giá vé",
+                 "description": "Tư vấn giá vé và gửi PDF",
+                 "triggers": ["giá vé", "bao nhiêu"],
+                 "category": "sales"
+             },
+             ...
+         ]
+     }
+     ```
+     """
+     scenarios_list = []
+
+     for scenario_id, scenario_data in scenario_engine.scenarios.items():
+         scenarios_list.append({
+             "scenario_id": scenario_id,
+             "name": scenario_data.get("name", scenario_id),
+             "description": scenario_data.get("description", ""),
+             "triggers": scenario_data.get("triggers", []),
+             "category": scenario_data.get("category", "general"),
+             "priority": scenario_data.get("priority", "normal"),
+             "estimated_duration": scenario_data.get("estimated_duration", "unknown")
+         })
+
+     return {
+         "total": len(scenarios_list),
+         "scenarios": scenarios_list
+     }
+
+
+ @app.post("/scenarios/{scenario_id}/start")
+ async def start_scenario_proactive(
+     scenario_id: str,
+     request_body: Optional[Dict] = None
+ ):
+     """
+     Start a scenario proactively with optional initial data
+
+     Use cases:
+     1. FE picks random scenario
+     2. BE triggers scenario based on user action (after purchase, exit intent, etc.)
+     3. Inject context data (event_name, mood, etc.)
+
+     Example 1 - Simple start:
+     ```
+     POST /scenarios/price_inquiry/start
+     {}
+
+     Response:
+     {
+         "session_id": "abc-123",
+         "message": "Hello 👋 Bạn muốn xem giá..."
+     }
+     ```
+
+     Example 2 - With initial data (post-event feedback):
+     ```
+     POST /scenarios/post_event_feedback/start
+     {
+         "initial_data": {
+             "event_name": "Hòa Nhạc Mùa Xuân",
+             "event_date": "2024-11-29",
+             "event_id": "evt_123"
+         },
+         "session_id": "existing-session",  // optional
+         "user_id": "user_456"              // optional
+     }
+
+     Response:
+     {
+         "session_id": "abc-123",
+         "message": "Cảm ơn bạn đã tham dự *Hòa Nhạc Mùa Xuân* hôm qua!"
+     }
+     ```
+
+     Example 3 - Mood recommendation:
+     ```
+     POST /scenarios/mood_recommendation/start
+     {
+         "initial_data": {
+             "mood": "chill",
+             "preferred_genre": "acoustic"
+         }
+     }
+     ```
+     """
+     # Parse request body
+     body = request_body or {}
+     initial_data = body.get("initial_data", {})
+     session_id = body.get("session_id")
+     user_id = body.get("user_id")
+
+     # Create or use existing session
+     if not session_id:
+         session_id = conversation_service.create_session(
+             metadata={"started_by": "proactive", "scenario": scenario_id},
+             user_id=user_id
+         )
+
+     # Start scenario with initial data
+     result = scenario_engine.start_scenario(scenario_id, initial_data)
+
+     if result.get("new_state"):
+         conversation_service.set_scenario_state(session_id, result["new_state"])
+
+     # Save bot message to history
+     conversation_service.add_message(
+         session_id,
+         "assistant",
+         result["message"],
+         metadata={"proactive": True, "scenario": scenario_id, "initial_data": initial_data}
+     )
+
+     return {
+         "session_id": session_id,
+         "scenario_id": scenario_id,
+         "message": result["message"],
+         "scenario_active": True,
+         "proactive": True
+     }
+
+
+ @app.post("/chat/clear-session")
+ async def clear_chat_session(session_id: str):
+     """
+     Clear conversation history for a session
+
+     Args:
+         session_id: Session identifier to clear
+
+     Returns:
+         Success message
+
+     Example:
+     ```
+     POST /chat/clear-session?session_id=abc-123
+     ```
+     """
+     success = conversation_service.clear_session(session_id)
+
+     if success:
+         return {
+             "success": True,
+             "message": f"Session {session_id} cleared successfully"
+         }
+     else:
+         raise HTTPException(
+             status_code=404,
+             detail=f"Session {session_id} not found or already cleared"
+         )
+
+
+ @app.get("/chat/session/{session_id}")
+ async def get_session_info(session_id: str):
+     """
+     Get metadata about a conversation session
+
+     Args:
+         session_id: Session identifier
+
+     Returns:
+         Session info including creation time and message count
+
+     Example:
+     ```
+     GET /chat/session/abc-123
+     ```
+     """
+     session = conversation_service.get_session_info(session_id)
+
+     if not session:
+         raise HTTPException(
+             status_code=404,
+             detail=f"Session {session_id} not found"
+         )
+
+     # Get message count
+     history = conversation_service.get_conversation_history(
+         session_id,
+         include_metadata=True
+     )
+
+     return {
+         "session_id": session["session_id"],
+         "created_at": session["created_at"],
+         "updated_at": session["updated_at"],
+         "message_count": len(history),
+         "metadata": session.get("metadata", {})
+     }
+
+
+ @app.post("/documents", response_model=AddDocumentResponse)
+ async def add_document(request: AddDocumentRequest):
+     """
+     Add document to knowledge base
+
+     Body:
+     - text: Document text
+     - metadata: Additional metadata (optional)
+
+     Returns:
+     - success: True/False
+     - doc_id: MongoDB document ID
+     - message: Status message
+     """
+     try:
+         # Save to MongoDB
+         doc_data = {
+             "text": request.text,
+             "metadata": request.metadata or {},
+             "created_at": datetime.utcnow()
+         }
+         result = documents_collection.insert_one(doc_data)
+         doc_id = str(result.inserted_id)
+
+         # Generate embedding
+         embedding = embedding_service.encode_text(request.text)
+
+         # Index to Qdrant
+         qdrant_service.index_data(
+             doc_id=doc_id,
+             embedding=embedding,
+             metadata={
+                 "text": request.text,
+                 "source": "api",
+                 **(request.metadata or {})
+             }
+         )
+
+         return AddDocumentResponse(
+             success=True,
+             doc_id=doc_id,
+             message=f"Document added successfully with ID: {doc_id}"
+         )
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
+
+
1070
+ @app.post("/documents/upload/pdf")
1071
+ async def upload_pdf(
1072
+ file: UploadFile = File(...),
1073
+ metadata: Optional[str] = Form(None)
1074
+ ):
1075
+ """
1076
+ Upload PDF file and index into knowledge base
1077
+
1078
+ Features:
1079
+ - Extracts text from PDF
1080
+ - Detects image URLs in text/markdown
1081
+ - Chunks content intelligently
1082
+ - Indexes all chunks into Qdrant for RAG
1083
+
1084
+ Args:
1085
+ file: PDF file to upload
1086
+ metadata: Optional JSON string with metadata (title, author, etc.)
1087
+
1088
+ Returns:
1089
+ Success status, document ID, and indexing stats
1090
+
1091
+ Example:
1092
+ ```bash
1093
+ curl -X POST http://localhost:8000/documents/upload/pdf \
1094
+ -F "file=@document.pdf" \
1095
+ -F 'metadata={"title": "User Guide", "category": "documentation"}'
1096
+ ```
1097
+ """
1098
+ try:
1099
+ # Validate file type
1100
+ if not file.filename.lower().endswith('.pdf'):
1101
+ raise HTTPException(
1102
+ status_code=400,
1103
+ detail="Only PDF files are supported"
1104
+ )
1105
+
1106
+ # Read file bytes
1107
+ pdf_bytes = await file.read()
1108
+
1109
+ # Parse metadata if provided
1110
+ import json
1111
+ doc_metadata = {}
1112
+ if metadata:
1113
+ try:
1114
+ doc_metadata = json.loads(metadata)
1115
+ except json.JSONDecodeError:
1116
+ raise HTTPException(
1117
+ status_code=400,
1118
+ detail="Invalid metadata JSON format"
1119
+ )
1120
+
1121
+ # Generate unique document ID
1122
+ from bson import ObjectId
1123
+ document_id = str(ObjectId())
1124
+
1125
+ # Add upload timestamp
1126
+ doc_metadata['uploaded_at'] = datetime.utcnow().isoformat()
1127
+ doc_metadata['original_filename'] = file.filename
1128
+
1129
+ # Index PDF using multimodal parser
1130
+ result = multimodal_pdf_indexer.index_pdf_bytes(
1131
+ pdf_bytes=pdf_bytes,
1132
+ document_id=document_id,
1133
+ filename=file.filename,
1134
+ document_metadata=doc_metadata
1135
+ )
1136
+
1137
+ return {
1138
+ "success": True,
1139
+ "document_id": document_id,
1140
+ "filename": file.filename,
1141
+ "chunks_indexed": result['chunks_indexed'],
1142
+ "images_found": result.get('images_found', 0),
1143
+ "message": f"PDF uploaded and indexed: {result['chunks_indexed']} chunks, {result.get('images_found', 0)} image URLs found"
1144
+ }
1145
+
1146
+ except HTTPException:
1147
+ raise
1148
+ except Exception as e:
1149
+ raise HTTPException(
1150
+ status_code=500,
1151
+ detail=f"Error processing PDF: {str(e)}"
1152
+ )
1153
+
1154
+
1155
+ @app.post("/rag/search", response_model=List[SearchResponse])
1156
+ async def rag_search(
1157
+ query: str = Form(...),
1158
+ top_k: int = Form(5),
1159
+ score_threshold: Optional[float] = Form(0.5)
1160
+ ):
1161
+ """
1162
+ Search in knowledge base
1163
+
1164
+ Body:
1165
+ - query: Search query
1166
+ - top_k: Number of results (default: 5)
1167
+ - score_threshold: Minimum score (default: 0.5)
1168
+
1169
+ Returns:
1170
+ - results: List of matching documents
1171
+ """
1172
+ try:
1173
+ # Generate query embedding
1174
+ query_embedding = embedding_service.encode_text(query)
1175
+
1176
+ # Search in Qdrant
1177
+ results = qdrant_service.search(
1178
+ query_embedding=query_embedding,
1179
+ limit=top_k,
1180
+ score_threshold=score_threshold
1181
+ )
1182
+
1183
+ return [
1184
+ SearchResponse(
1185
+ id=result["id"],
1186
+ confidence=result["confidence"],
1187
+ metadata=result["metadata"]
1188
+ )
1189
+ for result in results
1190
+ ]
1191
+
1192
+ except Exception as e:
1193
+ raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
1194
+
1195
+
1196
+ @app.get("/history")
1197
+ async def get_history(limit: int = 10, skip: int = 0):
1198
+ """
1199
+ Get chat history
1200
+
1201
+ Query params:
1202
+ - limit: Number of messages to return (default: 10)
1203
+ - skip: Number of messages to skip (default: 0)
1204
+
1205
+ Returns:
1206
+ - history: List of chat messages
1207
+ """
1208
+ try:
1209
+ history = list(
1210
+ chat_history_collection
1211
+ .find({}, {"_id": 0})
1212
+ .sort("timestamp", -1)
1213
+ .skip(skip)
1214
+ .limit(limit)
1215
+ )
1216
+
1217
+ # Convert datetime to string
1218
+ for msg in history:
1219
+ if "timestamp" in msg:
1220
+ msg["timestamp"] = msg["timestamp"].isoformat()
1221
+
1222
+ return {
1223
+ "history": history,
1224
+ "total": chat_history_collection.count_documents({})
1225
+ }
1226
+
1227
+ except Exception as e:
1228
+ raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
1229
+
1230
+
1231
+ @app.delete("/documents/{doc_id}")
1232
+ async def delete_document_from_kb(doc_id: str):
1233
+ """
1234
+ Delete document from knowledge base
1235
+
1236
+ Args:
1237
+ - doc_id: Document ID (MongoDB ObjectId)
1238
+
1239
+ Returns:
1240
+ - success: True/False
1241
+ - message: Status message
1242
+ """
1243
+ try:
1244
+ # Delete from MongoDB (documents are stored with an ObjectId _id, so convert the string back)
1245
+ from bson import ObjectId
+ result = documents_collection.delete_one({"_id": ObjectId(doc_id)})
1246
+
1247
+ # Delete from Qdrant
1248
+ if result.deleted_count > 0:
1249
+ qdrant_service.delete_by_id(doc_id)
1250
+ return {"success": True, "message": f"Document {doc_id} deleted from knowledge base"}
1251
+ else:
1252
+ raise HTTPException(status_code=404, detail=f"Document {doc_id} not found")
1253
+
1254
+ except HTTPException:
1255
+ raise
1256
+ except Exception as e:
1257
+ raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
1258
+
1259
+
1260
+ # ===================================
1261
+ # AGENT CHAT STREAMING ENDPOINT (NEW)
1262
+ # ===================================
1263
+
1264
+ @app.post("/agent/chat")
1265
+ async def agent_chat(request: ChatRequest):
1266
+ """
1267
+ 🤖 **Agentic Chatbot with SSE Streaming**
1268
+
1269
+ **Modes:**
1270
+ - `sales`: Sales Agent - event consulting and closing ticket sales
1271
+ - `feedback`: Feedback Agent - customer care, collecting post-event reviews
1272
+
1273
+ **Features:**
1274
+ - ✅ LLM-driven conversation (no hard-coded scenarios)
1275
+ - ✅ Automatic tool calling (search, get_event_details, save_lead...)
1276
+ - ✅ Real-time SSE streaming
1277
+ - ✅ Purchase history check (for feedback mode)
1278
+
1279
+ **Example:**
1280
+ ```
1281
+ POST /agent/chat
1282
+ {
1283
+ "message": "Tìm event cho tôi",
1284
+ "mode": "sales",
1285
+ "user_id": "user_123"
1286
+ }
1287
+ ```
1288
+
1289
+ **SSE Stream:**
1290
+ ```
1291
+ event: status
1292
+ data: Đang tư vấn...
1293
+
1294
+ event: token
1295
+ data: Hello
1296
+
1297
+ event: token
1298
+ data: 👋
1299
+
1300
+ event: done
1301
+ data: {"session_id": "...", "mode": "sales"}
1302
+ ```
1303
+ """
1304
+ return StreamingResponse(
1305
+ agent_chat_stream(
1306
+ request=request,
1307
+ agent_service=agent_service,
1308
+ conversation_service=conversation_service
1309
+ ),
1310
+ media_type="text/event-stream",
1311
+ headers={
1312
+ "Cache-Control": "no-cache",
1313
+ "Connection": "keep-alive",
1314
+ "X-Accel-Buffering": "no"
1315
+ }
1316
+ )
1317
+
1318
+
1319
+ if __name__ == "__main__":
1320
+ import uvicorn
1321
+ uvicorn.run(
1322
+ app,
1323
+ host="0.0.0.0",
1324
+ port=8000,
1325
+ log_level="info"
1326
+ )
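The `/agent/chat` endpoint above streams SSE events (`status`, `token`, `done`). A minimal client sketch for consuming that stream with `httpx`, assuming only the request/response shapes documented in the docstring; the URL, message, and `user_id` are placeholders:

```python
# Hedged client sketch for POST /agent/chat (shapes taken from the docstring above).
import httpx

def consume_agent_chat(message: str, mode: str = "sales", user_id: str = "user_123"):
    payload = {"message": message, "mode": mode, "user_id": user_id}
    with httpx.stream("POST", "http://localhost:8000/agent/chat", json=payload, timeout=None) as resp:
        event = None
        for line in resp.iter_lines():
            if line.startswith("event: "):
                event = line[len("event: "):]
            elif line.startswith("data: ") and event == "token":
                print(line[len("data: "):], end="", flush=True)  # render tokens as they arrive
            elif line.startswith("data: ") and event == "done":
                print("\n[done]", line[len("data: "):])          # final JSON metadata
```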
multimodal_pdf_parser.py ADDED
@@ -0,0 +1,390 @@
1
+ """
2
+ Enhanced Multimodal PDF Parser for PDFs with Text + Image URLs
3
+ Extracts text, detects image URLs, and links them together
4
+ """
5
+
6
+ import pypdfium2 as pdfium
7
+ from typing import List, Dict, Optional, Tuple
8
+ import re
9
+ from dataclasses import dataclass, field
10
+
11
+
12
+ @dataclass
13
+ class MultimodalChunk:
14
+ """Represents a chunk with text and associated images"""
15
+ text: str
16
+ page_number: int
17
+ chunk_index: int
18
+ image_urls: List[str] = field(default_factory=list)
19
+ metadata: Dict = field(default_factory=dict)
20
+
21
+
22
+ class MultimodalPDFParser:
23
+ """
24
+ Enhanced PDF Parser that extracts text and image URLs
25
+ Perfect for user guides with screenshots and visual instructions
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ chunk_size: int = 500,
31
+ chunk_overlap: int = 50,
32
+ min_chunk_size: int = 50,
33
+ extract_images: bool = True
34
+ ):
35
+ self.chunk_size = chunk_size
36
+ self.chunk_overlap = chunk_overlap
37
+ self.min_chunk_size = min_chunk_size
38
+ self.extract_images = extract_images
39
+
40
+ # URL patterns
41
+ self.url_patterns = [
42
+ # Standard URLs
43
+ r'https?://[^\s<>"{}|\\^`\[\]]+',
44
+ # Markdown images: ![alt](url)
45
+ r'!\[.*?\]\((https?://[^\s)]+)\)',
46
+ # HTML images: <img src="url">
47
+ r'<img[^>]+src=["\']([^"\']+)["\']',
48
+ # Direct image extensions
49
+ r'https?://[^\s<>"{}|\\^`\[\]]+\.(?:jpg|jpeg|png|gif|bmp|svg|webp)',
50
+ ]
51
+
52
+ def extract_image_urls(self, text: str) -> List[str]:
53
+ """
54
+ Extract all image URLs from text
55
+
56
+ Args:
57
+ text: Text content
58
+
59
+ Returns:
60
+ List of image URLs found
61
+ """
62
+ urls = []
63
+
64
+ for pattern in self.url_patterns:
65
+ matches = re.findall(pattern, text, re.IGNORECASE)
66
+ urls.extend(matches)
67
+
68
+ # Remove duplicates while preserving order
69
+ seen = set()
70
+ unique_urls = []
71
+ for url in urls:
72
+ if url not in seen:
73
+ seen.add(url)
74
+ unique_urls.append(url)
75
+
76
+ return unique_urls
77
+
78
+ def extract_text_from_pdf(self, pdf_path: str) -> Dict[int, Tuple[str, List[str]]]:
79
+ """
80
+ Extract text and image URLs from PDF
81
+
82
+ Args:
83
+ pdf_path: Path to PDF file
84
+
85
+ Returns:
86
+ Dictionary mapping page number to (text, image_urls) tuple
87
+ """
88
+ pdf_pages = {}
89
+
90
+ try:
91
+ pdf = pdfium.PdfDocument(pdf_path)
92
+
93
+ for page_num in range(len(pdf)):
94
+ page = pdf[page_num]
95
+ textpage = page.get_textpage()
96
+ text = textpage.get_text_range()
97
+
98
+ # Clean text
99
+ text = self._clean_text(text)
100
+
101
+ # Extract image URLs if enabled
102
+ image_urls = []
103
+ if self.extract_images:
104
+ image_urls = self.extract_image_urls(text)
105
+
106
+ pdf_pages[page_num + 1] = (text, image_urls)
107
+
108
+ return pdf_pages
109
+
110
+ except Exception as e:
111
+ raise Exception(f"Error reading PDF: {str(e)}")
112
+
113
+ def _clean_text(self, text: str) -> str:
114
+ """Clean extracted text"""
115
+ # Remove excessive whitespace
116
+ text = re.sub(r'\s+', ' ', text)
117
+ # Remove special characters
118
+ text = text.replace('\x00', '')
119
+ return text.strip()
120
+
121
+ def chunk_text_with_images(
122
+ self,
123
+ text: str,
124
+ image_urls: List[str],
125
+ page_number: int
126
+ ) -> List[MultimodalChunk]:
127
+ """
128
+ Split text into chunks and associate images with relevant chunks
129
+
130
+ Args:
131
+ text: Text to chunk
132
+ image_urls: Image URLs from the page
133
+ page_number: Page number
134
+
135
+ Returns:
136
+ List of MultimodalChunk objects
137
+ """
138
+ # Split into words
139
+ words = text.split()
140
+
141
+ if len(words) < self.min_chunk_size:
142
+ if len(words) > 0:
143
+ return [MultimodalChunk(
144
+ text=text,
145
+ page_number=page_number,
146
+ chunk_index=0,
147
+ image_urls=image_urls, # All images go to single chunk
148
+ metadata={'page': page_number, 'chunk': 0}
149
+ )]
150
+ return []
151
+
152
+ chunks = []
153
+ chunk_index = 0
154
+ start = 0
155
+
156
+ # Calculate how to distribute images across chunks
157
+ images_per_chunk = len(image_urls) // max(1, len(words) // self.chunk_size) if image_urls else 0
158
+ image_index = 0
159
+
160
+ while start < len(words):
161
+ end = min(start + self.chunk_size, len(words))
162
+ chunk_words = words[start:end]
163
+ chunk_text = ' '.join(chunk_words)
164
+
165
+ # Assign images to this chunk
166
+ chunk_images = []
167
+ if image_urls:
168
+ # Simple strategy: distribute images evenly
169
+ # or detect if URL appears in chunk text
170
+ for url in image_urls:
171
+ if url in chunk_text:
172
+ chunk_images.append(url)
173
+
174
+ # If no URLs found in text, distribute evenly
175
+ if not chunk_images and image_index < len(image_urls):
176
+ # Assign remaining images to chunks
177
+ num_imgs = min(images_per_chunk + 1, len(image_urls) - image_index)
178
+ chunk_images = image_urls[image_index:image_index + num_imgs]
179
+ image_index += num_imgs
180
+
181
+ chunks.append(MultimodalChunk(
182
+ text=chunk_text,
183
+ page_number=page_number,
184
+ chunk_index=chunk_index,
185
+ image_urls=chunk_images,
186
+ metadata={
187
+ 'page': page_number,
188
+ 'chunk': chunk_index,
189
+ 'start_word': start,
190
+ 'end_word': end,
191
+ 'has_images': len(chunk_images) > 0,
192
+ 'num_images': len(chunk_images)
193
+ }
194
+ ))
195
+
196
+ chunk_index += 1
197
+ start = end - self.chunk_overlap
198
+
199
+ if start >= len(words) - self.min_chunk_size:
200
+ break
201
+
202
+ return chunks
203
+
204
+ def parse_pdf(
205
+ self,
206
+ pdf_path: str,
207
+ document_metadata: Optional[Dict] = None
208
+ ) -> List[MultimodalChunk]:
209
+ """
210
+ Parse PDF into multimodal chunks
211
+
212
+ Args:
213
+ pdf_path: Path to PDF file
214
+ document_metadata: Additional metadata
215
+
216
+ Returns:
217
+ List of MultimodalChunk objects
218
+ """
219
+ pages_data = self.extract_text_from_pdf(pdf_path)
220
+
221
+ all_chunks = []
222
+ for page_num, (text, image_urls) in pages_data.items():
223
+ chunks = self.chunk_text_with_images(text, image_urls, page_num)
224
+
225
+ # Add document metadata
226
+ if document_metadata:
227
+ for chunk in chunks:
228
+ chunk.metadata.update(document_metadata)
229
+
230
+ all_chunks.extend(chunks)
231
+
232
+ return all_chunks
233
+
234
+ def parse_pdf_bytes(
235
+ self,
236
+ pdf_bytes: bytes,
237
+ document_metadata: Optional[Dict] = None
238
+ ) -> List[MultimodalChunk]:
239
+ """Parse PDF from bytes"""
240
+ import tempfile
241
+ import os
242
+
243
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
244
+ tmp.write(pdf_bytes)
245
+ tmp_path = tmp.name
246
+
247
+ try:
248
+ chunks = self.parse_pdf(tmp_path, document_metadata)
249
+ return chunks
250
+ finally:
251
+ if os.path.exists(tmp_path):
252
+ os.unlink(tmp_path)
253
+
254
+
255
+ class MultimodalPDFIndexer:
256
+ """Index multimodal PDF chunks into RAG system"""
257
+
258
+ def __init__(self, embedding_service, qdrant_service, documents_collection):
259
+ self.embedding_service = embedding_service
260
+ self.qdrant_service = qdrant_service
261
+ self.documents_collection = documents_collection
262
+ self.parser = MultimodalPDFParser()
263
+
264
+ def index_pdf(
265
+ self,
266
+ pdf_path: str,
267
+ document_id: str,
268
+ document_metadata: Optional[Dict] = None
269
+ ) -> Dict:
270
+ """Index PDF with image URLs"""
271
+ chunks = self.parser.parse_pdf(pdf_path, document_metadata)
272
+
273
+ indexed_count = 0
274
+ chunk_ids = []
275
+ total_images = 0
276
+
277
+ for chunk in chunks:
278
+ chunk_id = f"{document_id}_p{chunk.page_number}_c{chunk.chunk_index}"
279
+
280
+ # Generate embedding (text-based)
281
+ embedding = self.embedding_service.encode_text(chunk.text)
282
+
283
+ # Prepare metadata with image URLs
284
+ metadata = {
285
+ 'text': chunk.text,
286
+ 'document_id': document_id,
287
+ 'page': chunk.page_number,
288
+ 'chunk_index': chunk.chunk_index,
289
+ 'source': 'pdf',
290
+ 'has_images': len(chunk.image_urls) > 0,
291
+ 'image_urls': chunk.image_urls, # Store image URLs!
292
+ 'num_images': len(chunk.image_urls),
293
+ **chunk.metadata
294
+ }
295
+
296
+ # Index to Qdrant
297
+ self.qdrant_service.index_data(
298
+ doc_id=chunk_id,
299
+ embedding=embedding,
300
+ metadata=metadata
301
+ )
302
+
303
+ chunk_ids.append(chunk_id)
304
+ indexed_count += 1
305
+ total_images += len(chunk.image_urls)
306
+
307
+ # Save document info
308
+ doc_info = {
309
+ 'document_id': document_id,
310
+ 'type': 'multimodal_pdf',
311
+ 'file_path': pdf_path,
312
+ 'num_chunks': indexed_count,
313
+ 'total_images': total_images,
314
+ 'chunk_ids': chunk_ids,
315
+ 'metadata': document_metadata or {}
316
+ }
317
+ self.documents_collection.insert_one(doc_info)
318
+
319
+ return {
320
+ 'success': True,
321
+ 'document_id': document_id,
322
+ 'chunks_indexed': indexed_count,
323
+ 'images_found': total_images,
324
+ 'chunk_ids': chunk_ids[:5]
325
+ }
326
+
327
+ def index_pdf_bytes(
328
+ self,
329
+ pdf_bytes: bytes,
330
+ document_id: str,
331
+ filename: str,
332
+ document_metadata: Optional[Dict] = None
333
+ ) -> Dict:
334
+ """Index PDF from bytes"""
335
+ doc_meta = document_metadata or {}
336
+ doc_meta['filename'] = filename
337
+
338
+ chunks = self.parser.parse_pdf_bytes(pdf_bytes, doc_meta)
339
+
340
+ indexed_count = 0
341
+ chunk_ids = []
342
+ total_images = 0
343
+
344
+ for chunk in chunks:
345
+ chunk_id = f"{document_id}_p{chunk.page_number}_c{chunk.chunk_index}"
346
+
347
+ embedding = self.embedding_service.encode_text(chunk.text)
348
+
349
+ metadata = {
350
+ 'text': chunk.text,
351
+ 'document_id': document_id,
352
+ 'page': chunk.page_number,
353
+ 'chunk_index': chunk.chunk_index,
354
+ 'source': 'multimodal_pdf',
355
+ 'filename': filename,
356
+ 'has_images': len(chunk.image_urls) > 0,
357
+ 'image_urls': chunk.image_urls,
358
+ 'num_images': len(chunk.image_urls),
359
+ **chunk.metadata
360
+ }
361
+
362
+ self.qdrant_service.index_data(
363
+ doc_id=chunk_id,
364
+ embedding=embedding,
365
+ metadata=metadata
366
+ )
367
+
368
+ chunk_ids.append(chunk_id)
369
+ indexed_count += 1
370
+ total_images += len(chunk.image_urls)
371
+
372
+ doc_info = {
373
+ 'document_id': document_id,
374
+ 'type': 'multimodal_pdf',
375
+ 'filename': filename,
376
+ 'num_chunks': indexed_count,
377
+ 'total_images': total_images,
378
+ 'chunk_ids': chunk_ids,
379
+ 'metadata': doc_meta  # document-level metadata, not the last chunk's Qdrant payload
380
+ }
381
+ self.documents_collection.insert_one(doc_info)
382
+
383
+ return {
384
+ 'success': True,
385
+ 'document_id': document_id,
386
+ 'filename': filename,
387
+ 'chunks_indexed': indexed_count,
388
+ 'images_found': total_images,
389
+ 'chunk_ids': chunk_ids[:5]
390
+ }
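For orientation, a short usage sketch of the parser defined above; the PDF path and metadata are placeholder values, and the loop just shows what a `MultimodalChunk` carries:

```python
# Hedged usage sketch for MultimodalPDFParser (path and metadata are placeholders).
parser = MultimodalPDFParser(chunk_size=500, chunk_overlap=50)
chunks = parser.parse_pdf("user_guide.pdf", document_metadata={"title": "User Guide"})

for chunk in chunks[:3]:
    print(f"page {chunk.page_number}, chunk {chunk.chunk_index}: "
          f"{len(chunk.text.split())} words, {len(chunk.image_urls)} image URL(s)")
```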
pdf_parser.py ADDED
@@ -0,0 +1,371 @@
1
+ """
2
+ PDF Parser Service for RAG Chatbot
3
+ Extracts text from PDF and splits into chunks for indexing
4
+ """
5
+
6
+ import pypdfium2 as pdfium
7
+ from typing import List, Dict, Optional
8
+ import re
9
+ from dataclasses import dataclass
10
+
11
+
12
+ @dataclass
13
+ class PDFChunk:
14
+ """Represents a chunk of text from PDF"""
15
+ text: str
16
+ page_number: int
17
+ chunk_index: int
18
+ metadata: Dict
19
+
20
+
21
+ class PDFParser:
22
+ """Parse PDF files and prepare for RAG indexing"""
23
+
24
+ def __init__(
25
+ self,
26
+ chunk_size: int = 500, # words per chunk
27
+ chunk_overlap: int = 50, # words overlap between chunks
28
+ min_chunk_size: int = 50 # minimum words in a chunk
29
+ ):
30
+ self.chunk_size = chunk_size
31
+ self.chunk_overlap = chunk_overlap
32
+ self.min_chunk_size = min_chunk_size
33
+
34
+ def extract_text_from_pdf(self, pdf_path: str) -> Dict[int, str]:
35
+ """
36
+ Extract text from PDF file
37
+
38
+ Args:
39
+ pdf_path: Path to PDF file
40
+
41
+ Returns:
42
+ Dictionary mapping page number to text content
43
+ """
44
+ pdf_text = {}
45
+
46
+ try:
47
+ pdf = pdfium.PdfDocument(pdf_path)
48
+
49
+ for page_num in range(len(pdf)):
50
+ page = pdf[page_num]
51
+ textpage = page.get_textpage()
52
+ text = textpage.get_text_range()
53
+
54
+ # Clean text
55
+ text = self._clean_text(text)
56
+ pdf_text[page_num + 1] = text # 1-indexed pages
57
+
58
+ return pdf_text
59
+
60
+ except Exception as e:
61
+ raise Exception(f"Error reading PDF: {str(e)}")
62
+
63
+ def _clean_text(self, text: str) -> str:
64
+ """Clean extracted text"""
65
+ # Remove excessive whitespace
66
+ text = re.sub(r'\s+', ' ', text)
67
+
68
+ # Remove special characters that might cause issues
69
+ text = text.replace('\x00', '')
70
+
71
+ return text.strip()
72
+
73
+ def chunk_text(self, text: str, page_number: int) -> List[PDFChunk]:
74
+ """
75
+ Split text into overlapping chunks
76
+
77
+ Args:
78
+ text: Text to chunk
79
+ page_number: Page number this text came from
80
+
81
+ Returns:
82
+ List of PDFChunk objects
83
+ """
84
+ # Split into words
85
+ words = text.split()
86
+
87
+ if len(words) < self.min_chunk_size:
88
+ # Text too short, return as single chunk
89
+ if len(words) > 0:
90
+ return [PDFChunk(
91
+ text=text,
92
+ page_number=page_number,
93
+ chunk_index=0,
94
+ metadata={'page': page_number, 'chunk': 0}
95
+ )]
96
+ return []
97
+
98
+ chunks = []
99
+ chunk_index = 0
100
+ start = 0
101
+
102
+ while start < len(words):
103
+ # Get chunk
104
+ end = min(start + self.chunk_size, len(words))
105
+ chunk_words = words[start:end]
106
+ chunk_text = ' '.join(chunk_words)
107
+
108
+ chunks.append(PDFChunk(
109
+ text=chunk_text,
110
+ page_number=page_number,
111
+ chunk_index=chunk_index,
112
+ metadata={
113
+ 'page': page_number,
114
+ 'chunk': chunk_index,
115
+ 'start_word': start,
116
+ 'end_word': end
117
+ }
118
+ ))
119
+
120
+ chunk_index += 1
121
+
122
+ # Move start position with overlap
123
+ start = end - self.chunk_overlap
124
+
125
+ # Avoid infinite loop
126
+ if start >= len(words) - self.min_chunk_size:
127
+ break
128
+
129
+ return chunks
130
+
131
+ def parse_pdf(
132
+ self,
133
+ pdf_path: str,
134
+ document_metadata: Optional[Dict] = None
135
+ ) -> List[PDFChunk]:
136
+ """
137
+ Parse entire PDF into chunks
138
+
139
+ Args:
140
+ pdf_path: Path to PDF file
141
+ document_metadata: Additional metadata for the document
142
+
143
+ Returns:
144
+ List of all chunks from the PDF
145
+ """
146
+ # Extract text from all pages
147
+ pages_text = self.extract_text_from_pdf(pdf_path)
148
+
149
+ # Chunk each page
150
+ all_chunks = []
151
+ for page_num, text in pages_text.items():
152
+ chunks = self.chunk_text(text, page_num)
153
+
154
+ # Add document metadata
155
+ if document_metadata:
156
+ for chunk in chunks:
157
+ chunk.metadata.update(document_metadata)
158
+
159
+ all_chunks.extend(chunks)
160
+
161
+ return all_chunks
162
+
163
+ def parse_pdf_bytes(
164
+ self,
165
+ pdf_bytes: bytes,
166
+ document_metadata: Optional[Dict] = None
167
+ ) -> List[PDFChunk]:
168
+ """
169
+ Parse PDF from bytes (for uploaded files)
170
+
171
+ Args:
172
+ pdf_bytes: PDF file as bytes
173
+ document_metadata: Additional metadata
174
+
175
+ Returns:
176
+ List of chunks
177
+ """
178
+ import tempfile
179
+ import os
180
+
181
+ # Save to temp file
182
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
183
+ tmp.write(pdf_bytes)
184
+ tmp_path = tmp.name
185
+
186
+ try:
187
+ chunks = self.parse_pdf(tmp_path, document_metadata)
188
+ return chunks
189
+ finally:
190
+ # Clean up temp file
191
+ if os.path.exists(tmp_path):
192
+ os.unlink(tmp_path)
193
+
194
+ def get_pdf_info(self, pdf_path: str) -> Dict:
195
+ """
196
+ Get basic info about PDF
197
+
198
+ Args:
199
+ pdf_path: Path to PDF file
200
+
201
+ Returns:
202
+ Dictionary with PDF information
203
+ """
204
+ try:
205
+ pdf = pdfium.PdfDocument(pdf_path)
206
+
207
+ info = {
208
+ 'num_pages': len(pdf),
209
+ 'file_path': pdf_path,
210
+ }
211
+
212
+ return info
213
+
214
+ except Exception as e:
215
+ raise Exception(f"Error reading PDF info: {str(e)}")
216
+
217
+
218
+ class PDFIndexer:
219
+ """Index PDF chunks into RAG system"""
220
+
221
+ def __init__(self, embedding_service, qdrant_service, documents_collection):
222
+ self.embedding_service = embedding_service
223
+ self.qdrant_service = qdrant_service
224
+ self.documents_collection = documents_collection
225
+ self.parser = PDFParser()
226
+
227
+ def index_pdf(
228
+ self,
229
+ pdf_path: str,
230
+ document_id: str,
231
+ document_metadata: Optional[Dict] = None
232
+ ) -> Dict:
233
+ """
234
+ Index entire PDF into RAG system
235
+
236
+ Args:
237
+ pdf_path: Path to PDF file
238
+ document_id: Unique ID for this document
239
+ document_metadata: Additional metadata (title, author, etc.)
240
+
241
+ Returns:
242
+ Indexing results
243
+ """
244
+ # Parse PDF
245
+ chunks = self.parser.parse_pdf(pdf_path, document_metadata)
246
+
247
+ # Index each chunk
248
+ indexed_count = 0
249
+ chunk_ids = []
250
+
251
+ for chunk in chunks:
252
+ # Generate unique ID for chunk
253
+ chunk_id = f"{document_id}_p{chunk.page_number}_c{chunk.chunk_index}"
254
+
255
+ # Generate embedding
256
+ embedding = self.embedding_service.encode_text(chunk.text)
257
+
258
+ # Prepare metadata
259
+ metadata = {
260
+ 'text': chunk.text,
261
+ 'document_id': document_id,
262
+ 'page': chunk.page_number,
263
+ 'chunk_index': chunk.chunk_index,
264
+ 'source': 'pdf',
265
+ **chunk.metadata
266
+ }
267
+
268
+ # Index to Qdrant
269
+ self.qdrant_service.index_data(
270
+ doc_id=chunk_id,
271
+ embedding=embedding,
272
+ metadata=metadata
273
+ )
274
+
275
+ chunk_ids.append(chunk_id)
276
+ indexed_count += 1
277
+
278
+ # Save document info to MongoDB
279
+ doc_info = {
280
+ 'document_id': document_id,
281
+ 'type': 'pdf',
282
+ 'file_path': pdf_path,
283
+ 'num_chunks': indexed_count,
284
+ 'chunk_ids': chunk_ids,
285
+ 'metadata': document_metadata or {},
286
+ 'pdf_info': self.parser.get_pdf_info(pdf_path)
287
+ }
288
+ self.documents_collection.insert_one(doc_info)
289
+
290
+ return {
291
+ 'success': True,
292
+ 'document_id': document_id,
293
+ 'chunks_indexed': indexed_count,
294
+ 'chunk_ids': chunk_ids[:5] # Return first 5 as sample
295
+ }
296
+
297
+ def index_pdf_bytes(
298
+ self,
299
+ pdf_bytes: bytes,
300
+ document_id: str,
301
+ filename: str,
302
+ document_metadata: Optional[Dict] = None
303
+ ) -> Dict:
304
+ """
305
+ Index PDF from bytes (for uploaded files)
306
+
307
+ Args:
308
+ pdf_bytes: PDF file as bytes
309
+ document_id: Unique ID for this document
310
+ filename: Original filename
311
+ document_metadata: Additional metadata
312
+
313
+ Returns:
314
+ Indexing results
315
+ """
316
+ # Parse PDF
317
+ doc_meta = document_metadata or {}
318
+ doc_meta['filename'] = filename
319
+
320
+ chunks = self.parser.parse_pdf_bytes(pdf_bytes, doc_meta)
321
+
322
+ # Index each chunk
323
+ indexed_count = 0
324
+ chunk_ids = []
325
+
326
+ for chunk in chunks:
327
+ # Generate unique ID for chunk
328
+ chunk_id = f"{document_id}_p{chunk.page_number}_c{chunk.chunk_index}"
329
+
330
+ # Generate embedding
331
+ embedding = self.embedding_service.encode_text(chunk.text)
332
+
333
+ # Prepare metadata
334
+ metadata = {
335
+ 'text': chunk.text,
336
+ 'document_id': document_id,
337
+ 'page': chunk.page_number,
338
+ 'chunk_index': chunk.chunk_index,
339
+ 'source': 'pdf',
340
+ 'filename': filename,
341
+ **chunk.metadata
342
+ }
343
+
344
+ # Index to Qdrant
345
+ self.qdrant_service.index_data(
346
+ doc_id=chunk_id,
347
+ embedding=embedding,
348
+ metadata=metadata
349
+ )
350
+
351
+ chunk_ids.append(chunk_id)
352
+ indexed_count += 1
353
+
354
+ # Save document info to MongoDB
355
+ doc_info = {
356
+ 'document_id': document_id,
357
+ 'type': 'pdf',
358
+ 'filename': filename,
359
+ 'num_chunks': indexed_count,
360
+ 'chunk_ids': chunk_ids,
361
+ 'metadata': doc_meta  # document-level metadata, not the last chunk's Qdrant payload
362
+ }
363
+ self.documents_collection.insert_one(doc_info)
364
+
365
+ return {
366
+ 'success': True,
367
+ 'document_id': document_id,
368
+ 'filename': filename,
369
+ 'chunks_indexed': indexed_count,
370
+ 'chunk_ids': chunk_ids[:5]
371
+ }
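A worked example of the overlap arithmetic in `chunk_text` above: with the defaults (`chunk_size=500`, `chunk_overlap=50`) the window advances 450 words per step, so a 1,000-word page yields three chunks.

```python
# Reproduces the sliding-window loop from PDFParser.chunk_text with the default settings.
chunk_size, overlap, min_chunk, n_words = 500, 50, 50, 1000
start = 0
while start < n_words:
    end = min(start + chunk_size, n_words)
    print(f"chunk covers words [{start}, {end})")
    start = end - overlap              # stride = chunk_size - overlap = 450
    if start >= n_words - min_chunk:   # same infinite-loop guard as the parser
        break
# -> [0, 500), [450, 950), [900, 1000)
```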
prompts/feedback_agent.txt ADDED
@@ -0,0 +1,51 @@
1
+ # ROLE
2
+ Bạn là chuyên viên Chăm sóc khách hàng (CSKH) của nền tảng bán vé sự kiện.
3
+ Nhiệm vụ của bạn là lắng nghe phản hồi của khách hàng sau khi tham gia sự kiện và hỗ trợ họ.
4
+
5
+ # GOAL
6
+ 1. Kiểm tra xem khách hàng đã tham gia sự kiện nào chưa.
7
+ 2. Nếu CÓ: Xin đánh giá (feedback), cảm nhận để cải thiện dịch vụ.
8
+ 3. Nếu KHÔNG (hoặc đã feedback xong): Giới thiệu các sự kiện mới hấp dẫn (chuyển sang vai trò Sales).
9
+
10
+ # CAPABILITIES (TOOLS)
11
+ 1. `get_purchased_events(user_id)`: Kiểm tra lịch sử mua vé/tham gia sự kiện của khách hàng.
12
+ 2. `save_feedback(event_id, rating, comment)`: Lưu đánh giá của khách hàng (rating 1-5 sao).
13
+ 3. `search_events(...)`: Tìm sự kiện mới (nếu khách muốn đi tiếp).
14
+
15
+ # GUIDELINES
16
+
17
+ ## Phase 1: Check History (Luôn thực hiện đầu tiên)
18
+ - Ngay khi bắt đầu hội thoại, hãy gọi `get_purchased_events(user_id)` ngầm (không cần hỏi khách).
19
+ - **Trường hợp A: Khách chưa từng đi sự kiện nào (hoặc API trả về rỗng)**
20
+ - Chuyển ngay sang mode tư vấn: "Chào bạn! Bạn đang tìm kiếm sự kiện gì thú vị cho tuần này không? Bên mình đang có nhiều show hay lắm! 🎉"
21
+ - (Sau đó hành xử như Sales Agent).
22
+
23
+ - **Trường hợp B: Khách ĐÃ đi sự kiện (ví dụ: "Show Hà Anh Tuấn")**
24
+ - Mở đầu bằng lời chào ấm áp: "Chào bạn! Cảm ơn bạn đã tham gia show **Hà Anh Tuấn** vừa rồi. Hy vọng bạn đã có những giây phút tuyệt vời! 🥰"
25
+ - Hỏi thăm cảm nhận: "Bạn thấy không khí hôm đó thế nào? Có điều gì làm bạn chưa hài lòng không?"
26
+
27
+ ## Phase 2: Collect Feedback (Nếu khách đã đi)
28
+ - Lắng nghe khách chia sẻ.
29
+ - Nếu khách khen: "Tuyệt quá! Bạn chấm cho sự kiện mấy sao nè? (1-5 sao) ⭐"
30
+ - Nếu khách chê: Tỏ ra đồng cảm, xin lỗi và hứa cải thiện. "Dạ mình rất tiếc về trải nghiệm này. Mình sẽ ghi nhận ngay để BTC rút kinh nghiệm ạ."
31
+ - Sau khi khách chấm điểm/comment -> Gọi `save_feedback`.
32
+
33
+ ## Phase 3: Transition to Sales (Sau khi feedback xong)
34
+ - Sau khi đã lưu feedback, hãy khéo léo giới thiệu sự kiện mới:
35
+ "Cảm ơn bạn đã góp ý nha! À, sắp tới bên mình có show **Mỹ Tâm** cũng vibe tương tự, bạn có muốn xem qua không?"
36
+ - Nếu khách quan tâm -> Dùng `search_events` và tư vấn tiếp.
37
+
38
+ # EXAMPLES
39
+
40
+ **Case 1: Có lịch sử đi event**
41
+ System: (User ID 123 -> get_purchased_events -> ["Show Rock Việt"])
42
+ Agent: "Chào bạn! Cảm ơn bạn đã cháy hết mình tại **Show Rock Việt** hôm qua! 🤘 Bạn thấy ban nhạc diễn có sung không?"
43
+ User: "Sung lắm, nhưng âm thanh hơi rè."
44
+ Agent: "Dạ mình ghi nhận góp ý về âm thanh ạ. Cảm ơn bạn nhiều. Bạn chấm show này mấy điểm trên thang 5 sao nè?"
45
+ User: "4 sao thôi."
46
+ Agent (Call Tool): save_feedback(event_id="rock_viet", rating=4, comment="Sung nhưng âm thanh rè")
47
+ Agent: "Dạ mình đã lưu lại rồi ạ. À sắp tới có **RockStorm** âm thanh xịn hơn, bạn có hóng không? 🔥"
48
+
49
+ **Case 2: Không có lịch sử**
50
+ System: (User ID 456 -> get_purchased_events -> [])
51
+ Agent: "Chào bạn! 👋 Cuối tuần này bạn đã có kế hoạch đi đâu chơi chưa? Bên mình đang có mấy show Acoustic chill lắm nè!"
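The `Agent (Call Tool): ...` lines in the examples above rely on the backend recognizing tool-call syntax in the model output. A hedged sketch of such a parser; the regex and helper are illustrative only, not the repo's actual agent-service implementation:

```python
# Illustrative parser for lines like: save_feedback(event_id="rock_viet", rating=4)
import re

TOOL_CALL = re.compile(r"^(?P<name>\w+)\((?P<args>.*)\)$")

def parse_tool_call(line: str):
    m = TOOL_CALL.match(line.strip())
    if not m:
        return None
    args = {}
    # Extract key=value pairs; values are either quoted strings or integers
    for key, raw in re.findall(r'(\w+)\s*=\s*("(?:[^"\\]|\\.)*"|\d+)', m.group("args")):
        args[key] = raw.strip('"') if raw.startswith('"') else int(raw)
    return m.group("name"), args

print(parse_tool_call('save_feedback(event_id="rock_viet", rating=4)'))
# -> ('save_feedback', {'event_id': 'rock_viet', 'rating': 4})
```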
prompts/sales_agent.txt ADDED
@@ -0,0 +1,47 @@
1
+ # ROLE
2
+ Bạn là một chuyên viên tư vấn sự kiện (Sales Agent) nhiệt tình, am hiểu và khéo léo của nền tảng bán vé sự kiện.
3
+ Tên bạn là: "TicketBot" (hoặc xưng là "mình"/"tớ").
4
+
5
+ # GOAL
6
+ Mục tiêu của bạn là giúp khách hàng tìm được sự kiện phù hợp nhất và khuyến khích họ mua vé (hoặc để lại thông tin liên hệ).
7
+
8
+ # CAPABILITIES (TOOLS)
9
+ Bạn có quyền truy cập các công cụ sau (hãy sử dụng chúng khi cần thiết):
10
+ 1. `search_events(query, vibe, date)`: Tìm kiếm sự kiện theo từ khóa, tâm trạng (chill, sôi động...), hoặc thời gian.
11
+ 2. `get_event_details(event_id)`: Lấy thông tin chi tiết (giá vé, địa điểm, nghệ sĩ, thời gian) của một sự kiện cụ thể.
12
+ 3. `save_lead(email, phone, interest)`: Lưu thông tin khách hàng khi họ quan tâm hoặc muốn nhận tư vấn thêm.
13
+
14
+ # GUIDELINES
15
+ 1. **Khơi gợi nhu cầu (Consultative Selling):**
16
+ - Đừng chỉ hỏi "Bạn muốn gì?". Hãy hỏi mở: "Cuối tuần này bạn rảnh không? Bạn đang mood muốn 'quẩy' hay chill nhẹ nhàng?"
17
+ - Nếu khách chưa rõ, hãy gợi ý dựa trên các vibe phổ biến: Hài kịch, Nhạc Indie, Workshop, EDM...
18
+
19
+ 2. **Tư vấn thông minh:**
20
+ - Khi khách hỏi giá, đừng chỉ đưa con số. Hãy kèm giá trị: "Vé hạng A giá 500k nhưng view siêu đẹp, còn hạng B 300k thì tiết kiệm hơn."
21
+ - Luôn đề xuất thêm (Upsell/Cross-sell) nếu phù hợp: "Đi nhóm 4 người đang có combo giảm 10% đó ạ."
22
+
23
+ 3. **Sử dụng Tools khéo léo:**
24
+ - Khi khách hỏi "có sự kiện gì?", HÃY gọi `search_events`. Đừng tự bịa ra sự kiện.
25
+ - Khi trả về danh sách sự kiện, hãy tóm tắt ngắn gọn điểm hấp dẫn nhất của từng cái.
26
+
27
+ 4. **Chốt Deal (Closing):**
28
+ - Khi khách có vẻ ưng ý (hỏi chi tiết, giá, chỗ ngồi...), hãy khéo léo xin thông tin:
29
+ "Sự kiện này đang hot lắm, bạn cho mình xin email để mình gửi link đặt vé giữ chỗ ngay nhé?"
30
+ - Hoặc: "Mình gửi lịch diễn chi tiết qua Zalo/Email cho bạn tiện xem nha?" -> Gọi `save_lead`.
31
+
32
+ 5. **Tone & Voice:**
33
+ - Thân thiện, trẻ trung, dùng emoji tự nhiên (😄, 🎉, 🔥).
34
+ - Không quá cứng nhắc như robot.
35
+ - Nếu khách hỏi ngoài lề (off-topic), hãy trả lời ngắn gọn rồi khéo léo lái về chủ đề sự kiện.
36
+
37
+ # EXAMPLES
38
+
39
+ User: "Cuối tuần này có gì chơi không?"
40
+ Agent (Thought): Khách chưa nói rõ sở thích. Cần hỏi thêm vibe.
41
+ Agent: "Cuối tuần này Sài Gòn nhiều show hay lắm! Bạn đang mood muốn 'quẩy' hết mình hay tìm một góc chill chill nghe nhạc? 🎶"
42
+
43
+ User: "Chill thôi, nghe nhạc acoustic."
44
+ Agent (Thought): Gọi tool search_events(vibe="chill", category="acoustic").
45
+ Agent (Call Tool): search_events(vibe="chill", category="acoustic")
46
+ ... (Tool returns events) ...
47
+ Agent: "À, vậy thì **Mây Lang Thang** hôm thứ 7 này là chuẩn bài! Có Lê Hiếu hát, không gian cực lãng mạn. Hoặc **Lululola** thì view hoàng hôn đỉnh chóp. Bạn thích giọng ai hơn? 🎤"
qdrant_service.py ADDED
@@ -0,0 +1,446 @@
1
+ from qdrant_client import QdrantClient
2
+ from qdrant_client.models import (
3
+ Distance, VectorParams, PointStruct,
4
+ SearchRequest, SearchParams, HnswConfigDiff,
5
+ OptimizersConfigDiff, ScalarQuantization,
6
+ ScalarQuantizationConfig, ScalarType,
7
+ QuantizationSearchParams
8
+ )
9
+ from typing import List, Dict, Any, Optional
10
+ import numpy as np
11
+ import uuid
12
+ import os
13
+
14
+
15
+ class QdrantVectorService:
16
+ """
17
+ Qdrant Cloud vector database service with a tuned configuration
18
+ - HNSW algorithm with accuracy-oriented parameters
19
+ - Scalar quantization to reduce memory usage and speed up search
20
+ - Supports hybrid search (text + image)
21
+ """
22
+
23
+ def __init__(
24
+ self,
25
+ url: Optional[str] = None,
26
+ api_key: Optional[str] = None,
27
+ collection_name: str = "event_social_media",
28
+ vector_size: int = 1024, # Jina CLIP v2 dimension
29
+ ):
30
+ """
31
+ Initialize Qdrant Cloud client
32
+
33
+ Args:
34
+ url: Qdrant Cloud URL (from env or passed in)
35
+ api_key: Qdrant API key (from env or passed in)
36
+ collection_name: Collection name
37
+ vector_size: Vector dimension (1024 for Jina CLIP v2)
38
+ """
39
+ # Read credentials from env when not passed in
40
+ self.url = url or os.getenv("QDRANT_URL")
41
+ self.api_key = api_key or os.getenv("QDRANT_API_KEY")
42
+
43
+ if not self.url or not self.api_key:
44
+ raise ValueError("QDRANT_URL and QDRANT_API_KEY must be provided (via env or params)")
45
+
46
+ print(f"Connecting to Qdrant Cloud...")
47
+
48
+ # Initialize Qdrant Cloud client
49
+ self.client = QdrantClient(
50
+ url=self.url,
51
+ api_key=self.api_key,
52
+ )
53
+
54
+ self.collection_name = collection_name
55
+ self.vector_size = vector_size
56
+
57
+ # Create collection nếu chưa tồn tại
58
+ self._ensure_collection()
59
+
60
+ print(f"✓ Connected to Qdrant collection: {collection_name}")
61
+
62
+ def _ensure_collection(self):
63
+ """
64
+ Create the collection with a tuned HNSW config if it does not already exist
65
+ """
66
+ # Check whether the collection already exists
67
+ collections = self.client.get_collections().collections
68
+ collection_exists = any(c.name == self.collection_name for c in collections)
69
+
70
+ if not collection_exists:
71
+ print(f"Creating collection {self.collection_name} with optimal HNSW config...")
72
+
73
+ self.client.create_collection(
74
+ collection_name=self.collection_name,
75
+ vectors_config=VectorParams(
76
+ size=self.vector_size,
77
+ distance=Distance.COSINE, # Cosine similarity for embeddings
78
+ hnsw_config=HnswConfigDiff(
79
+ m=64, # Edges per node - higher favors accuracy
80
+ ef_construct=512, # Search range while building the index - higher favors quality
81
+ full_scan_threshold=10000, # Threshold for switching to full scan
82
+ max_indexing_threads=0, # Auto-detect thread count
83
+ on_disk=False, # Keep in RAM for speed (if memory allows)
84
+ )
85
+ ),
86
+ optimizers_config=OptimizersConfigDiff(
87
+ deleted_threshold=0.2,
88
+ vacuum_min_vector_number=1000,
89
+ default_segment_number=2,
90
+ max_segment_size=200000,
91
+ memmap_threshold=50000,
92
+ indexing_threshold=10000,
93
+ flush_interval_sec=5,
94
+ max_optimization_threads=0, # Auto-detect
95
+ ),
96
+ # Use scalar quantization to reduce memory usage and speed up search
97
+ quantization_config=ScalarQuantization(
98
+ scalar=ScalarQuantizationConfig(
99
+ type=ScalarType.INT8,
100
+ quantile=0.99,
101
+ always_ram=True, # Keep quantized vectors in RAM
102
+ )
103
+ )
104
+ )
105
+ print("✓ Collection created with optimal configuration")
106
+ else:
107
+ print("✓ Collection already exists")
108
+
109
+ def _convert_to_valid_id(self, doc_id: str) -> str:
110
+ """
111
+ Convert any string ID into a UUID that Qdrant accepts
112
+
113
+ Args:
114
+ doc_id: Original ID (may be a MongoDB ObjectId, string, etc.)
115
+
116
+ Returns:
117
+ Valid UUID string
118
+ """
119
+ if not doc_id:
120
+ return str(uuid.uuid4())
121
+
122
+ # If it is already a valid UUID, keep it as-is
123
+ try:
124
+ uuid.UUID(doc_id)
125
+ return doc_id
126
+ except ValueError:
127
+ pass
128
+
129
+ # Convert the string to a deterministic UUID (same input = same UUID)
130
+ # using UUID v5 with the DNS namespace
131
+ return str(uuid.uuid5(uuid.NAMESPACE_DNS, doc_id))
132
+
133
+ def index_data(
134
+ self,
135
+ doc_id: str,
136
+ embedding: np.ndarray,
137
+ metadata: Dict[str, Any]
138
+ ) -> Dict[str, str]:
139
+ """
140
+ Index data into Qdrant
141
+
142
+ Args:
143
+ doc_id: Document ID (MongoDB ObjectId, string, etc.)
144
+ embedding: Vector embedding from Jina CLIP
145
+ metadata: Metadata (text, image_url, event_info, etc.)
146
+
147
+ Returns:
148
+ Dict with original_id and qdrant_id
149
+ """
150
+ # Convert the ID to a valid UUID
151
+ qdrant_id = self._convert_to_valid_id(doc_id)
152
+
153
+ # Keep the original ID in the metadata
154
+ metadata['original_id'] = doc_id
155
+
156
+ # Ensure the embedding is a 1D array
157
+ if len(embedding.shape) > 1:
158
+ embedding = embedding.flatten()
159
+
160
+ # Create point
161
+ point = PointStruct(
162
+ id=qdrant_id,
163
+ vector=embedding.tolist(),
164
+ payload=metadata
165
+ )
166
+
167
+ # Upsert into the collection
168
+ self.client.upsert(
169
+ collection_name=self.collection_name,
170
+ points=[point]
171
+ )
172
+
173
+ return {
174
+ "original_id": doc_id,
175
+ "qdrant_id": qdrant_id
176
+ }
177
+
178
+ def batch_index(
179
+ self,
180
+ doc_ids: List[str],
181
+ embeddings: np.ndarray,
182
+ metadata_list: List[Dict[str, Any]]
183
+ ) -> List[Dict[str, str]]:
184
+ """
185
+ Batch index many documents at once
186
+
187
+ Args:
188
+ doc_ids: List of document IDs (MongoDB ObjectId, string, etc.)
189
+ embeddings: Numpy array of embeddings (n_samples, embedding_dim)
190
+ metadata_list: List of metadata dicts
191
+
192
+ Returns:
193
+ List of dicts with original_id and qdrant_id
194
+ """
195
+ points = []
196
+ id_mappings = []
197
+
198
+ for i, (doc_id, embedding, metadata) in enumerate(zip(doc_ids, embeddings, metadata_list)):
199
+ # Convert to valid UUID
200
+ qdrant_id = self._convert_to_valid_id(doc_id)
201
+
202
+ # Keep the original ID in the metadata
203
+ metadata['original_id'] = doc_id
204
+
205
+ # Ensure the embedding is 1D
206
+ if len(embedding.shape) > 1:
207
+ embedding = embedding.flatten()
208
+
209
+ points.append(PointStruct(
210
+ id=qdrant_id,
211
+ vector=embedding.tolist(),
212
+ payload=metadata
213
+ ))
214
+
215
+ id_mappings.append({
216
+ "original_id": doc_id,
217
+ "qdrant_id": qdrant_id
218
+ })
219
+
220
+ # Batch upsert
221
+ self.client.upsert(
222
+ collection_name=self.collection_name,
223
+ points=points,
224
+ wait=True # Wait for indexing to complete
225
+ )
226
+
227
+ return id_mappings
228
+
229
+ def search(
230
+ self,
231
+ query_embedding: np.ndarray,
232
+ limit: int = 10,
233
+ score_threshold: Optional[float] = None,
234
+ filter_conditions: Optional[Dict] = None,
235
+ ef: int = 256 # Search quality parameter - higher = more accurate
236
+ ) -> List[Dict[str, Any]]:
237
+ """
238
+ Search for similar vectors in Qdrant
239
+
240
+ Args:
241
+ query_embedding: Query embedding from Jina CLIP
242
+ limit: Number of results to return
243
+ score_threshold: Minimum similarity score (0-1)
244
+ filter_conditions: Qdrant filter conditions
245
+ ef: HNSW search parameter (128-512, higher = more accurate)
246
+
247
+ Returns:
248
+ List of search results with id, score, and metadata
249
+ """
250
+ # Ensure the query embedding is 1D
251
+ if len(query_embedding.shape) > 1:
252
+ query_embedding = query_embedding.flatten()
253
+
254
+ # Search with tuned HNSW parameters (qdrant-client v1.16.0+)
255
+ search_result = self.client.query_points(
256
+ collection_name=self.collection_name,
257
+ query=query_embedding.tolist(),
258
+ limit=limit,
259
+ score_threshold=score_threshold,
260
+ query_filter=filter_conditions,
261
+ search_params=SearchParams(
262
+ hnsw_ef=ef, # Higher ef = more accurate search
263
+ exact=False, # Use HNSW (not exact search)
264
+ quantization=QuantizationSearchParams(
265
+ ignore=False, # Use quantization
266
+ rescore=True, # Rescore với original vectors
267
+ oversampling=2.0 # Oversample factor
268
+ )
269
+ ),
270
+ with_payload=True,
271
+ ).points
272
+
273
+ # Format results - return the original_id instead of the UUID
274
+ results = []
275
+ for hit in search_result:
276
+ # Read the original_id (MongoDB ObjectId) from the metadata
277
+ original_id = hit.payload.get('original_id', hit.id)
278
+
279
+ results.append({
280
+ "id": original_id, # return the MongoDB ObjectId
281
+ "qdrant_id": hit.id, # UUID inside Qdrant
282
+ "confidence": float(hit.score), # Cosine similarity score
283
+ "metadata": hit.payload
284
+ })
285
+
286
+ return results
287
+
288
+ def hybrid_search(
289
+ self,
290
+ text_embedding: Optional[np.ndarray] = None,
291
+ image_embedding: Optional[np.ndarray] = None,
292
+ text_weight: float = 0.5,
293
+ image_weight: float = 0.5,
294
+ limit: int = 10,
295
+ score_threshold: Optional[float] = None,
296
+ ef: int = 256
297
+ ) -> List[Dict[str, Any]]:
298
+ """
299
+ Hybrid search with both text and image embeddings
300
+
301
+ Args:
302
+ text_embedding: Text query embedding
303
+ image_embedding: Image query embedding
304
+ text_weight: Weight for the text search (0-1)
305
+ image_weight: Weight for the image search (0-1)
306
+ limit: Number of results
307
+ score_threshold: Minimum score
308
+ ef: HNSW search parameter
309
+
310
+ Returns:
311
+ Combined search results
312
+ """
313
+ # Combine the embeddings with their weights
314
+ combined_embedding = np.zeros(self.vector_size)
315
+
316
+ if text_embedding is not None:
317
+ if len(text_embedding.shape) > 1:
318
+ text_embedding = text_embedding.flatten()
319
+ combined_embedding += text_weight * text_embedding
320
+
321
+ if image_embedding is not None:
322
+ if len(image_embedding.shape) > 1:
323
+ image_embedding = image_embedding.flatten()
324
+ combined_embedding += image_weight * image_embedding
325
+
326
+ # Normalize combined embedding
327
+ norm = np.linalg.norm(combined_embedding)
328
+ if norm > 0:
329
+ combined_embedding = combined_embedding / norm
330
+
331
+ # Search with the combined embedding
332
+ return self.search(
333
+ query_embedding=combined_embedding,
334
+ limit=limit,
335
+ score_threshold=score_threshold,
336
+ ef=ef
337
+ )
338
+
339
+ def delete_by_id(self, doc_id: str) -> bool:
340
+ """
341
+ Delete a document by ID (supports both MongoDB ObjectId and UUID)
342
+
343
+ Args:
344
+ doc_id: Document ID to delete (MongoDB ObjectId or UUID)
345
+
346
+ Returns:
347
+ Success status
348
+ """
349
+ # Convert to UUID if it is a MongoDB ObjectId
350
+ qdrant_id = self._convert_to_valid_id(doc_id)
351
+
352
+ self.client.delete(
353
+ collection_name=self.collection_name,
354
+ points_selector=[qdrant_id]
355
+ )
356
+ return True
357
+
358
+ def get_by_id(self, doc_id: str) -> Optional[Dict[str, Any]]:
359
+ """
360
+ Get a document by ID (supports both MongoDB ObjectId and UUID)
361
+
362
+ Args:
363
+ doc_id: Document ID (MongoDB ObjectId or UUID)
364
+
365
+ Returns:
366
+ Document data, or None if not found
367
+ """
368
+ # Convert to UUID nếu là MongoDB ObjectId
369
+ qdrant_id = self._convert_to_valid_id(doc_id)
370
+
371
+ try:
372
+ result = self.client.retrieve(
373
+ collection_name=self.collection_name,
374
+ ids=[qdrant_id],
375
+ with_payload=True,
376
+ with_vectors=False
377
+ )
378
+
379
+ if result:
380
+ point = result[0]
381
+ original_id = point.payload.get('original_id', point.id)
382
+ return {
383
+ "id": original_id, # MongoDB ObjectId
384
+ "qdrant_id": point.id, # UUID trong Qdrant
385
+ "metadata": point.payload
386
+ }
387
+ return None
388
+ except Exception as e:
389
+ print(f"Error retrieving document: {e}")
390
+ return None
391
+
392
+ def search_by_metadata(
393
+ self,
394
+ filter_conditions: Dict,
395
+ limit: int = 100
396
+ ) -> List[Dict[str, Any]]:
397
+ """
398
+ Search documents by metadata conditions (no embedding needed)
399
+
400
+ Args:
401
+ filter_conditions: Qdrant filter conditions
402
+ limit: Maximum số results
403
+
404
+ Returns:
405
+ List of matching documents
406
+ """
407
+ try:
408
+ result = self.client.scroll(
409
+ collection_name=self.collection_name,
410
+ scroll_filter=filter_conditions,
411
+ limit=limit,
412
+ with_payload=True,
413
+ with_vectors=False
414
+ )
415
+
416
+ documents = []
417
+ for point in result[0]: # result is tuple (points, next_page_offset)
418
+ original_id = point.payload.get('original_id', point.id)
419
+ documents.append({
420
+ "id": original_id, # MongoDB ObjectId
421
+ "qdrant_id": point.id, # UUID trong Qdrant
422
+ "metadata": point.payload
423
+ })
424
+
425
+ return documents
426
+ except Exception as e:
427
+ print(f"Error searching by metadata: {e}")
428
+ return []
429
+
430
+ def get_collection_info(self) -> Dict[str, Any]:
431
+ """
432
+ Get collection info
433
+
434
+ Returns:
435
+ Collection info
436
+ """
437
+ info = self.client.get_collection(collection_name=self.collection_name)
438
+ return {
439
+ "vectors_count": info.vectors_count,
440
+ "points_count": info.points_count,
441
+ "status": info.status,
442
+ "config": {
443
+ "distance": info.config.params.vectors.distance,
444
+ "size": info.config.params.vectors.size,
445
+ }
446
+ }
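Because `_convert_to_valid_id` uses UUID v5, the mapping from a MongoDB ObjectId string to a Qdrant point ID is deterministic, so re-indexing the same document upserts the same point instead of creating a duplicate. A quick demonstration (the ObjectId string is a made-up example):

```python
# UUID v5 is deterministic: the same input string always yields the same point ID.
import uuid

mongo_id = "6563f0c2a1b2c3d4e5f60718"  # hypothetical ObjectId string
qdrant_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, mongo_id))
assert qdrant_id == str(uuid.uuid5(uuid.NAMESPACE_DNS, mongo_id))  # stable across runs
print(qdrant_id)
```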
requirements.txt ADDED
@@ -0,0 +1,38 @@
1
+ # FastAPI and web framework
2
+ fastapi==0.115.5
3
+ uvicorn[standard]==0.32.1
4
+ python-multipart==0.0.20
5
+
6
+ # Gradio for Hugging Face Spaces
7
+ gradio>=4.0.0
8
+
9
+ # Machine Learning & Embeddings
10
+ torch>=2.0.0
11
+ transformers>=4.50.0
12
+ onnxruntime==1.20.1
13
+ torchvision>=0.15.0
14
+ pillow>=10.0.0
15
+ numpy>=1.24.0
16
+
17
+ # RAG & Reranking (Best Case 2025)
18
+ sentence-transformers>=2.2.0
19
+ httpx>=0.25.0
20
+
21
+ # Vector Database
22
+ qdrant-client>=1.12.1
23
+ grpcio>=1.60.0
24
+
25
+ # Utilities
26
+ pydantic>=2.0.0
27
+ python-dotenv==1.0.0
28
+
29
+ # MongoDB
30
+ pymongo>=4.6.0
31
+ huggingface-hub>=0.20.0
32
+ timm
33
+ einops
34
+
35
+ # PDF Processing
36
+ pypdfium2>=4.30.0
stream_utils.py ADDED
@@ -0,0 +1,86 @@
1
+ """
2
+ SSE (Server-Sent Events) Utilities
3
+ Format streaming responses for real-time chat
4
+ """
5
+ import json
6
+ from typing import Dict, Any, AsyncGenerator
7
+ import asyncio
8
+
9
+
10
+ def format_sse(event: str, data: Any) -> str:
11
+ """
12
+ Format data as SSE message
13
+
14
+ Args:
15
+ event: Event type (token, status, done, error)
16
+ data: Data payload (string or dict)
17
+
18
+ Returns:
19
+ Formatted SSE string
20
+
21
+ Example:
22
+ format_sse("token", "Hello")
23
+ # "event: token\ndata: Hello\n\n"
24
+ """
25
+ if isinstance(data, dict):
26
+ data_str = json.dumps(data, ensure_ascii=False)
27
+ else:
28
+ data_str = str(data)
29
+
30
+ # SSE framing: every line of a multi-line payload must carry its own "data:" prefix
+ data_lines = "".join(f"data: {line}\n" for line in data_str.split("\n"))
+ return f"event: {event}\n{data_lines}\n"
31
+
32
+
33
+ async def simulate_typing(
34
+ text: str,
35
+ chars_per_chunk: int = 3,
36
+ delay_ms: float = 20
37
+ ) -> AsyncGenerator[str, None]:
38
+ """
39
+ Simulate typing effect by yielding text in chunks
40
+
41
+ Args:
42
+ text: Full text to stream
43
+ chars_per_chunk: Characters per chunk
44
+ delay_ms: Milliseconds delay between chunks
45
+
46
+ Yields:
47
+ Text chunks
48
+
49
+ Example:
50
+ async for chunk in simulate_typing("Hello world", chars_per_chunk=2):
51
+ yield format_sse("token", chunk)
52
+ """
53
+ for i in range(0, len(text), chars_per_chunk):
54
+ chunk = text[i:i + chars_per_chunk]
55
+ yield chunk
56
+ await asyncio.sleep(delay_ms / 1000)
57
+
58
+
59
+ async def stream_text_slowly(
60
+ text: str,
61
+ event_type: str = "token",
62
+ chars_per_chunk: int = 3,
63
+ delay_ms: float = 20
64
+ ) -> AsyncGenerator[str, None]:
65
+ """
66
+ Stream text with typing effect in SSE format
67
+
68
+ Args:
69
+ text: Text to stream
70
+ event_type: SSE event type
71
+ chars_per_chunk: Characters per chunk
72
+ delay_ms: Delay between chunks
73
+
74
+ Yields:
75
+ SSE formatted chunks
76
+ """
77
+ async for chunk in simulate_typing(text, chars_per_chunk, delay_ms):
78
+ yield format_sse(event_type, chunk)
79
+
80
+
81
+ # Event type constants
82
+ EVENT_STATUS = "status"
83
+ EVENT_TOKEN = "token"
84
+ EVENT_DONE = "done"
85
+ EVENT_ERROR = "error"
86
+ EVENT_METADATA = "metadata"
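A hedged sketch of how these helpers compose inside an endpoint generator; the generator itself is illustrative, and the payloads echo the examples in the `/agent/chat` docstring:

```python
# Illustrative SSE generator built from the helpers above; the endpoint wiring is hypothetical.
from fastapi.responses import StreamingResponse

async def demo_stream():
    yield format_sse(EVENT_STATUS, "Đang tư vấn...")           # one status event
    async for sse_chunk in stream_text_slowly("Hello 👋", chars_per_chunk=2):
        yield sse_chunk                                         # a run of token events
    yield format_sse(EVENT_DONE, {"session_id": "abc-123"})    # done event with a JSON payload

# An endpoint would then return:
# StreamingResponse(demo_stream(), media_type="text/event-stream")
```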
tools_service.py ADDED
@@ -0,0 +1,242 @@
1
+ """
2
+ Tools Service for LLM Function Calling
3
+ HuggingFace-compatible, implemented via prompt engineering
4
+ """
5
+ import httpx
6
+ from typing import List, Dict, Any, Optional
7
+ import json
8
+ import asyncio
9
+
10
+
11
+ class ToolsService:
12
+ """
13
+ Manages external API tools that LLM can call via prompt engineering
14
+ """
15
+
16
+ def __init__(self, base_url: str = "https://hoalacrent.io.vn/api/v0", feedback_tracking=None):
17
+ self.base_url = base_url
18
+ self.client = httpx.AsyncClient(timeout=10.0)
19
+ self.feedback_tracking = feedback_tracking # NEW: Feedback tracking
20
+
21
+ def get_tools_definition(self) -> List[Dict]:
22
+ """
23
+ Return list of tool definitions (OpenAI format style)
24
+ Used for constructing system prompt
25
+ """
26
+ return [
27
+ {
28
+ "name": "search_events",
29
+ "description": "Tìm kiếm sự kiện phù hợp theo từ khóa, vibe, hoặc thời gian.",
30
+ "parameters": {
31
+ "type": "object",
32
+ "properties": {
33
+ "query": {"type": "string", "description": "Từ khóa tìm kiếm (VD: 'nhạc rock', 'hài kịch')"},
34
+ "vibe": {"type": "string", "description": "Vibe/Mood (VD: 'chill', 'sôi động', 'hẹn hò')"},
35
+ "time": {"type": "string", "description": "Thời gian (VD: 'cuối tuần này', 'tối nay')"}
36
+ }
37
+ }
38
+ },
39
+ {
40
+ "name": "get_event_details",
41
+ "description": "Lấy thông tin chi tiết (giá, địa điểm, thời gian) của sự kiện.",
42
+ "parameters": {
43
+ "type": "object",
44
+ "properties": {
45
+ "event_id": {"type": "string", "description": "ID của sự kiện (MongoDB ID)"}
46
+ },
47
+ "required": ["event_id"]
48
+ }
49
+ },
50
+ {
51
+ "name": "get_purchased_events",
52
+ "description": "Kiểm tra lịch sử các sự kiện user đã mua vé hoặc tham gia.",
53
+ "parameters": {
54
+ "type": "object",
55
+ "properties": {
56
+ "user_id": {"type": "string", "description": "ID của user"}
57
+ },
58
+ "required": ["user_id"]
59
+ }
60
+ },
61
+ {
62
+ "name": "save_feedback",
63
+ "description": "Lưu đánh giá/feedback của user về sự kiện.",
64
+ "parameters": {
65
+ "type": "object",
66
+ "properties": {
67
+ "event_id": {"type": "string", "description": "ID sự kiện"},
68
+ "rating": {"type": "integer", "description": "Số sao đánh giá (1-5)"},
69
+ "comment": {"type": "string", "description": "Nội dung nhận xét"}
70
+ },
71
+ "required": ["event_id", "rating"]
72
+ }
73
+ },
74
+ {
75
+ "name": "save_lead",
76
+ "description": "Lưu thông tin khách hàng quan tâm (Lead).",
77
+ "parameters": {
78
+ "type": "object",
79
+ "properties": {
80
+ "email": {"type": "string"},
81
+ "phone": {"type": "string"},
82
+ "interest": {"type": "string"}
83
+ }
84
+ }
85
+ }
86
+ ]
87
+
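
Because function calling here is emulated with prompt engineering rather than a native tool-calling API, these definitions are meant to be serialized into the system prompt. Below is a minimal sketch of such a rendering, assuming a JSON reply convention; the actual template used by the agent service may differ.

# Editor's sketch: render tool definitions into a system-prompt section.
# The envelope {"tool": ..., "arguments": ...} is an assumed convention.
import json
from typing import Dict, List

def build_tools_prompt(tools: List[Dict]) -> str:
    lines = [
        "You can call the following tools. To call one, reply with JSON:",
        '{"tool": "<name>", "arguments": {...}}',
        "",
    ]
    for tool in tools:
        # Serialize each parameter schema inline so the model sees it verbatim.
        params = json.dumps(tool["parameters"], ensure_ascii=False)
        lines.append(f"- {tool['name']}: {tool['description']} Parameters: {params}")
    return "\n".join(lines)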
+     async def execute_tool(self, tool_name: str, arguments: Dict, access_token: Optional[str] = None) -> Any:
+         """
+         Execute a tool by name with arguments
+ 
+         Args:
+             tool_name: Name of the tool
+             arguments: Tool arguments
+             access_token: JWT token for authenticated API calls
+         """
+         print(f"\n🔧 ===== TOOL EXECUTION =====")
+         print(f"Tool: {tool_name}")
+         print(f"Arguments: {arguments}")
+         print(f"Access Token: {'✅ Present' if access_token else '❌ Missing'}")
+         if access_token:
+             print(f"Token preview: {access_token[:30]}...")
+ 
+         try:
+             if tool_name == "get_event_details":
+                 return await self._get_event_details(arguments.get("event_id") or arguments.get("event_code"))
+ 
+             elif tool_name == "get_purchased_events":
+                 print(f"→ Calling _get_purchased_events with:")
+                 print(f"   user_id: {arguments.get('user_id')}")
+                 print(f"   access_token: {'✅' if access_token else '❌'}")
+                 return await self._get_purchased_events(
+                     arguments.get("user_id"),
+                     access_token=access_token  # pass the caller's token through
+                 )
+ 
+             elif tool_name == "save_feedback":
+                 return await self._save_feedback(
+                     arguments.get("event_id"),
+                     arguments.get("rating"),
+                     arguments.get("comment"),
+                     user_id=arguments.get("user_id"),       # forwarded so feedback tracking can fire
+                     event_code=arguments.get("event_code")  # forwarded so feedback tracking can fire
+                 )
+ 
+             elif tool_name == "search_events":
+                 # Note: this usually requires the RAG service, so we return a special signal;
+                 # the Agent Service will handle the RAG search
+                 return {"action": "run_rag_search", "query": arguments}
+ 
+             elif tool_name == "save_lead":
+                 # Placeholder for lead saving
+                 return {"success": True, "message": "Lead saved successfully"}
+ 
+             else:
+                 return {"error": f"Unknown tool: {tool_name}"}
+ 
+         except Exception as e:
+             print(f"⚠️ Tool Execution Error: {e}")
+             return {"error": str(e)}
+ 
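
For illustration, a caller might hand a model-emitted tool call to execute_tool roughly as below. The JSON envelope is the same assumed convention as in the earlier sketch; the real agent-side parsing lives outside this file.

# Editor's sketch: dispatch a parsed tool call (envelope format assumed).
import json
from typing import Optional

async def handle_model_reply(tools_service: "ToolsService", model_reply: str,
                             access_token: Optional[str] = None):
    try:
        call = json.loads(model_reply)
    except json.JSONDecodeError:
        return None  # plain-text answer, nothing to execute
    return await tools_service.execute_tool(
        call.get("tool"),
        call.get("arguments", {}),
        access_token=access_token,
    )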
+     async def _get_event_details(self, event_id: str) -> Dict:
+         """Call the API to get event details"""
+         if not event_id:
+             return {"error": "Missing event_id"}
+ 
+         try:
+             url = f"{self.base_url}/event/get-event-by-id"
+ 
+             response = await self.client.get(url, params={"id": event_id})
+             if response.status_code == 200:
+                 data = response.json()
+                 if data.get("success"):
+                     return data.get("data")
+             return {"error": "Event not found", "details": response.text}
+         except Exception as e:
+             return {"error": str(e)}
+ 
+     async def _get_purchased_events(self, user_id: str, access_token: Optional[str] = None) -> List[Dict]:
+         """Call the API to get the user's purchased events (requires auth)"""
+         print(f"\n🎫 ===== GET PURCHASED EVENTS =====")
+         print(f"User ID: {user_id}")
+         print(f"Access Token: {'✅ Present' if access_token else '❌ Missing'}")
+ 
+         if not user_id:
+             print("⚠️ No user_id provided, returning empty list")
+             return []
+ 
+         try:
+             url = f"{self.base_url}/event/get-purchase-event-by-user-id/{user_id}"
+             print(f"🔍 API URL: {url}")
+ 
+             # Add an Authorization header if an access_token was provided
+             headers = {}
+             if access_token:
+                 headers["Authorization"] = f"Bearer {access_token}"
+                 print(f"🔐 Authorization header added:")
+                 print(f"   Bearer {access_token[:30]}...")
+             else:
+                 print(f"⚠️ No access_token - calling API without auth")
+ 
+             print(f"📡 Headers: {headers}")
+             print(f"🚀 Calling API...")
+ 
+             response = await self.client.get(url, headers=headers)
+ 
+             print(f"📥 Response Status: {response.status_code}")
+             print(f"📦 Response Headers: {dict(response.headers)}")
+ 
+             if response.status_code == 200:
+                 data = response.json()
+                 print(f"✅ Success! Data keys: {list(data.keys())}")
+                 events = data.get("data", [])
+                 print(f"📊 Found {len(events)} purchased events")
+ 
+                 # Log the actual event data
+                 if events:
+                     print(f"\n📋 Purchased Events Details:")
+                     for i, event in enumerate(events, 1):
+                         print(f"{i}. Event Code: {event.get('eventCode', 'N/A')}")
+                         print(f"   Event Name: {event.get('eventName', 'N/A')}")
+                         print(f"   Event ID: {event.get('_id', 'N/A')}")
+                         print(f"   Full data: {event}")
+ 
+                 return events
+             else:
+                 print(f"❌ API Error: {response.status_code}")
+                 print(f"Response body: {response.text[:500]}")
+                 return []
+ 
+         except Exception as e:
+             print(f"⚠️ Exception in _get_purchased_events: {type(e).__name__}: {e}")
+             import traceback
+             traceback.print_exc()
+             return []
+ 
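
The success/data checks in the two handlers above imply the backend wraps payloads in an envelope. Roughly, a 200 response is assumed to look like the sketch below; the field names inside data come from the logging calls, and the values are illustrative.

# Assumed response envelope (field values illustrative). For
# get-event-by-id, "data" is presumably a single object; for
# get-purchase-event-by-user-id, it is a list of events.
example_response = {
    "success": True,
    "data": [
        {"_id": "...", "eventCode": "...", "eventName": "..."},
    ],
}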
+     async def _save_feedback(self, event_id: str, rating: int, comment: str, user_id: Optional[str] = None, event_code: Optional[str] = None) -> Dict:
+         """Save feedback and mark it as completed in the tracking system"""
+         print(f"\n📝 ===== SAVE FEEDBACK =====")
+         print(f"Event ID: {event_id}")
+         print(f"Event Code: {event_code}")
+         print(f"User ID: {user_id}")
+         print(f"Rating: {rating}")
+         print(f"Comment: {comment}")
+ 
+         # TODO: Implement the real API call to persist feedback
+         # For now, just mark it in the tracking system
+         if self.feedback_tracking and user_id and event_code:
+             success = self.feedback_tracking.mark_feedback_given(
+                 user_id=user_id,
+                 event_code=event_code,
+                 rating=rating,
+                 comment=comment
+             )
+             if success:
+                 print(f"✅ Feedback tracked in database")
+             else:
+                 print(f"⚠️ Failed to track feedback")
+ 
+         return {"success": True, "message": "Feedback recorded"}
+ 
+     async def close(self):
+         """Close the HTTP client"""
+         await self.client.aclose()
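
Taken together, a minimal lifecycle sketch, assuming the service is created once and closed when the app shuts down; the save_lead result shown is the placeholder return value from above.

# Editor's sketch: create the service, run one tool, release the HTTP client.
import asyncio

async def main():
    tools = ToolsService()
    try:
        result = await tools.execute_tool("save_lead", {"email": "a@b.com"})
        print(result)  # {'success': True, 'message': 'Lead saved successfully'}
    finally:
        await tools.close()  # aclose() the shared httpx.AsyncClient

asyncio.run(main())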