Spaces:

Jaheen07
/

rag-chatbot-api

Sleeping

App Files Community

Jaheen07 committed on Dec 10, 2025

Commit

4233eaf

verified ·

1 Parent(s): 265d329

Update chatbot.py

Browse files

Files changed (1) hide show

chatbot.py +68 -60

chatbot.py CHANGED Viewed

@@ -13,6 +13,7 @@ import os
 import pickle
 from datetime import datetime
 from collections import Counter
 class RAGChatbot:
@@ -23,38 +24,38 @@ class RAGChatbot:
         self.chunk_metadata = []
         self.index = None
         self.embeddings_model = None
-        self.llm_client = None
         self.chat_history = []
         self.output_dir = "./"
         self.table_csv_path = None
         self.text_chunks_path = None
         self.history_file = os.path.join(self.output_dir, "chat_history.pkl")
-        # Chat history embeddings and index
         self.chat_embeddings = []
         self.chat_index = None
         self.chat_embedding_file = os.path.join(self.output_dir, "chat_embeddings.pkl")
-        # Learning statistics
         self.query_patterns = Counter()
         self.feedback_scores = {}
         self.stats_file = os.path.join(self.output_dir, "learning_stats.pkl")
-        # ADD THIS NEW SECTION:
         self.conversation_context = {
             'current_employee': None,
             'last_mentioned_entities': []
         }
         os.makedirs(self.output_dir, exist_ok=True)
-        # Load existing chat history and learning data
         self._load_chat_history()
         self._load_learning_stats()
         self._setup()
-        # Build chat history index after setup
         self._build_chat_history_index()
     def _load_chat_history(self):
@@ -637,83 +638,73 @@ class RAGChatbot:
         print("\n" + "=" * 80)
         print("STEP 1: Loading PDF")
         print("=" * 80)
         text = self._load_pdf_text()
         print(f"Loaded PDF with {len(text)} characters")
         print("\n" + "=" * 80)
         print("STEP 2: Extracting and Merging Tables")
         print("=" * 80)
         self.table_csv_path = self._extract_and_merge_tables()
         print("\n" + "=" * 80)
         print("STEP 3: Chunking Text Content (Removing Tables)")
         print("=" * 80)
         text_chunks = self._chunk_text_content(text)
         self.text_chunks_path = self._save_text_chunks(text_chunks)
         print("\n" + "=" * 80)
         print("STEP 4: Creating Final Chunks")
         print("=" * 80)
         all_chunks = []
-        # Add text chunks
         all_chunks.extend(text_chunks)
-        # Add table chunks
         if self.table_csv_path:
             table_chunks = self._create_table_chunks(self.table_csv_path)
             all_chunks.extend(table_chunks)
-            # Save chunked table text to file
             self._save_table_chunks(table_chunks)
-        # Extract content and metadata
         self.chunks = [c['content'] for c in all_chunks]
         self.chunk_metadata = all_chunks
         print(f"\nTotal chunks created: {len(self.chunks)}")
         print(f"  - Q&A chunks: {sum(1 for c in all_chunks if c['type'] == 'qa')}")
         print(f"  - Text chunks: {sum(1 for c in all_chunks if c['type'] == 'text')}")
         print(f"  - Table full: {sum(1 for c in all_chunks if c['type'] == 'table_full')}")
         print(f"  - Employee records: {sum(1 for c in all_chunks if c['type'] == 'table_row')}")
-        # Save manifest
         self._save_manifest(all_chunks)
         print("\n" + "=" * 80)
         print("STEP 5: Creating Embeddings")
         print("=" * 80)
         print("Loading embedding model...")
         self.embeddings_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
         print("Creating embeddings for all chunks...")
         embeddings = self.embeddings_model.encode(self.chunks, show_progress_bar=True)
         print("Building FAISS index...")
         dimension = embeddings.shape[1]
         self.index = faiss.IndexFlatL2(dimension)
         self.index.add(np.array(embeddings).astype('float32'))
         print("\n" + "=" * 80)
-        print("STEP 6: Initializing LLM")
         print("=" * 80)
-        self.llm_client = InferenceClient(token=self.hf_token)
-        self.model_name = "meta-llama/Llama-3.1-8B-Instruct"
         print("\n" + "=" * 80)
         print("SETUP COMPLETE!")
         print("=" * 80)
-        print(f"Files created in: {self.output_dir}/")
-        print(f"  - {os.path.basename(self.table_csv_path) if self.table_csv_path else 'No table CSV'}")
-        print(f"  - {os.path.basename(self.text_chunks_path)}")
-        print(f"  - chunk_manifest.json")
-        print(f"  - {os.path.basename(self.history_file)}")
-        print("=" * 80 + "\n")
     def _retrieve(self, query: str, k: int = 10) -> List[Tuple[str, Dict]]:
         """Retrieve relevant chunks with metadata"""
@@ -820,6 +811,7 @@ class RAGChatbot:
         return prompt
     def ask(self, question: str) -> str:
         if question.lower() in ["reset data", "reset"]:
             self.chat_history = []
             self.chat_embeddings = []
@@ -838,25 +830,41 @@ class RAGChatbot:
         # Search through past conversations for similar questions
         relevant_past_chats = self._search_chat_history(resolved_question, k=5)
-        # Retrieve relevant chunks (use resolved question for better retrieval)
         retrieved_data = self._retrieve(resolved_question, k=20)
-        # Build prompt with both document context and learned information
         prompt = self._build_prompt(resolved_question, retrieved_data, relevant_past_chats)
-        # ✅ CORRECT: Use text_generation for InferenceClient
-        answer = self.llm_client.text_generation(
-            prompt,
-            model=self.model_name,
-            max_new_tokens=512,
-            temperature=0.3,
-            return_full_text=False
-        )
-        # Update conversation context after each exchange
         self._update_conversation_context(question, answer)
-        # Store in history with timestamp and metadata
         chat_entry = {
             'timestamp': datetime.now().isoformat(),
             'question': question,
@@ -867,7 +875,7 @@ class RAGChatbot:
         self.chat_history.append(chat_entry)
-        # Update chat history index with new conversation
         new_text = f"Q: {question}\nA: {answer}"
         new_embedding = self.embeddings_model.encode([new_text])
@@ -880,7 +888,7 @@ class RAGChatbot:
         self.chat_index.add(np.array(new_embedding).astype('float32'))
-        # Save to disk after each conversation
         self._save_chat_history()
         self._save_learning_stats()

 import pickle
 from datetime import datetime
 from collections import Counter
+import requests
 class RAGChatbot:
         self.chunk_metadata = []
         self.index = None
         self.embeddings_model = None
+        # ✅ NEW: API configuration
+        self.api_url = "https://router.huggingface.co/v1/chat/completions"
+        self.headers = {"Authorization": f"Bearer {hf_token}"}
+        self.model_name = "meta-llama/Llama-3.1-8B-Instruct"
         self.chat_history = []
         self.output_dir = "./"
         self.table_csv_path = None
         self.text_chunks_path = None
         self.history_file = os.path.join(self.output_dir, "chat_history.pkl")
         self.chat_embeddings = []
         self.chat_index = None
         self.chat_embedding_file = os.path.join(self.output_dir, "chat_embeddings.pkl")
         self.query_patterns = Counter()
         self.feedback_scores = {}
         self.stats_file = os.path.join(self.output_dir, "learning_stats.pkl")
         self.conversation_context = {
             'current_employee': None,
             'last_mentioned_entities': []
         }
         os.makedirs(self.output_dir, exist_ok=True)
         self._load_chat_history()
         self._load_learning_stats()
         self._setup()
         self._build_chat_history_index()
     def _load_chat_history(self):
         print("\n" + "=" * 80)
         print("STEP 1: Loading PDF")
         print("=" * 80)
         text = self._load_pdf_text()
         print(f"Loaded PDF with {len(text)} characters")
         print("\n" + "=" * 80)
         print("STEP 2: Extracting and Merging Tables")
         print("=" * 80)
         self.table_csv_path = self._extract_and_merge_tables()
         print("\n" + "=" * 80)
         print("STEP 3: Chunking Text Content (Removing Tables)")
         print("=" * 80)
         text_chunks = self._chunk_text_content(text)
         self.text_chunks_path = self._save_text_chunks(text_chunks)
         print("\n" + "=" * 80)
         print("STEP 4: Creating Final Chunks")
         print("=" * 80)
         all_chunks = []
         all_chunks.extend(text_chunks)
         if self.table_csv_path:
             table_chunks = self._create_table_chunks(self.table_csv_path)
             all_chunks.extend(table_chunks)
             self._save_table_chunks(table_chunks)
         self.chunks = [c['content'] for c in all_chunks]
         self.chunk_metadata = all_chunks
         print(f"\nTotal chunks created: {len(self.chunks)}")
         print(f"  - Q&A chunks: {sum(1 for c in all_chunks if c['type'] == 'qa')}")
         print(f"  - Text chunks: {sum(1 for c in all_chunks if c['type'] == 'text')}")
         print(f"  - Table full: {sum(1 for c in all_chunks if c['type'] == 'table_full')}")
         print(f"  - Employee records: {sum(1 for c in all_chunks if c['type'] == 'table_row')}")
         self._save_manifest(all_chunks)
         print("\n" + "=" * 80)
         print("STEP 5: Creating Embeddings")
         print("=" * 80)
         print("Loading embedding model...")
         self.embeddings_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
         print("Creating embeddings for all chunks...")
         embeddings = self.embeddings_model.encode(self.chunks, show_progress_bar=True)
         print("Building FAISS index...")
         dimension = embeddings.shape[1]
         self.index = faiss.IndexFlatL2(dimension)
         self.index.add(np.array(embeddings).astype('float32'))
         print("\n" + "=" * 80)
+        print("STEP 6: Initializing LLM API")
         print("=" * 80)
+        # ✅ API already configured in __init__
+        print(f"API URL: {self.api_url}")
+        print(f"Model: {self.model_name}")
+        print("LLM API ready!")
         print("\n" + "=" * 80)
         print("SETUP COMPLETE!")
         print("=" * 80)
     def _retrieve(self, query: str, k: int = 10) -> List[Tuple[str, Dict]]:
         """Retrieve relevant chunks with metadata"""
         return prompt
     def ask(self, question: str) -> str:
+        """Ask a question to the chatbot with learning from past conversations"""
         if question.lower() in ["reset data", "reset"]:
             self.chat_history = []
             self.chat_embeddings = []
         # Search through past conversations for similar questions
         relevant_past_chats = self._search_chat_history(resolved_question, k=5)
+        # Retrieve relevant chunks
         retrieved_data = self._retrieve(resolved_question, k=20)
+        # Build prompt
         prompt = self._build_prompt(resolved_question, retrieved_data, relevant_past_chats)
+        # ✅ NEW: Call Hugging Face Router API
+        payload = {
+            "model": self.model_name,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ],
+            "max_tokens": 512,
+            "temperature": 0.3
+        }
+        try:
+            response = requests.post(self.api_url, headers=self.headers, json=payload, timeout=60)
+            response.raise_for_status()
+            result = response.json()
+            # Extract answer from response
+            answer = result["choices"][0]["message"]["content"]
+        except Exception as e:
+            print(f"Error calling LLM API: {e}")
+            answer = "I apologize, but I'm having trouble generating a response right now. Please try again."
+        # Update conversation context
         self._update_conversation_context(question, answer)
+        # Store in history
         chat_entry = {
             'timestamp': datetime.now().isoformat(),
             'question': question,
         self.chat_history.append(chat_entry)
+        # Update chat history index
         new_text = f"Q: {question}\nA: {answer}"
         new_embedding = self.embeddings_model.encode([new_text])
         self.chat_index.add(np.array(new_embedding).astype('float32'))
+        # Save to disk
         self._save_chat_history()
         self._save_learning_stats()