Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,83 +1,371 @@
-import os
-import time
-import torch
 import gradio as gr
-
-from transformers import AutoTokenizer
-from auto_gptq import AutoGPTQForCausalLM
 from sentence_transformers import SentenceTransformer
-
-
-
-
-
-
-
-login(token=hf_token)
-
-# Load tokenizer and quantized model
-model_id = "TheBloke/mistral-7B-GPTQ"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-print("Loading tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
-
-print("Loading quantized model...")
-start = time.time()
-model = AutoGPTQForCausalLM.from_quantized(
-    model_id,
-    use_safetensors=True,
-    device=device,
-    use_triton=True,
-    quantize_config=None,
-)
-print(f"Model loaded in {time.time() - start:.2f} seconds on {device}")
-
-# Load embedding model for FAISS vector store
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
-
-# Sample documents to build vector index (can replace with your own)
-texts = [
-    "Hello world",
-    "Mistral 7B is a powerful language model",
-    "Langchain and FAISS make vector search easy",
-    "This is a test document for vector search",
-]
-embeddings = embedder.encode(texts)

-

-
-
-
-
-
-
-

-
-
-    query_emb = embedder.encode([query])
-    results = faiss_index.similarity_search_by_vector(query_emb[0], k=3)
-    return "\n\n".join(results)

-
-with gr.Blocks() as demo:
-    gr.Markdown("# Mistral GPTQ + FAISS Vector Search Demo")

-
-    prompt_input = gr.Textbox(label="Enter prompt", lines=3)
-    generate_btn = gr.Button("Generate")
-    output_text = gr.Textbox(label="Output", lines=6)

-

-
-
-
-    search_output = gr.Textbox(label="Search Results", lines=6)

-

 if __name__ == "__main__":
-    demo
 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+import PyPDF2
+import docx
+import io
+import os
+from typing import List, Optional

+class DocumentRAG:
+    def __init__(self):
+        print("Initializing RAG System...")
+
+        # Initialize embedding model (lightweight)
+        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
+        print("Embedding model loaded")
+
+        # Initialize quantized LLM
+        self.setup_llm()
+
+        # Document storage
+        self.documents = []
+        self.index = None
+        self.is_indexed = False
+
+    def setup_llm(self):
+        """Setup quantized Mistral model"""
+        try:
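+            # NF4 4-bit weights with double quantization: the 7B model needs roughly
+            # 4 GB of GPU memory instead of ~14 GB in float16.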
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.float16,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4"
+            )
+
+            model_name = "mistralai/Mistral-7B-Instruct-v0.1"
+
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                quantization_config=quantization_config,
+                device_map="auto",
+                torch_dtype=torch.float16,
+                trust_remote_code=True
+            )
+            print("Quantized Mistral model loaded")
+
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            # Fallback to a smaller model if Mistral fails
+            self.setup_fallback_model()
+
+    def setup_fallback_model(self):
+        """Fallback to smaller model if Mistral fails"""
+        try:
+            model_name = "microsoft/DialoGPT-small"
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.model = AutoModelForCausalLM.from_pretrained(model_name)
+            print("Fallback model loaded")
+        except Exception as e:
+            print(f"Fallback model failed: {e}")
+            self.model = None
+            self.tokenizer = None

+    def extract_text_from_file(self, file_path: str) -> str:
+        """Extract text from various file formats"""
+        try:
+            file_extension = os.path.splitext(file_path)[1].lower()
+
+            if file_extension == '.pdf':
+                return self.extract_from_pdf(file_path)
+            elif file_extension == '.docx':
+                return self.extract_from_docx(file_path)
+            elif file_extension == '.txt':
+                return self.extract_from_txt(file_path)
+            else:
+                return f"Unsupported file format: {file_extension}"
+
+        except Exception as e:
+            return f"Error reading file: {str(e)}"
+
+    def extract_from_pdf(self, file_path: str) -> str:
+        """Extract text from PDF"""
+        text = ""
+        try:
+            with open(file_path, 'rb') as file:
+                pdf_reader = PyPDF2.PdfReader(file)
+                for page in pdf_reader.pages:
+                    text += page.extract_text() + "\n"
+        except Exception as e:
+            text = f"Error reading PDF: {str(e)}"
+        return text
+
+    def extract_from_docx(self, file_path: str) -> str:
+        """Extract text from DOCX"""
+        try:
+            doc = docx.Document(file_path)
+            text = ""
+            for paragraph in doc.paragraphs:
+                text += paragraph.text + "\n"
+            return text
+        except Exception as e:
+            return f"Error reading DOCX: {str(e)}"
+
+    def extract_from_txt(self, file_path: str) -> str:
+        """Extract text from TXT"""
+        try:
+            with open(file_path, 'r', encoding='utf-8') as file:
+                return file.read()
+        except Exception as e:
+            try:
+                with open(file_path, 'r', encoding='latin-1') as file:
+                    return file.read()
+            except Exception as e2:
+                return f"Error reading TXT: {str(e2)}"
+
+    def chunk_text(self, text: str, chunk_size: int = 500, overlap: int = 50) -> List[str]:
+        """Split text into overlapping chunks"""
+        if not text.strip():
+            return []
+
+        words = text.split()
+        chunks = []
+
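+        # Step by chunk_size - overlap (450 words with the defaults) so consecutive
+        # chunks share about 50 words across their boundary.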
+        for i in range(0, len(words), chunk_size - overlap):
+            chunk = ' '.join(words[i:i + chunk_size])
+            if chunk.strip():
+                chunks.append(chunk.strip())
+
+            if i + chunk_size >= len(words):
+                break
+
+        return chunks
+
+    def process_documents(self, files) -> str:
+        """Process uploaded files and create embeddings"""
+        if not files:
+            return "No files uploaded!"
+
+        try:
+            all_text = ""
+            processed_files = []
+
+            # Extract text from all files
+            for file in files:
+                if file is None:
+                    continue
+
+                file_text = self.extract_text_from_file(file.name)
+                if not file_text.startswith("Error") and not file_text.startswith("Unsupported"):
+                    all_text += f"\n\n--- {os.path.basename(file.name)} ---\n\n{file_text}"
+                    processed_files.append(os.path.basename(file.name))
+                else:
+                    return f"{file_text}"
+
+            if not all_text.strip():
+                return "No text extracted from files!"
+
+            # Chunk the text
+            self.documents = self.chunk_text(all_text)
+
+            if not self.documents:
+                return "No valid text chunks created!"
+
+            # Create embeddings
+            print(f"Creating embeddings for {len(self.documents)} chunks...")
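+            # all-MiniLM-L6-v2 returns 384-dimensional embeddings as a float32 NumPy array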
+            embeddings = self.embedder.encode(self.documents, show_progress_bar=True)
+
+            # Build FAISS index
+            dimension = embeddings.shape[1]
+            self.index = faiss.IndexFlatIP(dimension)
+
+            # Normalize embeddings for cosine similarity
+            faiss.normalize_L2(embeddings)
+            self.index.add(embeddings.astype('float32'))
+
+            self.is_indexed = True
+
+            return f"Successfully processed {len(processed_files)} files:\n" + \
+                   f"Files: {', '.join(processed_files)}\n" + \
+                   f"Created {len(self.documents)} text chunks\n" + \
+                   f"Ready for Q&A!"
+
+        except Exception as e:
+            return f"Error processing documents: {str(e)}"
+
+    def retrieve_context(self, query: str, k: int = 3) -> str:
+        """Retrieve relevant context for the query"""
+        if not self.is_indexed:
+            return ""
+
+        try:
+            # Get query embedding
+            query_embedding = self.embedder.encode([query])
+            faiss.normalize_L2(query_embedding)
+
+            # Search for similar chunks
+            scores, indices = self.index.search(query_embedding.astype('float32'), k)
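+            # With L2-normalized vectors, inner-product scores are cosine similarities in [-1, 1]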
+
+            # Get relevant documents
+            relevant_docs = []
+            for i, idx in enumerate(indices[0]):
+                if idx < len(self.documents) and scores[0][i] > 0.1:  # Similarity threshold
+                    relevant_docs.append(self.documents[idx])
+
+            return "\n\n".join(relevant_docs)
+
+        except Exception as e:
+            print(f"Error in retrieval: {e}")
+            return ""
+
+    def generate_answer(self, query: str, context: str) -> str:
+        """Generate answer using the LLM"""
+        if self.model is None or self.tokenizer is None:
+            return "Model not available. Please try again."
+
+        try:
+            # Create prompt
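+            # Mistral-Instruct expects the [INST] ... [/INST] wrapper; the context is
+            # sliced to 2,000 characters to help keep the prompt under the 1024-token cap below.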
prompt = f"""<s>[INST] Based on the following context, answer the question. If the answer is not in the context, say "I don't have enough information to answer this question."
|
| 226 |
|
| 227 |
+
Context:
|
| 228 |
+
{context[:2000]} # Limit context length
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
+
Question: {query}
|
|
|
|
|
|
|
| 231 |
|
| 232 |
+
Answer: [/INST]"""
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
+
# Tokenize
|
| 235 |
+
inputs = self.tokenizer(
|
| 236 |
+
prompt,
|
| 237 |
+
return_tensors="pt",
|
| 238 |
+
max_length=1024,
|
| 239 |
+
truncation=True,
|
| 240 |
+
padding=True
|
| 241 |
+
)
|
| 242 |
+
|
| 243 |
+
# Generate
|
| 244 |
+
with torch.no_grad():
|
| 245 |
+
outputs = self.model.generate(
|
| 246 |
+
**inputs,
|
| 247 |
+
max_new_tokens=256,
|
| 248 |
+
temperature=0.7,
|
| 249 |
+
do_sample=True,
|
| 250 |
+
top_p=0.9,
|
| 251 |
+
pad_token_id=self.tokenizer.eos_token_id,
|
| 252 |
+
eos_token_id=self.tokenizer.eos_token_id
|
| 253 |
+
)
|
| 254 |
+
|
| 255 |
+
# Decode response
|
| 256 |
+
full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 257 |
+
|
| 258 |
+
# Extract answer (remove the prompt part)
|
| 259 |
+
if "[/INST]" in full_response:
|
| 260 |
+
answer = full_response.split("[/INST]")[-1].strip()
|
| 261 |
+
else:
|
| 262 |
+
answer = full_response[len(prompt):].strip()
|
| 263 |
+
|
| 264 |
+
return answer if answer else "I couldn't generate a proper response."
|
| 265 |
+
|
| 266 |
+
except Exception as e:
|
| 267 |
+
return f"β Error generating answer: {str(e)}"
|
| 268 |
+
|
| 269 |
+
def answer_question(self, query: str) -> str:
|
| 270 |
+
"""Main function to answer questions"""
|
| 271 |
+
if not query.strip():
|
| 272 |
+
return "β Please ask a question!"
|
| 273 |
+
|
| 274 |
+
if not self.is_indexed:
|
| 275 |
+
return "π Please upload and process documents first!"
|
| 276 |
+
|
| 277 |
+
try:
|
| 278 |
+
# Retrieve relevant context
|
| 279 |
+
context = self.retrieve_context(query)
|
| 280 |
+
|
| 281 |
+
if not context:
|
| 282 |
+
return "π No relevant information found in the uploaded documents."
|
| 283 |
+
|
| 284 |
+
# Generate answer
|
| 285 |
+
answer = self.generate_answer(query, context)
|
| 286 |
+
|
| 287 |
+
return f"π‘ **Answer:** {answer}\n\nπ **Source Context:** {context[:500]}..."
|
| 288 |
+
|
| 289 |
+
except Exception as e:
|
| 290 |
+
return f"β Error answering question: {str(e)}"
|
| 291 |
|
| 292 |
+
# Initialize the RAG system
|
| 293 |
+
print("Initializing Document RAG System...")
|
| 294 |
+
rag_system = DocumentRAG()
|
|
|
|
| 295 |
|
| 296 |
+
# Gradio Interface
|
| 297 |
+
def create_interface():
|
| 298 |
+
with gr.Blocks(title="π Document Q&A with RAG", theme=gr.themes.Soft()) as demo:
|
| 299 |
+
gr.Markdown("""
|
| 300 |
+
# π Document Q&A System
|
| 301 |
+
|
| 302 |
+
Upload your documents and ask questions about them!
|
| 303 |
+
|
| 304 |
+
**Supported formats:** PDF, DOCX, TXT
|
| 305 |
+
""")
|
| 306 |
+
|
| 307 |
+
with gr.Tab("π€ Upload Documents"):
|
| 308 |
+
with gr.Row():
|
| 309 |
+
with gr.Column():
|
| 310 |
+
file_upload = gr.File(
|
| 311 |
+
label="Upload Documents",
|
| 312 |
+
file_count="multiple",
|
| 313 |
+
file_types=[".pdf", ".docx", ".txt"]
|
| 314 |
+
)
|
| 315 |
+
process_btn = gr.Button("π Process Documents", variant="primary")
|
| 316 |
+
|
| 317 |
+
with gr.Column():
|
| 318 |
+
process_status = gr.Textbox(
|
| 319 |
+
label="Processing Status",
|
| 320 |
+
lines=8,
|
| 321 |
+
interactive=False
|
| 322 |
+
)
|
| 323 |
+
|
| 324 |
+
process_btn.click(
|
| 325 |
+
fn=rag_system.process_documents,
|
| 326 |
+
inputs=[file_upload],
|
| 327 |
+
outputs=[process_status]
|
| 328 |
+
)
|
| 329 |
+
|
| 330 |
+
with gr.Tab("β Ask Questions"):
|
| 331 |
+
with gr.Row():
|
| 332 |
+
with gr.Column():
|
| 333 |
+
question_input = gr.Textbox(
|
| 334 |
+
label="Your Question",
|
| 335 |
+
placeholder="What would you like to know about your documents?",
|
| 336 |
+
lines=3
|
| 337 |
+
)
|
| 338 |
+
ask_btn = gr.Button("π Get Answer", variant="primary")
|
| 339 |
+
|
| 340 |
+
with gr.Column():
|
| 341 |
+
answer_output = gr.Textbox(
|
| 342 |
+
label="Answer",
|
| 343 |
+
lines=10,
|
| 344 |
+
interactive=False
|
| 345 |
+
)
|
| 346 |
+
|
| 347 |
+
ask_btn.click(
|
| 348 |
+
fn=rag_system.answer_question,
|
| 349 |
+
inputs=[question_input],
|
| 350 |
+
outputs=[answer_output]
|
| 351 |
+
)
|
| 352 |
+
|
| 353 |
+
# Example questions
|
| 354 |
+
gr.Markdown("""
|
| 355 |
+
### π‘ Example Questions:
|
| 356 |
+
- What is the main topic of the document?
|
| 357 |
+
- Can you summarize the key points?
|
| 358 |
+
- What are the conclusions mentioned?
|
| 359 |
+
- Are there any specific numbers or statistics?
|
| 360 |
+
""")
|
| 361 |
+
|
| 362 |
+
return demo
|
| 363 |
|
| 364 |
+
# Launch the app
|
| 365 |
if __name__ == "__main__":
|
| 366 |
+
demo = create_interface()
|
| 367 |
+
demo.launch(
|
| 368 |
+
server_name="0.0.0.0",
|
| 369 |
+
server_port=7860,
|
| 370 |
+
share=True
|
| 371 |
+
)
|