Spaces:

pradeepsengarr
/

Custom_Rag_Bot

Sleeping

App Files Community

pradeepsengarr committed on Jun 7, 2025

Commit

841e2b8

verified ·

1 Parent(s): c33b536

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -35

app.py CHANGED Viewed

@@ -1,14 +1,15 @@
 import os
 import gradio as gr
-import fitz
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
-from transformers import AutoTokenizer, pipeline
-from transformers import BitsAndBytesConfig, AutoModelForCausalLM
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from huggingface_hub import login
 hf_token = os.environ.get("HUGGINGFACE_TOKEN")
 if not hf_token:
     raise ValueError("Hugging Face token not found.")
@@ -17,28 +18,21 @@ login(token=hf_token)
 # Load embedding model
 embed_model = SentenceTransformer("BAAI/bge-base-en-v1.5")
-# Load quantized Mistral with 8-bit
-model_id = "mistralai/Mistral-7B-Instruct-v0.1"
-bnb_config = BitsAndBytesConfig(
-    load_in_8bit=True,
-    llm_int8_threshold=6.0,
-    llm_int8_skip_modules=None,
-)
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    quantization_config=bnb_config,
-    device_map="auto",
-    token=hf_token
 )
-llm = pipeline("text-generation", model=model, tokenizer=tokenizer)
-# State
 index = None
 doc_texts = []
-# Extract text
 def extract_text(file):
     try:
         text = ""
@@ -56,7 +50,7 @@ def extract_text(file):
     except Exception as e:
         return f"❌ Error extracting text: {e}"
-# Process file
 def process_file(file):
     global index, doc_texts
     try:
@@ -64,25 +58,23 @@ def process_file(file):
         if text.startswith("❌"):
             return text
-        # Trim large documents
-        text = text[:15000]
         splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
         doc_texts = splitter.split_text(text)
         if not doc_texts:
-            return "❌ Could not split document."
         embeddings = embed_model.encode(doc_texts, convert_to_numpy=True)
         dim = embeddings.shape[1]
         index = faiss.IndexFlatL2(dim)
         index.add(embeddings)
-        return "✅ Document processed. You may ask your question below."
     except Exception as e:
         return f"❌ Error processing file: {e}"
-# Answer generator
 def generate_answer(question):
     global index, doc_texts
     try:
@@ -94,31 +86,40 @@ def generate_answer(question):
         context = "\n".join([doc_texts[i] for i in I[0]])
         prompt = (
-            f"You are a helpful assistant. Use the context below to answer the question clearly.\n\n"
             f"Context:\n{context}\n\n"
             f"Question: {question}\n\n"
             f"Answer:"
         )
-        result = llm(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
-        return result[0]["generated_text"].split("Answer:")[-1].strip()
     except Exception as e:
         return f"❌ Error generating answer: {e}"
 # Gradio UI
-with gr.Blocks(title="📄 Document Q&A Assistant") as demo:
-    gr.Markdown("<h1 style='text-align: center;'>📄 Document AI Assistant</h1>")
-    gr.Markdown("Upload a PDF or TXT file, and ask questions about its content. The assistant uses Mistral 7B (quantized) for reasoning.")
     with gr.Row():
-        file_input = gr.File(label="Upload PDF or TXT", file_types=[".pdf", ".txt"])
         upload_output = gr.Textbox(label="Upload Status")
     with gr.Row():
-        question_input = gr.Textbox(label="Ask a Question", placeholder="e.g. What is the summary?")
         answer_output = gr.Textbox(label="Answer")
     file_input.change(fn=process_file, inputs=file_input, outputs=upload_output)
     question_input.submit(fn=generate_answer, inputs=question_input, outputs=answer_output)
-demo.launch(show_error=True, share=False)

 import os
 import gradio as gr
+import fitz  # PyMuPDF
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
+from transformers import AutoTokenizer
+from auto_gptq import AutoGPTQForCausalLM
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from huggingface_hub import login
+# Authenticate
 hf_token = os.environ.get("HUGGINGFACE_TOKEN")
 if not hf_token:
     raise ValueError("Hugging Face token not found.")
 # Load embedding model
 embed_model = SentenceTransformer("BAAI/bge-base-en-v1.5")
+# Load 4-bit quantized Mistral model
+model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
+tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+model = AutoGPTQForCausalLM.from_quantized(
     model_id,
+    use_safetensors=True,
+    trust_remote_code=True,
+    device_map="auto"
 )
+# Internal state
 index = None
 doc_texts = []
+# PDF/TXT text extraction
 def extract_text(file):
     try:
         text = ""
     except Exception as e:
         return f"❌ Error extracting text: {e}"
+# Preprocess and embed
 def process_file(file):
     global index, doc_texts
     try:
         if text.startswith("❌"):
             return text
+        text = text[:15000]  # Limit size
         splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
         doc_texts = splitter.split_text(text)
         if not doc_texts:
+            return "❌ Document could not be split."
         embeddings = embed_model.encode(doc_texts, convert_to_numpy=True)
         dim = embeddings.shape[1]
         index = faiss.IndexFlatL2(dim)
         index.add(embeddings)
+        return "✅ Document processed. Ask your question below."
     except Exception as e:
         return f"❌ Error processing file: {e}"
+# Generate answer using context
 def generate_answer(question):
     global index, doc_texts
     try:
         context = "\n".join([doc_texts[i] for i in I[0]])
         prompt = (
+            f"You are a helpful assistant. Use the context below to answer clearly.\n\n"
             f"Context:\n{context}\n\n"
             f"Question: {question}\n\n"
             f"Answer:"
         )
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        output = model.generate(
+            **inputs,
+            max_new_tokens=150,
+            do_sample=True,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.95
+        )
+        answer = tokenizer.decode(output[0], skip_special_tokens=True)
+        return answer.split("Answer:")[-1].strip()
     except Exception as e:
         return f"❌ Error generating answer: {e}"
 # Gradio UI
+with gr.Blocks(title="📄 Document Q&A (Mistral 4-bit)") as demo:
+    gr.Markdown("<h1 style='text-align: center;'>📄 Document Q&A with Mistral 4-bit</h1>")
+    gr.Markdown("Upload a PDF or TXT and ask questions. Powered by Mistral-7B GPTQ.")
     with gr.Row():
+        file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt"])
         upload_output = gr.Textbox(label="Upload Status")
     with gr.Row():
+        question_input = gr.Textbox(label="Ask a Question", placeholder="e.g. What is this document about?")
         answer_output = gr.Textbox(label="Answer")
     file_input.change(fn=process_file, inputs=file_input, outputs=upload_output)
     question_input.submit(fn=generate_answer, inputs=question_input, outputs=answer_output)
+demo.launch(show_error=True)