Spaces:

MoslemBot
/

kajibuku

Sleeping

App Files Community

Bofandra committed on Jun 29, 2025

Commit

e5cb061

verified ·

1 Parent(s): a3f8edb

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -44

app.py CHANGED Viewed

@@ -1,68 +1,87 @@
 import os
 from PyPDF2 import PdfReader
 from sentence_transformers import SentenceTransformer
-import faiss
-import torch
-from transformers import pipeline
-import gradio as gr
-# Load models
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
-model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # Replace with your preferred HF model
-generator = pipeline("text-generation", model=model_name, device=0 if torch.cuda.is_available() else -1)
-# Globals
-texts = []
-index = None
-def process_pdf(file):
-    global texts, index
     reader = PdfReader(file.name)
-    full_text = ""
-    for page in reader.pages:
-        full_text += page.extract_text() + "\n"
     chunks = [full_text[i:i+500] for i in range(0, len(full_text), 500)]
-    texts = chunks
     embeddings = embedder.encode(chunks)
-    index = faiss.IndexFlatL2(len(embeddings[0]))
     index.add(embeddings)
-    return "PDF processed. Ask me anything about it!"
-def chat_fn(message, history):
-    if index is None or not texts:
-        return "Please upload and process a PDF first."
-    q_embedding = embedder.encode([message])
-    D, I = index.search(q_embedding, k=3)
-    context = "\n".join([texts[i] for i in I[0]])
-    prompt = f"""You are a helpful assistant. Use the context to answer the question.
-Context:
-{context}
-Question:
-{message}
-Answer:"""
-    output = generator(prompt, max_new_tokens=300, do_sample=True)[0]["generated_text"]
-    answer = output.split("Answer:")[-1].strip()
-    return answer
 with gr.Blocks() as demo:
-    gr.Markdown("## 🧠 PDF ChatBot - Ask Anything from Your Document")
-    with gr.Row():
-        file = gr.File(file_types=[".pdf"], label="Upload PDF")
-        status = gr.Textbox(label="Status", interactive=False)
-        upload_btn = gr.Button("Process PDF")
-    upload_btn.click(fn=process_pdf, inputs=file, outputs=status)
-    chatbot = gr.ChatInterface(chat_fn)
 demo.launch()

 import os
+import gradio as gr
+import faiss
+import pickle
 from PyPDF2 import PdfReader
 from sentence_transformers import SentenceTransformer
+from huggingface_hub import InferenceClient
+# Initialize embedder and LLM client
+embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+llm = InferenceClient("google/gemma-7b-it", token=os.getenv("HF_TOKEN"))  # Or any other model you prefer
+DATA_DIR = "data"
+os.makedirs(DATA_DIR, exist_ok=True)
+# Save uploaded PDF and index its content
+def save_pdf(file, title):
+    folder = os.path.join(DATA_DIR, title.strip())
+    if os.path.exists(folder):
+        return f"'{title}' already exists. Use a different title."
+    os.makedirs(folder, exist_ok=True)
+    # Extract text
     reader = PdfReader(file.name)
+    full_text = "\n".join(p.extract_text() for p in reader.pages if p.extract_text())
+    # Chunk text
     chunks = [full_text[i:i+500] for i in range(0, len(full_text), 500)]
+    # Embed and index
     embeddings = embedder.encode(chunks)
+    index = faiss.IndexFlatL2(embeddings.shape[1])
     index.add(embeddings)
+    # Save index and chunks
+    faiss.write_index(index, os.path.join(folder, "index.faiss"))
+    with open(os.path.join(folder, "chunks.pkl"), "wb") as f:
+        pickle.dump(chunks, f)
+    return f"Saved and indexed '{title}'."
+# Return all available PDF titles
+def list_titles():
+    return [d for d in os.listdir(DATA_DIR) if os.path.isdir(os.path.join(DATA_DIR, d))]
+# Ask question using selected PDFs as context
+def ask_question(message, history, selected_titles):
+    if not selected_titles:
+        return "❗ Please select at least one PDF."
+    combined_answer = ""
+    for title in selected_titles:
+        folder = os.path.join(DATA_DIR, title)
+        try:
+            index = faiss.read_index(os.path.join(folder, "index.faiss"))
+            with open(os.path.join(folder, "chunks.pkl"), "rb") as f:
+                chunks = pickle.load(f)
+            q_embed = embedder.encode([message])
+            D, I = index.search(q_embed, k=3)
+            context = "\n".join([chunks[i] for i in I[0]])
+            prompt = f"Context:\n{context}\n\nQuestion: {message}\nAnswer:"
+            response = llm.text_generation(prompt, max_new_tokens=200)
+            combined_answer += f"**{title}**:\n{response.strip()}\n\n"
+        except Exception as e:
+            combined_answer += f"⚠️ Error with {title}: {str(e)}\n\n"
+    return combined_answer.strip()
+# Gradio UI
 with gr.Blocks() as demo:
+    with gr.Tab("📤 Upload PDF"):
+        file = gr.File(label="PDF File")
+        title = gr.Textbox(label="Title for PDF")
+        upload_btn = gr.Button("Upload and Index")
+        upload_status = gr.Textbox(label="Status")
+        upload_btn.click(fn=save_pdf, inputs=[file, title], outputs=upload_status)
+    with gr.Tab("💬 Chat with PDFs"):
+        pdf_selector = gr.CheckboxGroup(label="Select PDFs", choices=list_titles())
+        refresh_btn = gr.Button("🔄 Refresh PDF List")
+        refresh_btn.click(fn=list_titles, outputs=pdf_selector)
+        chat = gr.ChatInterface(fn=ask_question, additional_inputs=[pdf_selector])
 demo.launch()