Spaces:

PEC-Hackathon
/

Lexicon_Chatbot

Sleeping

App Files Community

Harishkhawaja committed on May 18, 2025

Commit

44e950f

verified ·

1 Parent(s): 81c4f1a

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -28

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import os
 import gradio as gr
-import tempfile
 import fitz  # PyMuPDF
 from groq import Groq
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
@@ -9,36 +10,46 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.docstore.document import Document
 from langchain.chains import RetrievalQA
 from langchain.llms.base import LLM
-from typing import List
-# Setup Groq client
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
-# Custom LLM wrapper for Groq to plug into LangChain
 class GroqLLM(LLM):
     model: str = "llama3-70b-8192"
     def _call(self, prompt: str, stop: List[str] = None) -> str:
-        response = client.chat.completions.create(
-            model=self.model,
-            messages=[{"role": "user", "content": prompt}]
-        )
-        return response.choices[0].message.content.strip()
     @property
     def _llm_type(self) -> str:
         return "groq_llm"
-# Helper: PDF/Text Input
 def extract_text(file=None, clipboard=None):
-    if file:
-        doc = fitz.open(file.name)
-        return " ".join(page.get_text() for page in doc)
-    elif clipboard:
-        return clipboard
     return ""
-# Preprocessing + Embeddings
 def process_text(input_text):
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     texts = splitter.split_text(input_text)
@@ -53,25 +64,29 @@ def process_text(input_text):
     )
     return qa_chain
-# Main RAG Pipeline
 def handle_input(file, clipboard, query):
-    raw_text = extract_text(file, clipboard)
-    if not raw_text:
-        return "Please provide either a PDF or clipboard text."
-    qa = process_text(raw_text)
-    result = qa.run(query if query else "Summarize the key points and risks in this policy.")
-    return result
-# Gradio UI
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot")
     with gr.Row():
         file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
         clipboard_input = gr.Textbox(label="Or Paste Text", placeholder="Paste policy text here", lines=10)
-    query_input = gr.Textbox(label="Ask a Question (optional)", placeholder="e.g., What are the user-facing risks?")
     submit_btn = gr.Button("🔍 Analyze")
     output = gr.Textbox(label="Output", lines=15)

 import os
 import gradio as gr
 import fitz  # PyMuPDF
+from typing import List
 from groq import Groq
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.docstore.document import Document
 from langchain.chains import RetrievalQA
 from langchain.llms.base import LLM
+# === Groq Client Setup ===
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+# === Custom LLM Wrapper ===
 class GroqLLM(LLM):
     model: str = "llama3-70b-8192"
+    def __init__(self, model: str = None):
+        super().__init__()
+        if model:
+            self.model = model
     def _call(self, prompt: str, stop: List[str] = None) -> str:
+        try:
+            response = client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": prompt}]
+            )
+            return response.choices[0].message.content.strip()
+        except Exception as e:
+            return f"[Groq API Error] {str(e)}"
     @property
     def _llm_type(self) -> str:
         return "groq_llm"
+# === Input Extraction ===
 def extract_text(file=None, clipboard=None):
+    try:
+        if file:
+            doc = fitz.open(file.name)
+            return " ".join(page.get_text() for page in doc)
+        elif clipboard:
+            return clipboard
+    except Exception as e:
+        return f"[Extract Error] {str(e)}"
     return ""
+# === Preprocessing & Vector Store Setup ===
 def process_text(input_text):
     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
     texts = splitter.split_text(input_text)
     )
     return qa_chain
+# === Core RAG Handler ===
 def handle_input(file, clipboard, query):
+    try:
+        raw_text = extract_text(file, clipboard)
+        if not raw_text or raw_text.strip() == "":
+            return "⚠️ Please provide either a PDF or some clipboard text."
+        qa = process_text(raw_text)
+        prompt = query if query else "Summarize the key points and user-facing risks in this policy."
+        result = qa.run(prompt)
+        return result
+    except Exception as e:
+        return f"❌ Error: {str(e)}"
+# === Gradio UI ===
+with gr.Blocks(theme=gr.themes.Soft(), show_error=True) as demo:
     gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot")
     with gr.Row():
         file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
         clipboard_input = gr.Textbox(label="Or Paste Text", placeholder="Paste policy text here", lines=10)
+    query_input = gr.Textbox(label="Ask a Question (optional)", placeholder="e.g., What risks am I agreeing to?")
     submit_btn = gr.Button("🔍 Analyze")
     output = gr.Textbox(label="Output", lines=15)