Spaces:

PEC-Hackathon
/

Lexicon_Chatbot

Sleeping

App Files Files Community

Harishkhawaja committed on May 18, 2025

Commit

288c00f

verified ·

1 Parent(s): 5347c8b

Create app.py

Browse files

Files changed (1) hide show

app.py +80 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+import gradio as gr
+import tempfile
+import fitz  # PyMuPDF
+from groq import Groq
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.docstore.document import Document
+from langchain.chains import RetrievalQA
+from langchain.llms.base import LLM
+from typing import List
+# Setup Groq client; the API key is read from the GROQ_API_KEY environment variable (None when unset).
+client = Groq(api_key=os.environ.get("GROQ_API_KEY"))  # shared module-level client used by GroqLLM._call
+# Custom LLM wrapper for Groq to plug into LangChain
+class GroqLLM(LLM):
+    model: str = "llama3-70b-8192"
+    def _call(self, prompt: str, stop: List[str] = None) -> str:
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return response.choices[0].message.content.strip()
+    @property
+    def _llm_type(self) -> str:
+        return "groq_llm"
+# Helper: PDF/Text Input
+def extract_text(file=None, clipboard=None):
+    if file:
+        doc = fitz.open(file.name)
+        return " ".join(page.get_text() for page in doc)
+    elif clipboard:
+        return clipboard
+    return ""
+# Preprocessing + Embeddings
+def process_text(input_text):
+    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    texts = splitter.split_text(input_text)
+    docs = [Document(page_content=t) for t in texts]
+    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+    db = FAISS.from_documents(docs, embeddings)
+    retriever = db.as_retriever()
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=GroqLLM(), retriever=retriever, return_source_documents=True
+    )
+    return qa_chain
+# Main RAG Pipeline
+def handle_input(file, clipboard, query):
+    raw_text = extract_text(file, clipboard)
+    if not raw_text:
+        return "Please provide either a PDF or clipboard text."
+    qa = process_text(raw_text)
+    result = qa.run(query if query else "Summarize the key points and risks in this policy.")
+    return result
+# Gradio UI: a PDF upload and a paste box in one row, an optional question box, an Analyze button and an output box.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot")
+    with gr.Row():  # the two alternative input sources share one row
+        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+        clipboard_input = gr.Textbox(label="Or Paste Text", placeholder="Paste policy text here", lines=10)
+    query_input = gr.Textbox(label="Ask a Question (optional)", placeholder="e.g., What are the user-facing risks?")
+    submit_btn = gr.Button("🔍 Analyze")
+    output = gr.Textbox(label="Output", lines=15)
+    submit_btn.click(fn=handle_input, inputs=[file_input, clipboard_input, query_input], outputs=output)  # on click, call handle_input and show its return value in `output`
+demo.launch()  # runs whenever this module is executed or imported; there is no __main__ guard