Harishkhawaja committed on
Commit
288c00f
·
verified ·
1 Parent(s): 5347c8b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import tempfile
4
+ import fitz # PyMuPDF
5
+ from groq import Groq
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain.docstore.document import Document
10
+ from langchain.chains import RetrievalQA
11
+ from langchain.llms.base import LLM
12
+ from typing import List
13
+
14
# Setup Groq client
# Reads the key from the GROQ_API_KEY environment variable; if it is unset,
# api_key is None and requests will fail at call time, not at import time.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
16
+
17
# Custom LLM wrapper for Groq to plug into LangChain
class GroqLLM(LLM):
    """LangChain-compatible LLM that delegates text generation to the Groq chat API."""

    # Groq-hosted model identifier used for every completion request.
    model: str = "llama3-70b-8192"

    def _call(self, prompt: str, stop: List[str] = None) -> str:
        """Send *prompt* as a single user message and return the stripped reply.

        Fix: the original accepted ``stop`` but silently ignored it; it is now
        forwarded to the Groq API so LangChain stop sequences take effect
        (``stop=None`` leaves behavior unchanged for existing callers).
        """
        response = client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            stop=stop,
        )
        return response.choices[0].message.content.strip()

    @property
    def _llm_type(self) -> str:
        # Identifier LangChain uses to tag this LLM implementation.
        return "groq_llm"
31
+
32
# Helper: PDF/Text Input
def extract_text(file=None, clipboard=None):
    """Return raw text from an uploaded PDF *file*, else *clipboard* text, else ''.

    Fix: the original never closed the fitz (PyMuPDF) document, leaking the
    file handle until garbage collection; the Document is now used as a
    context manager so it is closed deterministically.
    """
    if file:
        # file is a Gradio upload object; .name is its path on disk.
        with fitz.open(file.name) as doc:
            return " ".join(page.get_text() for page in doc)
    if clipboard:
        return clipboard
    return ""
40
+
41
# Preprocessing + Embeddings
def process_text(input_text):
    """Chunk *input_text*, embed the chunks, and return a RetrievalQA chain.

    Pipeline: split into 500-char chunks with 50-char overlap -> wrap each
    chunk as a Document -> embed with all-MiniLM-L6-v2 -> index in FAISS ->
    expose the index as a retriever behind a RetrievalQA chain driven by
    GroqLLM (source documents included in results).
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    documents = [Document(page_content=chunk) for chunk in chunker.split_text(input_text)]

    vector_store = FAISS.from_documents(
        documents,
        HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
    )

    return RetrievalQA.from_chain_type(
        llm=GroqLLM(),
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )
55
+
56
# Main RAG Pipeline
def handle_input(file, clipboard, query):
    """Run the RAG pipeline over a PDF upload or pasted clipboard text.

    When *query* is empty, falls back to a generic policy-summary prompt.
    Returns the model's answer, or an instruction string when neither input
    source yielded any text.
    """
    raw_text = extract_text(file, clipboard)
    if not raw_text:
        return "Please provide either a PDF or clipboard text."

    question = query or "Summarize the key points and risks in this policy."
    return process_text(raw_text).run(question)
65
+
66
# Gradio UI
# Layout: a row with the two input sources (PDF upload / pasted text),
# then an optional question box, an action button, and the answer area.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        pasted_text = gr.Textbox(
            label="Or Paste Text",
            placeholder="Paste policy text here",
            lines=10,
        )

    question_box = gr.Textbox(
        label="Ask a Question (optional)",
        placeholder="e.g., What are the user-facing risks?",
    )
    analyze_button = gr.Button("🔍 Analyze")
    answer_box = gr.Textbox(label="Output", lines=15)

    # Wire the button to the RAG pipeline defined above.
    analyze_button.click(
        fn=handle_input,
        inputs=[pdf_upload, pasted_text, question_box],
        outputs=answer_box,
    )

demo.launch()