Harishkhawaja committed on
Commit
44e950f
·
verified ·
1 Parent(s): 81c4f1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -28
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  import gradio as gr
3
- import tempfile
4
  import fitz # PyMuPDF
 
 
5
  from groq import Groq
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
@@ -9,36 +10,46 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  from langchain.docstore.document import Document
10
  from langchain.chains import RetrievalQA
11
  from langchain.llms.base import LLM
12
- from typing import List
13
 
14
- # Setup Groq client
15
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
16
 
17
- # Custom LLM wrapper for Groq to plug into LangChain
18
  class GroqLLM(LLM):
19
  model: str = "llama3-70b-8192"
20
 
 
 
 
 
 
21
  def _call(self, prompt: str, stop: List[str] = None) -> str:
22
- response = client.chat.completions.create(
23
- model=self.model,
24
- messages=[{"role": "user", "content": prompt}]
25
- )
26
- return response.choices[0].message.content.strip()
 
 
 
27
 
28
  @property
29
  def _llm_type(self) -> str:
30
  return "groq_llm"
31
 
32
- # Helper: PDF/Text Input
33
  def extract_text(file=None, clipboard=None):
34
- if file:
35
- doc = fitz.open(file.name)
36
- return " ".join(page.get_text() for page in doc)
37
- elif clipboard:
38
- return clipboard
 
 
 
39
  return ""
40
 
41
- # Preprocessing + Embeddings
42
  def process_text(input_text):
43
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
44
  texts = splitter.split_text(input_text)
@@ -53,25 +64,29 @@ def process_text(input_text):
53
  )
54
  return qa_chain
55
 
56
- # Main RAG Pipeline
57
  def handle_input(file, clipboard, query):
58
- raw_text = extract_text(file, clipboard)
59
- if not raw_text:
60
- return "Please provide either a PDF or clipboard text."
61
-
62
- qa = process_text(raw_text)
63
- result = qa.run(query if query else "Summarize the key points and risks in this policy.")
64
- return result
65
-
66
- # Gradio UI
67
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 
 
 
68
  gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot")
69
 
70
  with gr.Row():
71
  file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
72
  clipboard_input = gr.Textbox(label="Or Paste Text", placeholder="Paste policy text here", lines=10)
73
 
74
- query_input = gr.Textbox(label="Ask a Question (optional)", placeholder="e.g., What are the user-facing risks?")
75
  submit_btn = gr.Button("🔍 Analyze")
76
  output = gr.Textbox(label="Output", lines=15)
77
 
 
1
  import os
2
  import gradio as gr
 
3
  import fitz # PyMuPDF
4
+ from typing import List
5
+
6
  from groq import Groq
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import FAISS
 
10
  from langchain.docstore.document import Document
11
  from langchain.chains import RetrievalQA
12
  from langchain.llms.base import LLM
 
13
 
14
# === Groq Client Setup ===
# NOTE(review): if GROQ_API_KEY is unset, os.environ.get returns None and
# the client is built with api_key=None — requests will then fail at call
# time rather than at startup. Confirm that is the desired behavior.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
16
 
17
# === Custom LLM Wrapper ===
class GroqLLM(LLM):
    """LangChain-compatible LLM that proxies prompts to the Groq chat API.

    Uses the module-level ``client`` (Groq SDK) and returns the first
    choice's message content. API failures are deliberately reported
    in-band as a tagged string instead of raised, so downstream chains
    never crash on a transient API error.
    """

    # Groq model identifier; overridable at construction time.
    model: str = "llama3-70b-8192"

    def __init__(self, model: str = None):
        # LLM is a pydantic model, so super().__init__() must run before
        # any field assignment.
        super().__init__()
        if model:
            self.model = model

    def _call(self, prompt: str, stop: List[str] = None) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        ``stop`` sequences are forwarded to the API (fix: the original
        accepted ``stop`` but silently ignored it, so LangChain chains
        that rely on stop tokens misbehaved).
        """
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                stop=stop,
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            # Best-effort: surface the failure as output text (see class doc).
            return f"[Groq API Error] {str(e)}"

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for logging/serialization."""
        return "groq_llm"
39
 
40
# === Input Extraction ===
def extract_text(file=None, clipboard=None):
    """Return raw text from an uploaded PDF ``file``, else ``clipboard``, else "".

    ``file`` is a Gradio file object whose ``.name`` points at a temp path
    on disk — TODO confirm against the Gradio version in use.
    Errors are reported in-band as a tagged string, never raised.
    """
    try:
        if file:
            # Context manager closes the PyMuPDF document when done
            # (fix: the original leaked the open document handle).
            with fitz.open(file.name) as doc:
                return " ".join(page.get_text() for page in doc)
        elif clipboard:
            return clipboard
    except Exception as e:
        return f"[Extract Error] {str(e)}"
    # Neither input supplied (or clipboard was empty/falsy).
    return ""
51
 
52
+ # === Preprocessing & Vector Store Setup ===
53
  def process_text(input_text):
54
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
55
  texts = splitter.split_text(input_text)
 
64
  )
65
  return qa_chain
66
 
67
# === Core RAG Handler ===
def handle_input(file, clipboard, query):
    """Run the full pipeline: extract text, build the QA chain, answer ``query``.

    Every failure mode is folded into the returned string so the Gradio UI
    always gets displayable output instead of an exception.
    """
    try:
        source_text = extract_text(file, clipboard)
        # Guard: reject missing or whitespace-only input before doing any work.
        if not source_text or not source_text.strip():
            return "⚠️ Please provide either a PDF or some clipboard text."

        chain = process_text(source_text)
        question = query or "Summarize the key points and user-facing risks in this policy."
        return chain.run(question)
    except Exception as e:
        return f"❌ Error: {str(e)}"
80
+
81
+ # === Gradio UI ===
82
+ with gr.Blocks(theme=gr.themes.Soft(), show_error=True) as demo:
83
  gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot")
84
 
85
  with gr.Row():
86
  file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
87
  clipboard_input = gr.Textbox(label="Or Paste Text", placeholder="Paste policy text here", lines=10)
88
 
89
+ query_input = gr.Textbox(label="Ask a Question (optional)", placeholder="e.g., What risks am I agreeing to?")
90
  submit_btn = gr.Button("🔍 Analyze")
91
  output = gr.Textbox(label="Output", lines=15)
92