Spaces:
Sleeping
Sleeping
File size: 3,617 Bytes
288c00f 44e950f 288c00f b18e082 81c4f1a 288c00f 44e950f 288c00f 44e950f 288c00f 44e950f 288c00f 44e950f 288c00f 44e950f 288c00f 44e950f 288c00f 44e950f 288c00f 44e950f 288c00f bdc2edb 15346ff bdc2edb 15346ff bdc2edb 15346ff bdc2edb 15346ff 44e950f 2be46a3 288c00f 44e950f 288c00f bdc2edb 288c00f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import os
import gradio as gr
import fitz # PyMuPDF
from typing import List
from groq import Groq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.llms.base import LLM
# === Groq Client Setup ===
# NOTE(review): assumes GROQ_API_KEY is set in the environment; if it is
# missing, api_key is None and API calls will fail — confirm deployment config.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# === Custom LLM Wrapper ===
class GroqLLM(LLM):
    """LangChain LLM adapter for the Groq chat-completions API.

    Sends the prompt as a single user message and returns the reply text.
    On API failure it returns an error string instead of raising, so the
    surrounding RAG chain degrades gracefully rather than crashing the UI.
    """

    # Default Groq model; overridable via the constructor.
    model: str = "llama3-70b-8192"

    def __init__(self, model: str = None):
        super().__init__()
        if model:
            self.model = model

    def _call(self, prompt: str, stop: List[str] = None) -> str:
        """Run one chat completion for *prompt*.

        Args:
            prompt: user message to send.
            stop: optional stop sequences supplied by LangChain.

        Returns:
            The model reply (stripped), or an "[Groq API Error] ..." string.
        """
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                # BUG FIX: `stop` was previously accepted but ignored;
                # forward it so chain-supplied stop sequences take effect.
                stop=stop,
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            return f"[Groq API Error] {str(e)}"

    @property
    def _llm_type(self) -> str:
        return "groq_llm"
# === Input Extraction ===
def extract_text(file=None, clipboard=None):
    """Return raw text from an uploaded PDF or from pasted text.

    Args:
        file: optional upload object exposing a ``.name`` path
            (e.g. a Gradio file input); takes precedence over clipboard.
        clipboard: optional pasted text, used when no file is given.

    Returns:
        The extracted text, an "[Extract Error] ..." message on failure,
        or "" when neither input is provided.
    """
    try:
        if file:
            # BUG FIX: the document handle was never closed; the context
            # manager releases the PDF deterministically.
            with fitz.open(file.name) as doc:
                return " ".join(page.get_text() for page in doc)
        elif clipboard:
            return clipboard
    except Exception as e:
        return f"[Extract Error] {str(e)}"
    return ""
# === Preprocessing & Vector Store Setup ===
def process_text(input_text):
    """Build a RetrievalQA chain over *input_text*.

    Splits the text into overlapping chunks, embeds each chunk with a
    MiniLM sentence transformer, indexes them in an in-memory FAISS store,
    and wires the resulting retriever to the Groq-backed LLM.
    """
    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    documents = [Document(page_content=chunk) for chunk in chunker.split_text(input_text)]
    vector_store = FAISS.from_documents(
        documents,
        HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
    )
    return RetrievalQA.from_chain_type(
        llm=GroqLLM(),
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )
# === Core RAG Handler ===
def handle_input(file, clipboard, query):
    """Run the end-to-end RAG pipeline: extract, index, query, format.

    Args:
        file: optional PDF upload (Gradio file object).
        clipboard: optional pasted policy text.
        query: optional user question; a default explanation prompt is
            used when empty.

    Returns:
        The LLM answer followed by previews of up to three source chunks,
        or an error message string.
    """
    try:
        raw_text = extract_text(file, clipboard)
        if not raw_text:
            return "Please provide either a PDF or clipboard text."
        # Fall back to a canned explain-and-highlight-risks prompt when
        # the user did not ask a specific question.
        default_query = (
            "Explain this policy in simple terms and highlight the risks for the user. "
            "Provide bullet points for risks."
        )
        effective_query = query or default_query
        chain = process_text(raw_text)
        answer = chain.invoke({"query": effective_query})
        # Show the first 300 chars of up to three retrieved chunks so the
        # user can see what the answer was grounded on.
        snippets = [doc.page_content[:300] + "..." for doc in answer["source_documents"][:3]]
        preview = "\n\n📄 Sources:\n" + "\n---\n".join(snippets)
        return answer["result"] + preview
    except Exception as e:
        return f"❌ Error: {str(e)}"
# === Gradio UI ===
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot")
    # Two input modes side by side: upload a PDF, or paste the text directly.
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        pasted_text = gr.Textbox(label="Or Paste Text", placeholder="Paste policy text here", lines=10)
    question_box = gr.Textbox(label="Ask a Question (optional)", placeholder="e.g., What risks am I agreeing to?")
    analyze_btn = gr.Button("🔍 Analyze")
    answer_box = gr.Textbox(label="Output", lines=20)
    # Wire the button to the RAG handler; all three inputs are optional
    # from the UI's point of view — handle_input validates them.
    analyze_btn.click(fn=handle_input, inputs=[pdf_upload, pasted_text, question_box], outputs=answer_box)
demo.launch()
|