Hugging Face Space (status: Sleeping)
import os
from typing import List, Optional

import fitz  # PyMuPDF
import gradio as gr
from groq import Groq
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.llms.base import LLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
| # === Groq Client Setup === | |
| client = Groq(api_key=os.environ.get("GROQ_API_KEY")) | |
# === Custom LLM Wrapper ===
class GroqLLM(LLM):
    """LangChain-compatible LLM wrapper around the Groq chat-completions API.

    Relies on the module-level ``client`` being configured with a valid
    GROQ_API_KEY. API errors are returned as tagged strings rather than
    raised, so the calling UI can display them directly.
    """

    # Default Groq model; override via the constructor.
    model: str = "llama3-70b-8192"

    def __init__(self, model: Optional[str] = None):
        """Create the wrapper, optionally overriding the default model name."""
        super().__init__()
        if model:
            self.model = model

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        ``stop`` sequences are forwarded to the API (the original accepted
        but silently ignored them). ``**kwargs`` absorbs LangChain's
        run-manager/callback arguments.
        """
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                stop=stop,  # was accepted but ignored before
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            return f"[Groq API Error] {str(e)}"

    @property
    def _llm_type(self) -> str:
        # LangChain declares _llm_type as an abstract *property*; a plain
        # method here would make property-style accesses (used by LangChain's
        # serialization/identifying-params machinery) return a bound method
        # instead of the string.
        return "groq_llm"
# === Input Extraction ===
def extract_text(file=None, clipboard=None):
    """Return raw text from an uploaded PDF ``file`` or, failing that, from
    pasted ``clipboard`` text.

    Args:
        file: an object with a ``.name`` path attribute (Gradio upload), or None.
        clipboard: pasted text, or None.

    Returns:
        The extracted text, ``""`` when neither source is provided, or a
        ``[Extract Error] ...`` tagged string on failure (so the UI can show it).
    """
    try:
        if file:
            # Use a context manager so the PyMuPDF document is always closed
            # (the original leaked the file handle).
            with fitz.open(file.name) as doc:
                return " ".join(page.get_text() for page in doc)
        elif clipboard:
            return clipboard
    except Exception as e:
        return f"[Extract Error] {str(e)}"
    return ""
# === Preprocessing & Vector Store Setup ===
def process_text(input_text):
    """Split ``input_text`` into overlapping chunks, embed them into a FAISS
    index, and return a RetrievalQA chain driven by GroqLLM.

    Note: the chain is rebuilt from scratch on every call; sources are
    attached to answers via ``return_source_documents=True``.
    """
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50
    ).split_text(input_text)
    documents = [Document(page_content=chunk) for chunk in chunks]
    vector_store = FAISS.from_documents(
        documents, HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    )
    return RetrievalQA.from_chain_type(
        llm=GroqLLM(),
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )
# === Core RAG Handler ===
def handle_input(file, clipboard, query):
    """Gradio entry point: extract text, build the RAG chain, answer ``query``.

    Falls back to a default summarization prompt when ``query`` is empty.
    Returns either the answer text or a user-facing error/warning string.
    """
    try:
        raw_text = extract_text(file, clipboard)
        if not raw_text or not raw_text.strip():
            return "⚠️ Please provide either a PDF or some clipboard text."
        qa = process_text(raw_text)
        prompt = query if query else "Summarize the key points and user-facing risks in this policy."
        # BUG FIX: with return_source_documents=True the chain has two output
        # keys ("result" and "source_documents"), so qa.run(prompt) raises
        # ("`run` not supported when there is not exactly one output key").
        # Invoke the chain and select the answer explicitly.
        result = qa.invoke({"query": prompt})["result"]
        return result
    except Exception as e:
        return f"❌ Error: {str(e)}"
| # === Gradio UI === | |
| with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot") | |
| with gr.Row(): | |
| file_input = gr.File(label="Upload PDF", file_types=[".pdf"]) | |
| clipboard_input = gr.Textbox(label="Or Paste Text", placeholder="Paste policy text here", lines=10) | |
| query_input = gr.Textbox(label="Ask a Question (optional)", placeholder="e.g., What risks am I agreeing to?") | |
| submit_btn = gr.Button("🔍 Analyze") | |
| output = gr.Textbox(label="Output", lines=15) | |
| submit_btn.click(fn=handle_input, inputs=[file_input, clipboard_input, query_input], outputs=output) | |
| demo.launch() | |