# Hugging Face Spaces page header ("Spaces: Sleeping") captured by the scrape;
# not part of the application code.
import os
from typing import List

import fitz  # PyMuPDF
import gradio as gr
from groq import Groq
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.llms.base import LLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# === Groq Client Setup ===
# Reads the API key from the GROQ_API_KEY environment variable (may be None
# if unset — the Groq SDK will then fail at call time, not here). The client
# is module-level and shared by every GroqLLM instance below.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# === Custom LLM Wrapper ===
class GroqLLM(LLM):
    """LangChain-compatible LLM wrapper around the Groq chat-completions API.

    Uses the module-level ``client`` for all requests; the model name is a
    pydantic field so LangChain can serialize it.
    """

    # Default Groq model; overridable via the constructor.
    model: str = "llama3-70b-8192"

    def __init__(self, model: str = None):
        """Initialize the wrapper, optionally overriding the default model."""
        super().__init__()
        if model:
            self.model = model

    def _call(self, prompt: str, stop: List[str] = None) -> str:
        """Send *prompt* as a single user message and return the reply text.

        API failures are returned as a tagged "[Groq API Error] ..." string
        rather than raised, so the QA chain surfaces them in the UI instead
        of crashing.
        """
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            return f"[Groq API Error] {str(e)}"

    @property
    def _llm_type(self) -> str:
        # FIX: LangChain's base LLM declares _llm_type as a @property; a plain
        # method shadows it and yields a bound method where a str is expected.
        return "groq_llm"
# === Input Extraction ===
def extract_text(file=None, clipboard=None):
    """Return raw text from an uploaded PDF or pasted clipboard text.

    Prefers the PDF when both inputs are supplied. Returns "" when neither
    is provided, and a tagged "[Extract Error] ..." string on failure so the
    caller can display the problem instead of crashing.
    """
    try:
        if file:
            # FIX: use a context manager so the PyMuPDF document handle is
            # closed even if a page fails to render (the original leaked it).
            with fitz.open(file.name) as doc:
                return " ".join(page.get_text() for page in doc)
        elif clipboard:
            return clipboard
    except Exception as e:
        return f"[Extract Error] {str(e)}"
    return ""
# === Preprocessing & Vector Store Setup ===
def process_text(input_text):
    """Chunk *input_text*, embed it into a FAISS index, and return a
    RetrievalQA chain (with source documents) backed by GroqLLM."""
    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    documents = [Document(page_content=chunk) for chunk in chunker.split_text(input_text)]
    vector_store = FAISS.from_documents(
        documents,
        HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
    )
    return RetrievalQA.from_chain_type(
        llm=GroqLLM(),
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )
# === Core RAG Handler ===
def handle_input(file, clipboard, query):
    """Run the full RAG pipeline for one request.

    Extracts text from the PDF or clipboard, builds the QA chain, answers
    *query* (or a default explain-the-policy prompt), and appends a preview
    of up to three source chunks. Errors come back as a tagged string.
    """
    try:
        raw_text = extract_text(file, clipboard)
        if not raw_text:
            return "Please provide either a PDF or clipboard text."
        # Default prompt: plain-language explanation plus bulleted risks.
        default_query = (
            "Explain this policy in simple terms and highlight the risks for the user. "
            "Provide bullet points for risks."
        )
        qa = process_text(raw_text)
        answer = qa.invoke({"query": query or default_query})
        snippets = [
            doc.page_content[:300] + "..."
            for doc in answer["source_documents"][:3]
        ]
        return answer["result"] + "\n\nπ Sources:\n" + "\n---\n".join(snippets)
    except Exception as e:
        return f"β Error: {str(e)}"
# === Gradio UI ===
# Two input paths (PDF upload or pasted text), an optional question box,
# and a single analyze button wired to handle_input.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π€ Lexicon: Your Policy Explainer Bot")
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        pasted_text = gr.Textbox(
            label="Or Paste Text",
            placeholder="Paste policy text here",
            lines=10,
        )
    question_box = gr.Textbox(
        label="Ask a Question (optional)",
        placeholder="e.g., What risks am I agreeing to?",
    )
    analyze_btn = gr.Button("π Analyze")
    result_box = gr.Textbox(label="Output", lines=20)
    analyze_btn.click(
        fn=handle_input,
        inputs=[pdf_upload, pasted_text, question_box],
        outputs=result_box,
    )
demo.launch()