Hugging Face Space (status: Sleeping)
import os
from typing import List, Optional

import fitz  # PyMuPDF
import gradio as gr
from groq import Groq
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain.llms.base import LLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
| # === Groq Client Setup === | |
| client = Groq(api_key=os.environ.get("GROQ_API_KEY")) | |
# === Custom LLM Wrapper ===
class GroqLLM(LLM):
    """LangChain-compatible LLM wrapper around the Groq chat-completions API.

    Relies on the module-level ``client`` being configured with a valid
    GROQ_API_KEY. API errors are returned as tagged strings rather than
    raised, so the calling UI can display them directly.
    """

    # Default Groq model; override via the constructor.
    model: str = "llama3-70b-8192"

    def __init__(self, model: Optional[str] = None):
        """Create the wrapper, optionally overriding the default model name."""
        super().__init__()
        if model:
            self.model = model

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        ``stop`` sequences are forwarded to the API (the original accepted
        but silently ignored them). ``**kwargs`` absorbs LangChain's
        run-manager/callback arguments.
        """
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                stop=stop,  # was accepted but ignored before
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            return f"[Groq API Error] {str(e)}"

    @property
    def _llm_type(self) -> str:
        # LangChain declares _llm_type as an abstract *property*; a plain
        # method here would make property-style accesses (used by LangChain's
        # serialization/identifying-params machinery) return a bound method
        # instead of the string.
        return "groq_llm"
# === Input Extraction ===
def extract_text(file=None, clipboard=None):
    """Return raw text from an uploaded PDF ``file`` or, failing that, from
    pasted ``clipboard`` text.

    Args:
        file: an object with a ``.name`` path attribute (Gradio upload), or None.
        clipboard: pasted text, or None.

    Returns:
        The extracted text, ``""`` when neither source is provided, or a
        ``[Extract Error] ...`` tagged string on failure (so the UI can show it).
    """
    try:
        if file:
            # Use a context manager so the PyMuPDF document is always closed
            # (the original leaked the file handle).
            with fitz.open(file.name) as doc:
                return " ".join(page.get_text() for page in doc)
        elif clipboard:
            return clipboard
    except Exception as e:
        return f"[Extract Error] {str(e)}"
    return ""
# === Preprocessing & Vector Store Setup ===
def process_text(input_text):
    """Split ``input_text`` into overlapping chunks, embed them into a FAISS
    index, and return a RetrievalQA chain driven by GroqLLM.

    Note: the chain is rebuilt from scratch on every call; sources are
    attached to answers via ``return_source_documents=True``.
    """
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500, chunk_overlap=50
    ).split_text(input_text)
    documents = [Document(page_content=chunk) for chunk in chunks]
    vector_store = FAISS.from_documents(
        documents, HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    )
    return RetrievalQA.from_chain_type(
        llm=GroqLLM(),
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )
# === Core RAG Handler ===
def handle_input(file, clipboard, query):
    """Gradio entry point: extract text, build the RAG chain, answer ``query``.

    Falls back to a default summarization prompt when ``query`` is empty.
    Returns either the answer text or a user-facing error/warning string.
    """
    try:
        raw_text = extract_text(file, clipboard)
        if not raw_text or not raw_text.strip():
            return "⚠️ Please provide either a PDF or some clipboard text."
        qa = process_text(raw_text)
        prompt = query if query else "Summarize the key points and user-facing risks in this policy."
        # BUG FIX: with return_source_documents=True the chain has two output
        # keys ("result" and "source_documents"), so qa.run(prompt) raises
        # ("`run` not supported when there is not exactly one output key").
        # Invoke the chain and select the answer explicitly.
        result = qa.invoke({"query": prompt})["result"]
        return result
    except Exception as e:
        return f"❌ Error: {str(e)}"
| # === Gradio UI === | |
| with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
| gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot") | |
| with gr.Row(): | |
| file_input = gr.File(label="Upload PDF", file_types=[".pdf"]) | |
| clipboard_input = gr.Textbox(label="Or Paste Text", placeholder="Paste policy text here", lines=10) | |
| query_input = gr.Textbox(label="Ask a Question (optional)", placeholder="e.g., What risks am I agreeing to?") | |
| submit_btn = gr.Button("🔍 Analyze") | |
| output = gr.Textbox(label="Output", lines=15) | |
| submit_btn.click(fn=handle_input, inputs=[file_input, clipboard_input, query_input], outputs=output) | |
| demo.launch() | |