# Lexicon_Chatbot / app.py
# (Hugging Face Space page header: commit 15346ff, verified, by Harishkhawaja)
import os
import gradio as gr
import fitz # PyMuPDF
from typing import List
from groq import Groq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains import RetrievalQA
from langchain.llms.base import LLM
# === Groq Client Setup ===
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# === Custom LLM Wrapper ===
class GroqLLM(LLM):
    """LangChain-compatible LLM that delegates to the Groq chat-completions API."""

    # Default Groq model; override via the constructor's `model` argument.
    model: str = "llama3-70b-8192"

    def __init__(self, model: str = None):
        super().__init__()
        if model:
            self.model = model

    def _call(self, prompt: str, stop: List[str] = None) -> str:
        """Send `prompt` as a single user message and return the reply text.

        `stop` sequences supplied by LangChain are forwarded to the API
        (the original silently dropped them). API failures are returned as a
        tagged string instead of raised, so the chain degrades gracefully
        and the error surfaces in the UI output box.
        """
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                stop=stop,  # None is accepted and means "no stop sequences"
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            return f"[Groq API Error] {str(e)}"

    @property
    def _llm_type(self) -> str:
        return "groq_llm"
# === Input Extraction ===
def extract_text(file=None, clipboard=None):
    """Return raw text from an uploaded PDF or pasted clipboard text.

    The PDF takes precedence when both inputs are provided. Returns "" when
    neither is given, and a tagged "[Extract Error] ..." string on failure so
    the caller can surface the problem instead of crashing.
    """
    try:
        if file:
            # Context manager guarantees the PDF handle is closed even if a
            # page fails to render (the original leaked the open document).
            with fitz.open(file.name) as doc:
                return " ".join(page.get_text() for page in doc)
        elif clipboard:
            return clipboard
    except Exception as e:
        return f"[Extract Error] {str(e)}"
    return ""
# === Preprocessing & Vector Store Setup ===
_embeddings_cache = None  # shared sentence-transformer, loaded once per process


def _get_embeddings():
    """Lazily build and cache the embedding model.

    Loading the sentence-transformer weights is slow; the original rebuilt it
    on every request. The model is stateless, so sharing one instance is safe.
    """
    global _embeddings_cache
    if _embeddings_cache is None:
        _embeddings_cache = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return _embeddings_cache


def process_text(input_text):
    """Chunk `input_text`, index the chunks in FAISS, and return a
    RetrievalQA chain (with source documents) backed by the Groq LLM."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(input_text)
    docs = [Document(page_content=chunk) for chunk in chunks]
    db = FAISS.from_documents(docs, _get_embeddings())
    return RetrievalQA.from_chain_type(
        llm=GroqLLM(),
        retriever=db.as_retriever(),
        return_source_documents=True,
    )
# === Core RAG Handler ===
def handle_input(file, clipboard, query):
    """Run the full RAG pipeline: extract text, build the index, answer.

    Returns a user-facing string: the LLM's answer plus up to three source
    snippets, or an error message prefixed with "❌".
    """
    try:
        raw_text = extract_text(file, clipboard)
        if not raw_text:
            return "Please provide either a PDF or clipboard text."
        # Surface extraction failures directly; the original would have
        # indexed the error message as if it were document text.
        if raw_text.startswith("[Extract Error]"):
            return f"❌ {raw_text}"
        # Default prompt: plain-language explanation plus a bulleted risk list.
        default_query = (
            "Explain this policy in simple terms and highlight the risks for the user. "
            "Provide bullet points for risks."
        )
        # Treat a whitespace-only question as "no question asked".
        user_query = query.strip() if query and query.strip() else default_query
        qa = process_text(raw_text)
        response = qa.invoke({"query": user_query})
        result = response["result"]
        sources = response["source_documents"]
        source_preview = ""
        if sources:  # avoid printing an empty "Sources" header
            source_preview = "\n\n📄 Sources:\n" + "\n---\n".join(
                doc.page_content[:300] + "..." for doc in sources[:3]
            )
        return result + source_preview
    except Exception as e:
        return f"❌ Error: {str(e)}"
# === Gradio UI ===
# Layout: two side-by-side inputs (PDF upload / pasted text), an optional
# question box, an Analyze button, and a large read-only output box.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Lexicon: Your Policy Explainer Bot")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        pasted_text = gr.Textbox(
            label="Or Paste Text",
            placeholder="Paste policy text here",
            lines=10,
        )

    question_box = gr.Textbox(
        label="Ask a Question (optional)",
        placeholder="e.g., What risks am I agreeing to?",
    )
    analyze_btn = gr.Button("🔍 Analyze")
    answer_box = gr.Textbox(label="Output", lines=20)

    # Wire the button to the RAG handler defined above.
    analyze_btn.click(
        fn=handle_input,
        inputs=[pdf_upload, pasted_text, question_box],
        outputs=answer_box,
    )

demo.launch()