Spaces:

ChatBotsTA
/

pdf

Runtime error

App Files Files Community

pdf / app.py

ChatBotsTA

Update app.py

01c52bf verified 5 months ago

raw

history blame contribute delete

20.5 kB

	"""
	PDF → Summary → Audio → Talk to PDF → Diagram
	- Summarization: Groq (LLaMA 3)
	- TTS: Deepgram (aura-asteria-en)
	- Talk to PDF: Groq chat completions
	- Diagram Generator: Stable Diffusion via Hugging Face Inference API (v1.5 first; SDXL and v1.4 as fallbacks)
	"""

	import os
	import tempfile
	import traceback
	import time
	from typing import List

	import requests
	import fitz # PyMuPDF
	import gradio as gr
	from groq import Groq

	# ================== Load API Keys ==================
	# Best-effort: when running inside Google Colab, copy notebook secrets into the
	# process environment for any key that is not already set. Outside Colab the
	# import fails and the whole step is skipped silently.
	try:
	    from google.colab import userdata

	    for key_name in ("LLAMA", "DEEPGRAM", "HF_TOKEN"):
	        if os.environ.get(key_name):
	            continue  # an existing environment value always wins
	        val = userdata.get(key_name)
	        if val:
	            os.environ[key_name] = val.strip()
	except Exception:
	    pass

	# ================== Config ==================
	# Maximum number of characters per chunk handed to the summarizer; text at or
	# below this length is summarized in a single request.
	CHUNK_CHARS = 20000
	# Groq chat model used when the caller does not choose one.
	DEFAULT_GROQ_MODEL = "llama-3.1-8b-instant"
	# Deepgram model name sent as the `model` query parameter of the speak request.
	DEEPGRAM_TTS_MODEL = "aura-asteria-en"
	# Sent as the `encoding` query parameter and reused as the audio file suffix.
	DEEPGRAM_ENCODING = "mp3"
	# Hugging Face model that generate_diagram() tries first.
	HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"

	# Global variable to store PDF text for Q&A.
	# process_pdf_pipeline() writes it and ask_pdf_question() reads it.
	# NOTE(review): this is a single module-level dict, so every session served by
	# this process shares (and overwrites) the same document — confirm intended.
	pdf_text_storage = {"text": "", "processed": False}

	# ================== Utils ==================
	def extract_text_from_pdf(file_path: str) -> str:
	    """Return the plain text of every page of a PDF.

	    Args:
	        file_path: Path of the PDF file to open with PyMuPDF.

	    Returns:
	        The text of all pages joined by blank lines, with leading and
	        trailing whitespace removed. Empty when no page yields any text.
	    """
	    # The context manager closes the document even when extracting a page
	    # raises, so the file handle is never left open.
	    with fitz.open(file_path) as doc:
	        text = "\n\n".join(page.get_text("text") for page in doc)
	    return text.strip()

	def chunk_text(text: str, max_chars: int) -> List[str]:
	    """Split ``text`` into pieces of at most ``max_chars`` characters.

	    Each cut is moved back to the last newline inside the window, or to the
	    last space when the window holds no newline, so words are only split
	    when a window contains no whitespace at all.

	    Args:
	        text: The text to split. An empty string yields an empty list.
	        max_chars: Maximum length of one piece; must be positive.

	    Returns:
	        The non-empty pieces, in order, each stripped of surrounding
	        whitespace.

	    Raises:
	        ValueError: If ``text`` is non-empty and ``max_chars`` is not positive
	            (the loop below could never advance).
	    """
	    if not text:
	        return []
	    if max_chars <= 0:
	        raise ValueError("max_chars must be a positive integer")

	    parts: List[str] = []
	    start, total = 0, len(text)
	    while start < total:
	        end = min(start + max_chars, total)
	        if end < total:
	            # Search from start + 1: the previous cut leaves its delimiter at
	            # `start`, and matching it again would make no progress and would
	            # also hide a usable space later in the window.
	            cut = text.rfind("\n", start + 1, end)
	            if cut == -1:
	                cut = text.rfind(" ", start + 1, end)
	            if cut != -1:
	                end = cut
	        piece = text[start:end].strip()
	        if piece:  # whitespace-only windows carry nothing worth summarizing
	            parts.append(piece)
	        start = end
	    return parts

	# ================== Groq Summarization ==================
	def summarize_chunk_via_groq(chunk_text: str, groq_client: Groq, model: str) -> str:
	    """Request a one-paragraph summary of ``chunk_text`` from a Groq chat model.

	    Args:
	        chunk_text: The text to summarize.
	        groq_client: Client whose ``chat.completions.create`` is called.
	        model: Model identifier forwarded to the API.

	    Returns:
	        The content of the first returned choice, stripped of surrounding
	        whitespace.
	    """
	    prompt = f"Summarize this text into a concise paragraph (~180 words max):\n\n{chunk_text}"
	    request_kwargs = {
	        "model": model,
	        "messages": [{"role": "user", "content": prompt}],
	        "temperature": 0.2,
	        "max_tokens": 800,
	    }
	    completion = groq_client.chat.completions.create(**request_kwargs)
	    first_choice = completion.choices[0]
	    return first_choice.message.content.strip()

	def summarize_document(extracted_text: str, groq_api_key: str, groq_model: str = DEFAULT_GROQ_MODEL) -> str:
	    """Summarize a whole document, chunking it when it exceeds ``CHUNK_CHARS``.

	    Text of at most ``CHUNK_CHARS`` characters is summarized in one request.
	    Longer text is split with ``chunk_text``, each chunk is summarized on its
	    own, and the partial summaries are merged by one final request.

	    Args:
	        extracted_text: Full text of the document.
	        groq_api_key: API key used to build the Groq client.
	        groq_model: Model identifier used for every request.

	    Returns:
	        The final summary text, stripped of surrounding whitespace.

	    Raises:
	        RuntimeError: If the text was chunked and no chunk could be summarized.
	        Exception: Whatever the Groq client raises for the single-request path
	            or for the final merge request.
	    """
	    client = Groq(api_key=groq_api_key)
	    if len(extracted_text) <= CHUNK_CHARS:
	        return summarize_chunk_via_groq(extracted_text, client, groq_model)

	    summaries = []
	    last_error = None
	    for chunk in chunk_text(extracted_text, CHUNK_CHARS):
	        try:
	            summaries.append(summarize_chunk_via_groq(chunk, client, groq_model))
	        except Exception as exc:
	            # Best effort: skip a failed chunk instead of placing the error
	            # text into the merge prompt, where the model would summarize it
	            # as if it were part of the document.
	            last_error = exc

	    if not summaries:
	        raise RuntimeError(
	            f"Could not summarize any part of the document: {last_error}"
	        ) from last_error

	    final_prompt = (
	        "Combine and refine the following summaries into a single clear summary (200-300 words):\n\n"
	        + " ".join(summaries)
	    )
	    resp = client.chat.completions.create(
	        model=groq_model,
	        messages=[{"role": "user", "content": final_prompt}],
	        temperature=0.2,
	        max_tokens=900,
	    )
	    return resp.choices[0].message.content.strip()

	# ================== Deepgram TTS ==================
	def deepgram_tts(summary_text: str, deepgram_api_key: str, model: str = DEEPGRAM_TTS_MODEL, encoding: str = DEEPGRAM_ENCODING) -> str:
	    """Convert text to speech with Deepgram and save the audio to a temp file.

	    Args:
	        summary_text: Text to be spoken.
	        deepgram_api_key: Key sent in the ``Authorization: Token ...`` header.
	        model: Value of the ``model`` query parameter.
	        encoding: Value of the ``encoding`` query parameter; also used as the
	            suffix of the file that is written.

	    Returns:
	        Path of the temporary audio file. The file is not deleted
	        automatically; the caller owns it.

	    Raises:
	        RuntimeError: If Deepgram answers with an HTTP status of 400 or above.
	        requests.exceptions.RequestException: On network failure or timeout.
	    """
	    resp = requests.post(
	        "https://api.deepgram.com/v1/speak",
	        # Passing the query through `params` lets requests URL-encode the
	        # values instead of trusting them to be URL-safe.
	        params={"model": model, "encoding": encoding},
	        headers={"Authorization": f"Token {deepgram_api_key}"},
	        json={"text": summary_text},
	        timeout=120,
	    )
	    if resp.status_code >= 400:
	        raise RuntimeError(f"Deepgram TTS failed ({resp.status_code}): {resp.text}")

	    # `with` closes the handle even if the write fails; delete=False keeps the
	    # file on disk so the returned path stays valid.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{encoding}") as tmp:
	        tmp.write(resp.content)
	    return tmp.name

	# ================== Talk to PDF (Separate Function) ==================
	def ask_pdf_question(question: str, groq_key: str, model: str = DEFAULT_GROQ_MODEL) -> str:
	    """Answer a question about the most recently processed PDF.

	    Only the first 15000 characters of the stored document text are sent to
	    the model, so content beyond that point cannot be used in the answer.

	    Args:
	        question: The user's question; ``None`` or blank is rejected.
	        groq_key: Groq API key; ``None`` or blank is rejected.
	        model: Model identifier; a falsy value falls back to
	            ``DEFAULT_GROQ_MODEL``, matching the processing pipeline.

	    Returns:
	        The model's answer prefixed with "🤖 ", or a message starting with
	        "❌" that describes what is missing or what went wrong. Never raises.
	    """
	    if not pdf_text_storage["processed"]:
	        return "❌ Please process a PDF first before asking questions!"

	    # `or ""` keeps a None coming from the UI from raising outside the try block.
	    if not (question or "").strip():
	        return "❌ Please enter a question!"

	    api_key = (groq_key or "").strip()
	    if not api_key:
	        return "❌ Please provide your Groq API key!"

	    try:
	        client = Groq(api_key=api_key)
	        prompt = f"Here is PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\nUser Question: {question}\n\nAnswer strictly based on PDF content. Be concise and specific."
	        resp = client.chat.completions.create(
	            model=model or DEFAULT_GROQ_MODEL,
	            messages=[{"role": "user", "content": prompt}],
	            temperature=0,
	            max_tokens=500,
	        )
	        return f"🤖 {resp.choices[0].message.content.strip()}"
	    except Exception as e:
	        # UI boundary: report the failure as text instead of crashing the handler.
	        return f"❌ Error: {str(e)}"

	# ================== Diagram via HF (Fixed) ==================
	# Upper bound, in seconds, on the wait taken from one "model is loading" reply,
	# so a large server-supplied estimate cannot stall the request handler.
	_MAX_MODEL_LOAD_WAIT_SECONDS = 120


	def _save_image_response(resp):
	    """Save an HTTP 200 image response to a temp ``.png`` file; return its path or None."""
	    if resp.status_code != 200:
	        return None
	    content_type = resp.headers.get('content-type', '')
	    # NOTE(review): the size test also accepts a large non-image body when the
	    # content type is missing or wrong — kept as-is; confirm it is wanted.
	    if 'image' not in content_type and len(resp.content) <= 1000:
	        return None
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
	        tmp.write(resp.content)
	    return tmp.name


	def _model_loading_wait(resp):
	    """Return the capped wait (seconds) asked for by a 'loading' error body, else None."""
	    try:
	        error_data = resp.json()
	    except ValueError:  # body is not JSON
	        return None
	    if not isinstance(error_data, dict):
	        return None
	    if "loading" not in str(error_data.get("error", "")).lower():
	        return None
	    try:
	        wait = float(error_data.get("estimated_time", 20))
	    except (TypeError, ValueError):
	        wait = 20.0
	    return min(max(wait, 0.0), _MAX_MODEL_LOAD_WAIT_SECONDS)


	def generate_diagram(summary: str, hf_token: str, max_retries: int = 3) -> str:
	    """Generate an illustration for ``summary`` and return the image file path.

	    The primary model (``HF_IMAGE_MODEL``) is tried up to ``max_retries``
	    times, then each fallback model once. If none returns an image, a locally
	    rendered placeholder from ``create_text_diagram_placeholder`` is returned.

	    Args:
	        summary: Summary text; its first 500 characters go into the prompt.
	        hf_token: Hugging Face token sent as a Bearer credential.
	        max_retries: Number of attempts against the primary model.

	    Returns:
	        Path of a temporary file holding the image or the placeholder.
	    """
	    headers = {"Authorization": f"Bearer {hf_token}"}
	    url = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}"

	    prompt = f"detailed technical diagram, infographic style, clean illustration of: {summary[:500]}"
	    payload = {"inputs": prompt}

	    for attempt in range(max_retries):
	        is_last_attempt = attempt >= max_retries - 1
	        try:
	            resp = requests.post(url, headers=headers, json=payload, timeout=60)
	        except requests.exceptions.RequestException:
	            if not is_last_attempt:
	                time.sleep((attempt + 1) * 5)
	            continue

	        saved_path = _save_image_response(resp)
	        if saved_path:
	            return saved_path

	        # Default to a linear backoff; when the server reports that the model
	        # is still loading, wait for its own (capped) estimate instead.
	        wait_time = (attempt + 1) * 10
	        if resp.status_code == 503:
	            loading_wait = _model_loading_wait(resp)
	            if loading_wait is not None:
	                wait_time = loading_wait

	        # Sleeping after the final attempt would only delay the fallbacks.
	        if not is_last_attempt:
	            time.sleep(wait_time)

	    alternative_models = [
	        "stabilityai/stable-diffusion-xl-base-1.0",
	        "CompVis/stable-diffusion-v1-4"
	    ]

	    for alt_model in alternative_models:
	        try:
	            alt_url = f"https://api-inference.huggingface.co/models/{alt_model}"
	            resp = requests.post(alt_url, headers=headers, json=payload, timeout=60)
	            saved_path = _save_image_response(resp)
	            if saved_path:
	                return saved_path
	        except Exception:
	            # Deliberate best effort: any failure here just moves on to the
	            # next fallback and finally to the placeholder image.
	            continue

	    return create_text_diagram_placeholder(summary)

	def create_text_diagram_placeholder(summary: str) -> str:
	    """Render the summary as text on a dark 800x600 card and return the file path.

	    Used when no image model produced a diagram. The summary is wrapped at
	    45 characters per line and at most 18 lines are drawn.

	    Args:
	        summary: Text to draw on the card.

	    Returns:
	        Path of a temporary ``.png`` file. If rendering fails for any reason
	        (for example Pillow is not installed), the path of a temporary
	        ``.txt`` file holding a short failure note is returned instead.
	    """
	    try:
	        import textwrap

	        from PIL import Image, ImageDraw, ImageFont

	        width, height = 800, 600
	        img = Image.new('RGB', (width, height), color='#0a0a0a')
	        draw = ImageDraw.Draw(img)

	        try:
	            font = ImageFont.truetype("arial.ttf", 16)
	            title_font = ImageFont.truetype("arial.ttf", 20)
	        except (OSError, ImportError):
	            # Font file missing or FreeType support unavailable.
	            font = title_font = ImageFont.load_default()

	        def draw_text(position, text, fill, text_font):
	            # Some fallback bitmap fonts accept only Latin-1 text; retry with
	            # unsupported characters replaced rather than losing the image.
	            try:
	                draw.text(position, text, fill=fill, font=text_font)
	            except UnicodeEncodeError:
	                safe_text = text.encode("latin-1", "replace").decode("latin-1")
	                draw.text(position, safe_text, fill=fill, font=text_font)

	        draw_text((50, 50), "📊 Document Summary", '#00ff88', title_font)

	        # Greedy word wrap; words longer than the width keep a line to themselves.
	        lines = textwrap.wrap(
	            " ".join(summary.split()),
	            width=45,
	            break_long_words=False,
	            break_on_hyphens=False,
	        )

	        y_offset = 100
	        for line in lines[:18]:
	            draw_text((50, y_offset), line, '#ccffcc', font)
	            y_offset += 25

	        draw.rectangle([25, 25, width-25, height-25], outline='#00ff88', width=3)

	        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
	            img.save(tmp, "PNG")
	        return tmp.name

	    except Exception:
	        # Last resort so the caller always receives a file path.
	        # NOTE(review): an image component may not be able to show a .txt file.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
	            tmp.write(f"Diagram generation failed. Summary: {summary[:200]}...".encode())
	        return tmp.name

	# ================== Main Pipeline ==================
	def process_pdf_pipeline(pdf_file, groq_key, deepgram_key, hf_token, groq_model):
	    """Run the whole pipeline for one PDF: extract, summarize, speak, illustrate.

	    On success the extracted text is stored in ``pdf_text_storage`` for later
	    questions. When the new PDF has no text, or any step raises, the stored
	    text is cleared so questions cannot be answered from an earlier document.

	    Args:
	        pdf_file: Uploaded file object (its ``name`` is used) or a path.
	        groq_key: Groq API key; ``None`` or blank is reported as missing.
	        deepgram_key: Deepgram API key; ``None`` or blank is reported as missing.
	        hf_token: Hugging Face token; ``None`` or blank is reported as missing.
	        groq_model: Model identifier; falsy means ``DEFAULT_GROQ_MODEL``.

	    Returns:
	        A 4-tuple ``(summary_or_error, audio_path, diagram_path, status)``.
	        On any failure the two paths are ``None`` and the first item is a
	        message starting with "❌". Never raises.
	    """
	    try:
	        # Normalize first so a None from the UI yields the "missing" message
	        # instead of an AttributeError, and stray whitespace never reaches an API.
	        groq_key = (groq_key or "").strip()
	        deepgram_key = (deepgram_key or "").strip()
	        hf_token = (hf_token or "").strip()

	        if not groq_key:
	            return "❌ Missing Groq API key!", None, None, "Process a PDF first!"
	        if not deepgram_key:
	            return "❌ Missing Deepgram API key!", None, None, "Process a PDF first!"
	        if not hf_token:
	            return "❌ Missing HuggingFace token!", None, None, "Process a PDF first!"
	        if pdf_file is None:
	            return "❌ Please upload a PDF file!", None, None, "Process a PDF first!"

	        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)

	        text = extract_text_from_pdf(pdf_path)
	        if not text.strip():
	            # The new upload replaces the old one even when it is unusable.
	            pdf_text_storage["text"] = ""
	            pdf_text_storage["processed"] = False
	            return "❌ PDF contains no extractable text!", None, None, "Process a PDF first!"

	        # Store text for Q&A
	        pdf_text_storage["text"] = text
	        pdf_text_storage["processed"] = True

	        summary = summarize_document(text, groq_api_key=groq_key, groq_model=groq_model or DEFAULT_GROQ_MODEL)
	        audio_path = deepgram_tts(summary, deepgram_api_key=deepgram_key)
	        diagram_path = generate_diagram(summary, hf_token)

	        return summary, audio_path, diagram_path, "✅ PDF processed! You can now ask questions below."

	    except Exception as e:
	        # UI boundary: log the full traceback for the operator, show a short
	        # message to the user, and leave no half-processed state behind.
	        traceback.print_exc()
	        pdf_text_storage["text"] = ""
	        pdf_text_storage["processed"] = False
	        return f"❌ Error: {str(e)}", None, None, "Process a PDF first!"

	# ================== Gen-Z Dark Theme CSS ==================
	# Custom stylesheet passed to gr.Blocks(css=...) in build_ui(): black background
	# with green (#00ff88) accents for inputs, buttons, tabs, sliders and scrollbars.
	# The .pulse-effect class is attached through elem_classes in build_ui().
	# NOTE(review): selectors such as .gradio-textbox, .gradio-tab and .gradio-slider
	# are assumed to match class names emitted by the installed Gradio — confirm.
	GENZ_CSS = """
	/* Main container styling */
	.gradio-container {
	background: linear-gradient(135deg, #000000 0%, #0a0a0a 100%) !important;
	color: #00ff88 !important;
	font-family: 'Segoe UI', 'Roboto', sans-serif !important;
	}

	body {
	background: #000000 !important;
	color: #00ff88 !important;
	}

	/* Input fields styling */
	input, textarea, .gradio-textbox, .gradio-file, select {
	background: linear-gradient(145deg, #111111, #1a1a1a) !important;
	color: #00ff88 !important;
	border: 2px solid #00ff88 !important;
	border-radius: 12px !important;
	box-shadow: 0 4px 15px rgba(0, 255, 136, 0.2) !important;
	transition: all 0.3s ease !important;
	}

	input:focus, textarea:focus, .gradio-textbox:focus {
	border-color: #00ff00 !important;
	box-shadow: 0 0 25px rgba(0, 255, 136, 0.5) !important;
	transform: translateY(-2px) !important;
	}

	/* Button styling */
	button {
	background: linear-gradient(145deg, #00ff88, #00cc66) !important;
	color: #000000 !important;
	border: none !important;
	border-radius: 15px !important;
	font-weight: bold !important;
	text-transform: uppercase !important;
	letter-spacing: 1px !important;
	box-shadow: 0 6px 20px rgba(0, 255, 136, 0.3) !important;
	transition: all 0.3s ease !important;
	}

	button:hover {
	background: linear-gradient(145deg, #00cc66, #00ff88) !important;
	transform: translateY(-3px) !important;
	box-shadow: 0 8px 25px rgba(0, 255, 136, 0.5) !important;
	}

	button:active {
	transform: translateY(1px) !important;
	}

	/* Headers and text */
	h1, h2, h3, h4, .gradio-markdown {
	color: #00ff88 !important;
	text-shadow: 0 0 10px rgba(0, 255, 136, 0.3) !important;
	}

	h1 {
	font-size: 2.5em !important;
	background: linear-gradient(45deg, #00ff88, #00cc66) !important;
	-webkit-background-clip: text !important;
	-webkit-text-fill-color: transparent !important;
	}

	/* Tabs styling */
	.gradio-tab {
	background: linear-gradient(145deg, #111111, #1a1a1a) !important;
	color: #00ff88 !important;
	border: 2px solid #00ff88 !important;
	border-radius: 10px !important;
	}

	.gradio-tab.selected {
	background: linear-gradient(145deg, #00ff88, #00cc66) !important;
	color: #000000 !important;
	}

	/* Slider styling */
	.gradio-slider input[type="range"] {
	background: #00ff88 !important;
	}

	.gradio-slider .gradio-slider-track {
	background: #333333 !important;
	}

	.gradio-slider .gradio-slider-thumb {
	background: #00ff88 !important;
	border: 2px solid #00cc66 !important;
	}

	/* File upload area */
	.gradio-file {
	border: 3px dashed #00ff88 !important;
	background: rgba(0, 255, 136, 0.1) !important;
	border-radius: 15px !important;
	}

	/* Progress bar */
	.progress-bar {
	background: linear-gradient(90deg, #00ff88, #00cc66) !important;
	border-radius: 10px !important;
	}

	/* Accordion styling */
	.gradio-accordion {
	background: linear-gradient(145deg, #111111, #1a1a1a) !important;
	border: 2px solid #00ff88 !important;
	border-radius: 12px !important;
	}

	/* Scrollbar */
	::-webkit-scrollbar {
	width: 12px !important;
	}

	::-webkit-scrollbar-track {
	background: #111111 !important;
	}

	::-webkit-scrollbar-thumb {
	background: linear-gradient(145deg, #00ff88, #00cc66) !important;
	border-radius: 6px !important;
	}

	/* Glowing effects */
	.glow {
	box-shadow: 0 0 20px rgba(0, 255, 136, 0.5) !important;
	}

	/* Custom animations */
	@keyframes pulse {
	0% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
	50% { box-shadow: 0 0 30px rgba(0, 255, 136, 0.6); }
	100% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
	}

	.pulse-effect {
	animation: pulse 2s infinite !important;
	}
	"""

	# ================== UI Build Function ==================
	def build_ui():
	    """Build the Gradio Blocks interface and wire its event handlers.

	    Keys found in the ``LLAMA``, ``DEEPGRAM`` and ``HF_TOKEN`` environment
	    variables are kept on the server: the key fields start empty, and a blank
	    field falls back to the environment value when a handler runs. They are
	    deliberately NOT used as the fields' initial values, because a component's
	    value is sent to every visitor's browser.

	    Returns:
	        The assembled ``gr.Blocks`` app, not yet launched.
	    """
	    env_groq = os.environ.get("LLAMA", "")
	    env_deepgram = os.environ.get("DEEPGRAM", "")
	    env_hf = os.environ.get("HF_TOKEN", "")

	    def _typed_or_server(typed_value, server_value):
	        # Prefer what the user typed; otherwise use the server-side key.
	        typed_value = (typed_value or "").strip()
	        return typed_value or server_value

	    def _key_placeholder(hint, server_value):
	        # Tell the user a key is already configured without revealing it.
	        return "Using server-configured key (leave blank)" if server_value else hint

	    def _run_pipeline(pdf_file, groq_value, deepgram_value, hf_value, model_name):
	        return process_pdf_pipeline(
	            pdf_file,
	            _typed_or_server(groq_value, env_groq),
	            _typed_or_server(deepgram_value, env_deepgram),
	            _typed_or_server(hf_value, env_hf),
	            model_name,
	        )

	    def _run_question(question, groq_value, model_name):
	        return ask_pdf_question(question, _typed_or_server(groq_value, env_groq), model_name)

	    with gr.Blocks(css=GENZ_CSS, title="🔥 PDF AI Pipeline", theme=gr.themes.Base()) as demo:

	        # Header - Centered
	        gr.Markdown("""
	        <div style="text-align: center; margin: 20px 0;">
	        <h1 style="font-size: 3.5em; margin-bottom: 10px;">🔥 AI PDF PROCESSOR</h1>
	        <h2 style="font-size: 1.8em; margin-bottom: 10px;">Transform PDFs into Audio, Summaries & Interactive Q&A</h2>
	        <h3 style="font-size: 1.2em; font-style: italic; opacity: 0.9;"> PEC COHORT 3</h3>
	        </div>
	        """, elem_classes=["pulse-effect"])

	        with gr.Row():
	            # Left Column - Upload & API Settings
	            with gr.Column(scale=1):
	                with gr.Accordion("📁 UPLOAD PDF", open=True):
	                    pdf_input = gr.File(
	                        label="Drop your PDF here",
	                        file_types=[".pdf"],
	                        height=150
	                    )

	                with gr.Accordion("🔑 API KEYS", open=False):
	                    gr.Markdown("Keep your keys secure • Use env vars in production")
	                    groq_key = gr.Textbox(
	                        label="🤖 Groq API Key",
	                        value="",
	                        type="password",
	                        placeholder=_key_placeholder("sk-...", env_groq)
	                    )
	                    deepgram_key = gr.Textbox(
	                        label="🎤 Deepgram API Key",
	                        value="",
	                        type="password",
	                        placeholder=_key_placeholder("Enter Deepgram key", env_deepgram)
	                    )
	                    hf_key = gr.Textbox(
	                        label="🤗 HuggingFace Token",
	                        value="",
	                        type="password",
	                        placeholder=_key_placeholder("hf_...", env_hf)
	                    )

	                with gr.Accordion("⚙️ SETTINGS", open=False):
	                    # NOTE(review): verify these model IDs against Groq's
	                    # current model list; hosted models get retired over time.
	                    groq_model = gr.Dropdown(
	                        label="🧠 AI Model",
	                        choices=[
	                            "llama-3.1-8b-instant",
	                            "llama-3.1-70b-versatile",
	                            "mixtral-8x7b-32768",
	                            "gemma2-9b-it"
	                        ],
	                        value=DEFAULT_GROQ_MODEL
	                    )

	                # Main Process Button
	                process_btn = gr.Button(
	                    "🚀 PROCESS PDF",
	                    variant="primary",
	                    size="lg",
	                    elem_classes=["pulse-effect"]
	                )

	            # Right Column - Results
	            with gr.Column(scale=2):
	                with gr.Tabs():
	                    with gr.Tab("📝 SUMMARY"):
	                        summary_output = gr.Textbox(
	                            label="AI Generated Summary",
	                            lines=12,
	                            placeholder="Your PDF summary will appear here...",
	                            interactive=False
	                        )

	                    with gr.Tab("🔊 AUDIO"):
	                        audio_output = gr.Audio(
	                            label="Listen to Summary",
	                            type="filepath",
	                            interactive=False
	                        )

	                    with gr.Tab("🎨 DIAGRAM"):
	                        diagram_output = gr.Image(
	                            label="Visual Representation",
	                            interactive=False,
	                            height=400
	                        )

	        # Separate Q&A Section
	        gr.Markdown("---")
	        gr.Markdown("## 💬 CHAT WITH YOUR PDF")

	        with gr.Row():
	            with gr.Column(scale=3):
	                question_input = gr.Textbox(
	                    label="Ask anything about your PDF",
	                    placeholder="What are the main findings? • Who are the key people mentioned? • Summarize chapter 2...",
	                    lines=2
	                )
	            with gr.Column(scale=1):
	                ask_btn = gr.Button("📨 SEND", variant="secondary", size="lg")

	        chat_output = gr.Textbox(
	            label="🤖 AI Response",
	            lines=8,
	            placeholder="Upload and process a PDF first, then ask your questions!",
	            interactive=False
	        )

	        # Status indicator
	        status_output = gr.Textbox(
	            label="📊 Status",
	            value="Ready to process PDF...",
	            interactive=False
	        )

	        # Footer
	        gr.Markdown("""
	        ---
	        🔥 Pro Tips:
	        • Upload PDFs with extractable text (not image-only)
	        • Questions work only after processing
	        • Audio generation takes ~30-60 seconds
	        • Diagrams may take longer depending on HF API load

	        Built with ❤️ for the AI generation
	        """)

	        # Event handlers. api_name is pinned so the endpoint names do not
	        # change now that the handlers are wrapped for the key fallback.
	        process_btn.click(
	            fn=_run_pipeline,
	            inputs=[pdf_input, groq_key, deepgram_key, hf_key, groq_model],
	            outputs=[summary_output, audio_output, diagram_output, status_output],
	            show_progress=True,
	            api_name="process_pdf_pipeline"
	        )

	        ask_btn.click(
	            fn=_run_question,
	            inputs=[question_input, groq_key, groq_model],
	            outputs=[chat_output],
	            show_progress=False,
	            api_name="ask_pdf_question"
	        )

	        # Enter key support for questions
	        question_input.submit(
	            fn=_run_question,
	            inputs=[question_input, groq_key, groq_model],
	            outputs=[chat_output],
	            api_name="ask_pdf_question_1"
	        )

	    return demo

	if __name__ == "__main__":
	    # Script entry point: build the interface, then launch it with the
	    # share, debug and show_error flags all switched on.
	    launch_options = {"share": True, "debug": True, "show_error": True}
	    demo = build_ui()
	    demo.launch(**launch_options)