# pdf / app.py
# ChatBotsTA's picture
# Update app.py
# 01c52bf verified
"""
PDF → Summary → Audio → Talk to PDF → Diagram
- Summarization: Groq (LLaMA 3)
- TTS: Deepgram (aura-asteria-en)
- Talk to PDF: Groq chat completions
- Diagram Generator: Stable Diffusion v1.5, falling back to SDXL / v1.4 (Hugging Face Inference API)
"""
import os
import tempfile
import traceback
import time
from typing import List
import requests
import fitz # PyMuPDF
import gradio as gr
from groq import Groq
# ================== Load API Keys ==================
def load_secrets_into_env(get_secret, names=("LLAMA", "DEEPGRAM", "HF_TOKEN")):
    """Copy named secrets into ``os.environ`` without overriding existing values.

    Args:
        get_secret: Callable that takes a secret name and returns its value
            (or a falsy value when unset). It is allowed to raise.
        names: Environment-variable / secret names to populate.

    Every name is handled independently, so one failing lookup does not
    prevent the remaining keys from being loaded.
    """
    for name in names:
        if os.environ.get(name):
            continue  # an already-set environment variable always wins
        try:
            val = get_secret(name)
            if val:
                os.environ[name] = val.strip()
        except Exception:
            # Best-effort: skip this secret (missing, access denied,
            # non-string value, ...) and carry on with the others.
            continue


try:
    from google.colab import userdata
except Exception:
    # Not running in Colab (or the module is unusable): keys must then come
    # from the process environment or be typed into the UI.
    pass
else:
    load_secrets_into_env(userdata.get)
# ================== Config ==================
# Documents longer than this many characters are split into chunks of at most
# this size before being summarized (see summarize_document).
CHUNK_CHARS = 20000
# Groq chat model used when the caller does not choose one.
DEFAULT_GROQ_MODEL = "llama-3.1-8b-instant"
# Deepgram text-to-speech model and audio encoding; the encoding is also used
# as the temp-file suffix in deepgram_tts.
DEEPGRAM_TTS_MODEL = "aura-asteria-en"
DEEPGRAM_ENCODING = "mp3"
# First Hugging Face model tried by generate_diagram, which falls back to
# other models (and finally a locally drawn placeholder) when it fails.
HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"
# Global variable to store PDF text for Q&A
# Written by process_pdf_pipeline and read by ask_pdf_question; "processed"
# gates whether questions are accepted.
# NOTE(review): being module-level, this appears to be shared by every user
# session of the app — confirm that is acceptable.
pdf_text_storage = {"text": "", "processed": False}
# ================== Utils ==================
def extract_text_from_pdf(file_path: str) -> str:
    """Return the plain text of every page of the PDF at *file_path*.

    Each page is extracted with ``get_text("text")``; pages are joined with a
    blank line and the result is stripped of surrounding whitespace.

    The document is opened as a context manager so it is closed even when
    text extraction raises part-way through.
    """
    with fitz.open(file_path) as doc:
        text = "\n\n".join(page.get_text("text") for page in doc)
    return text.strip()
def chunk_text(text: str, max_chars: int) -> List[str]:
    """Split *text* into stripped, non-empty pieces of at most *max_chars* characters.

    A piece is cut at the last newline inside the window when there is one,
    otherwise at the last space, otherwise hard at *max_chars*.

    Args:
        text: The text to split. An empty string yields an empty list.
        max_chars: Maximum length of each piece; must be positive.

    Returns:
        The pieces in document order. Whitespace-only pieces are dropped.

    Raises:
        ValueError: If *max_chars* is not positive (the scan could never
            advance).
    """
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")
    if not text:
        return []

    parts = []
    start, total = 0, len(text)
    while start < total:
        end = min(start + max_chars, total)
        if end < total:
            # Search strictly after `start`: the previous piece ended ON its
            # separator, so text[start] is often that same newline/space.
            # Re-finding it there would block the space fallback and force a
            # mid-word cut.
            cut = text.rfind("\n", start + 1, end)
            if cut == -1:
                cut = text.rfind(" ", start + 1, end)
            if cut != -1:
                end = cut
        piece = text[start:end].strip()
        if piece:
            parts.append(piece)
        start = end  # end > start always holds, so the loop terminates
    return parts
# ================== Groq Summarization ==================
def summarize_chunk_via_groq(chunk_text: str, groq_client: Groq, model: str) -> str:
    """Ask the Groq chat API for a short summary of one piece of text.

    Args:
        chunk_text: The text to summarize.
        groq_client: Client exposing ``chat.completions.create``.
        model: Name of the chat model to use.

    Returns:
        The content of the first returned choice, stripped of surrounding
        whitespace.
    """
    instruction = f"Summarize this text into a concise paragraph (~180 words max):\n\n{chunk_text}"
    request = {
        "model": model,
        "messages": [{"role": "user", "content": instruction}],
        "temperature": 0.2,
        "max_tokens": 800,
    }
    completion = groq_client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
def summarize_document(extracted_text: str, groq_api_key: str, groq_model: str = DEFAULT_GROQ_MODEL) -> str:
    """Summarize a whole document with Groq, chunking it when it is long.

    Text of at most ``CHUNK_CHARS`` characters is summarized in one request.
    Longer text is split with ``chunk_text``, each chunk is summarized
    separately, and the partial summaries are merged by a final request.

    Args:
        extracted_text: Full text of the document.
        groq_api_key: API key used to create the Groq client.
        groq_model: Chat model name.

    Returns:
        The final summary, stripped of surrounding whitespace.

    Raises:
        RuntimeError: If no chunk of a long document could be summarized.
        Exception: Whatever the Groq client raises for the single-request
            path or for the final merge request.
    """
    client = Groq(api_key=groq_api_key)
    if len(extracted_text) <= CHUNK_CHARS:
        return summarize_chunk_via_groq(extracted_text, client, groq_model)

    summaries = []
    last_error = None
    for chunk in chunk_text(extracted_text, CHUNK_CHARS):
        try:
            summaries.append(summarize_chunk_via_groq(chunk, client, groq_model))
        except Exception as exc:
            # Tolerate individual chunk failures, but never feed the error
            # text to the model as if it were document content.
            last_error = exc

    if not summaries:
        raise RuntimeError(
            f"Could not summarize any part of the document: {last_error}"
        ) from last_error

    final_prompt = (
        "Combine and refine the following summaries into a single clear summary (200-300 words):\n\n"
        + " ".join(summaries)
    )
    resp = client.chat.completions.create(
        model=groq_model,
        messages=[{"role": "user", "content": final_prompt}],
        temperature=0.2,
        max_tokens=900,
    )
    return resp.choices[0].message.content.strip()
# ================== Deepgram TTS ==================
def deepgram_tts(summary_text: str, deepgram_api_key: str, model: str = DEEPGRAM_TTS_MODEL, encoding: str = DEEPGRAM_ENCODING) -> str:
    """Convert text to speech with Deepgram and return the audio file path.

    Args:
        summary_text: Text to speak.
        deepgram_api_key: Key sent in the ``Authorization: Token ...`` header.
        model: Deepgram voice model, passed as a query parameter.
        encoding: Audio encoding, passed as a query parameter and used as the
            temp-file suffix.

    Returns:
        Path of a temporary file (not auto-deleted) holding the response body.

    Raises:
        RuntimeError: If Deepgram answers with an HTTP status of 400 or above.
    """
    endpoint = f"https://api.deepgram.com/v1/speak?model={model}&encoding={encoding}"
    response = requests.post(
        endpoint,
        headers={"Authorization": f"Token {deepgram_api_key}"},
        json={"text": summary_text},
        timeout=120,
    )
    if response.status_code >= 400:
        raise RuntimeError(f"Deepgram TTS failed ({response.status_code}): {response.text}")

    # delete=False: the caller hands this path to the UI after we return.
    audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=f".{encoding}")
    audio_file.write(response.content)
    audio_file.close()
    return audio_file.name
# ================== Talk to PDF (Separate Function) ==================
def ask_pdf_question(question: str, groq_key: str, model: str = DEFAULT_GROQ_MODEL) -> str:
    """Answer a question about the most recently processed PDF via Groq.

    Reads the text stored in ``pdf_text_storage`` (only its first 15000
    characters are sent to the model).

    Args:
        question: The user's question; ``None`` or blank is rejected.
        groq_key: Groq API key; ``None`` or blank is rejected. Surrounding
            whitespace is removed before use.
        model: Chat model name; a falsy value falls back to
            ``DEFAULT_GROQ_MODEL``.

    Returns:
        The model's answer prefixed with a robot emoji, or a message starting
        with "❌" describing why no answer could be produced. This function
        does not raise for API errors; they are reported in the return value.
    """
    if not pdf_text_storage["processed"]:
        return "❌ Please process a PDF first before asking questions!"

    # Normalize first so None (e.g. a cleared UI field) is treated like blank
    # instead of crashing on .strip().
    question = (question or "").strip()
    if not question:
        return "❌ Please enter a question!"
    groq_key = (groq_key or "").strip()
    if not groq_key:
        return "❌ Please provide your Groq API key!"

    try:
        client = Groq(api_key=groq_key)
        prompt = f"Here is PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\nUser Question: {question}\n\nAnswer strictly based on PDF content. Be concise and specific."
        resp = client.chat.completions.create(
            # Same fallback the processing pipeline applies to its model.
            model=model or DEFAULT_GROQ_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=500,
        )
        return f"🤖 {resp.choices[0].message.content.strip()}"
    except Exception as e:
        return f"❌ Error: {str(e)}"
# ================== Diagram via HF (Fixed) ==================
def _looks_like_image(data: bytes) -> bool:
    """Return True if *data* starts with a PNG, JPEG, GIF or WebP signature."""
    if data.startswith((b"\x89PNG\r\n\x1a\n", b"\xff\xd8\xff", b"GIF87a", b"GIF89a")):
        return True
    return data[:4] == b"RIFF" and data[8:12] == b"WEBP"


def _save_image_response(resp):
    """Save an HTTP 200 image response to a temp ``.png`` file.

    Returns the file path, or None when the response is not a 200 or its
    body is not an image (by content-type header or by file signature).
    """
    if resp.status_code != 200:
        return None
    content_type = resp.headers.get("content-type", "")
    if "image" not in content_type and not _looks_like_image(resp.content):
        # e.g. a large JSON error body: must not be written out as a PNG.
        return None
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        tmp.write(resp.content)
    return tmp.name


def _model_loading_wait(resp, default: float = 20.0, cap: float = 60.0):
    """Return how long to wait if a 503 body reports the model is loading.

    Returns None for any other response. The server-supplied
    ``estimated_time`` is validated and capped at *cap* seconds so a bad
    value cannot stall the request indefinitely.
    """
    if resp.status_code != 503:
        return None
    try:
        data = resp.json()
    except ValueError:
        return None
    if not isinstance(data, dict):
        return None
    error = data.get("error", "")
    if not isinstance(error, str) or "loading" not in error.lower():
        return None
    try:
        wait = float(data.get("estimated_time", default))
    except (TypeError, ValueError):
        wait = default
    if not wait >= 0:  # also rejects NaN
        wait = default
    return min(wait, cap)


def generate_diagram(summary: str, hf_token: str, max_retries: int = 3) -> str:
    """Generate an illustration for *summary* and return the image file path.

    Tries ``HF_IMAGE_MODEL`` up to *max_retries* times through the Hugging
    Face Inference API, then two alternative models once each, and finally
    falls back to ``create_text_diagram_placeholder``.

    Args:
        summary: Summary text; its first 500 characters go into the prompt.
        hf_token: Hugging Face token sent as a Bearer credential.
        max_retries: Number of attempts against the primary model.

    Returns:
        Path to a temporary image file, or whatever the placeholder
        generator returns when every model fails.
    """
    headers = {"Authorization": f"Bearer {hf_token}"}
    prompt = f"detailed technical diagram, infographic style, clean illustration of: {summary[:500]}"
    payload = {"inputs": prompt}
    primary_url = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}"

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            resp = requests.post(primary_url, headers=headers, json=payload, timeout=60)
        except requests.exceptions.RequestException:
            if not is_last_attempt:
                time.sleep((attempt + 1) * 5)
            continue

        image_path = _save_image_response(resp)
        if image_path is not None:
            return image_path
        if is_last_attempt:
            break  # no point sleeping when no retry follows
        loading_wait = _model_loading_wait(resp)
        time.sleep(loading_wait if loading_wait is not None else (attempt + 1) * 10)

    alternative_models = (
        "stabilityai/stable-diffusion-xl-base-1.0",
        "CompVis/stable-diffusion-v1-4",
    )
    for alt_model in alternative_models:
        alt_url = f"https://api-inference.huggingface.co/models/{alt_model}"
        try:
            resp = requests.post(alt_url, headers=headers, json=payload, timeout=60)
            image_path = _save_image_response(resp)
        except (requests.exceptions.RequestException, OSError):
            continue
        if image_path is not None:
            return image_path

    return create_text_diagram_placeholder(summary)
def create_text_diagram_placeholder(summary: str) -> str:
    """Render *summary* as a simple text card and return the file path.

    Draws a titled, bordered 800x600 PNG with the summary wrapped at 45
    characters per line (first 18 lines only).

    Args:
        summary: Text to render.

    Returns:
        Path to a temporary ``.png`` file. If the image cannot be produced
        (for example Pillow is not installed), path to a temporary ``.txt``
        file containing a short failure note instead.
        NOTE(review): callers that feed the result to an image component
        will not be able to display the ``.txt`` fallback.
    """
    try:
        from PIL import Image, ImageDraw, ImageFont

        width, height = 800, 600
        img = Image.new('RGB', (width, height), color='#0a0a0a')
        draw = ImageDraw.Draw(img)
        try:
            font = ImageFont.truetype("arial.ttf", 16)
            title_font = ImageFont.truetype("arial.ttf", 20)
        except (OSError, ImportError):
            # Font file missing, or Pillow built without FreeType.
            font = title_font = ImageFont.load_default()

        def draw_text(position, text, fill, text_font):
            """Draw text, degrading unsupported characters to '?'."""
            try:
                draw.text(position, text, fill=fill, font=text_font)
            except UnicodeEncodeError:
                # Pillow's legacy bitmap default font only handles Latin-1;
                # keep the picture rather than failing over an emoji or a
                # curly quote.
                safe = text.encode("latin-1", "replace").decode("latin-1")
                draw.text(position, safe, fill=fill, font=text_font)

        draw_text((50, 50), "📊 Document Summary", '#00ff88', title_font)

        # Greedy word wrap: a word joins the current line only if the line
        # stays within max_width characters.
        max_width = 45
        lines = []
        current_line = []
        for word in summary.split():
            if len(' '.join(current_line + [word])) <= max_width:
                current_line.append(word)
            else:
                if current_line:
                    lines.append(' '.join(current_line))
                current_line = [word]
        if current_line:
            lines.append(' '.join(current_line))

        y_offset = 100
        for line in lines[:18]:
            draw_text((50, y_offset), line, '#ccffcc', font)
            y_offset += 25
        draw.rectangle([25, 25, width-25, height-25], outline='#00ff88', width=3)

        # Write through the open handle instead of reopening tmp.name, which
        # can fail on platforms that lock open temp files.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            img.save(tmp, "PNG")
        return tmp.name
    except Exception:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
            tmp.write(f"Diagram generation failed. Summary: {summary[:200]}...".encode())
        return tmp.name
# ================== Main Pipeline ==================
def process_pdf_pipeline(pdf_file, groq_key, deepgram_key, hf_token, groq_model):
    """Run the full PDF -> summary -> audio -> diagram pipeline for the UI.

    Args:
        pdf_file: Uploaded file (an object with a ``name`` path attribute, or
            anything whose ``str()`` is the path); ``None`` when nothing was
            uploaded.
        groq_key: Groq API key.
        deepgram_key: Deepgram API key.
        hf_token: Hugging Face token.
        groq_model: Chat model name; falsy means ``DEFAULT_GROQ_MODEL``.

    Returns:
        A 4-tuple ``(summary_or_error, audio_path, diagram_path, status)``.
        On any failure the middle two items are ``None`` and the status is
        "Process a PDF first!".

    Side effects:
        Stores the extracted text in ``pdf_text_storage`` and flags it as
        processed; the flag is cleared again if an exception occurs.
    """
    not_ready = "Process a PDF first!"

    def failure(message):
        return message, None, None, not_ready

    try:
        # Checked in order, one at a time, so the first blank credential is
        # the one reported.
        required = (
            (groq_key, "❌ Missing Groq API key!"),
            (deepgram_key, "❌ Missing Deepgram API key!"),
            (hf_token, "❌ Missing HuggingFace token!"),
        )
        for credential, complaint in required:
            if not credential.strip():
                return failure(complaint)
        if pdf_file is None:
            return failure("❌ Please upload a PDF file!")

        if hasattr(pdf_file, "name"):
            pdf_path = pdf_file.name
        else:
            pdf_path = str(pdf_file)

        document_text = extract_text_from_pdf(pdf_path)
        if not document_text.strip():
            return failure("❌ PDF contains no extractable text!")

        # Make the text available to the Q&A handler.
        pdf_text_storage["text"] = document_text
        pdf_text_storage["processed"] = True

        chosen_model = groq_model or DEFAULT_GROQ_MODEL
        summary = summarize_document(document_text, groq_api_key=groq_key, groq_model=chosen_model)
        audio_path = deepgram_tts(summary, deepgram_api_key=deepgram_key)
        diagram_path = generate_diagram(summary, hf_token)
        return summary, audio_path, diagram_path, "✅ PDF processed! You can now ask questions below."
    except Exception as e:
        pdf_text_storage["processed"] = False
        return failure(f"❌ Error: {str(e)}")
# ================== Gen-Z Dark Theme CSS ==================
# Custom stylesheet handed to gr.Blocks(css=...) in build_ui: black
# backgrounds with green (#00ff88 / #00cc66) accents, plus the
# ".pulse-effect" animation class that build_ui attaches to the header and
# the process button via elem_classes.
# NOTE(review): selectors such as .gradio-textbox / .gradio-tab presumably
# match Gradio's generated class names — confirm against the installed
# Gradio version.
GENZ_CSS = """
/* Main container styling */
.gradio-container {
background: linear-gradient(135deg, #000000 0%, #0a0a0a 100%) !important;
color: #00ff88 !important;
font-family: 'Segoe UI', 'Roboto', sans-serif !important;
}
body {
background: #000000 !important;
color: #00ff88 !important;
}
/* Input fields styling */
input, textarea, .gradio-textbox, .gradio-file, select {
background: linear-gradient(145deg, #111111, #1a1a1a) !important;
color: #00ff88 !important;
border: 2px solid #00ff88 !important;
border-radius: 12px !important;
box-shadow: 0 4px 15px rgba(0, 255, 136, 0.2) !important;
transition: all 0.3s ease !important;
}
input:focus, textarea:focus, .gradio-textbox:focus {
border-color: #00ff00 !important;
box-shadow: 0 0 25px rgba(0, 255, 136, 0.5) !important;
transform: translateY(-2px) !important;
}
/* Button styling */
button {
background: linear-gradient(145deg, #00ff88, #00cc66) !important;
color: #000000 !important;
border: none !important;
border-radius: 15px !important;
font-weight: bold !important;
text-transform: uppercase !important;
letter-spacing: 1px !important;
box-shadow: 0 6px 20px rgba(0, 255, 136, 0.3) !important;
transition: all 0.3s ease !important;
}
button:hover {
background: linear-gradient(145deg, #00cc66, #00ff88) !important;
transform: translateY(-3px) !important;
box-shadow: 0 8px 25px rgba(0, 255, 136, 0.5) !important;
}
button:active {
transform: translateY(1px) !important;
}
/* Headers and text */
h1, h2, h3, h4, .gradio-markdown {
color: #00ff88 !important;
text-shadow: 0 0 10px rgba(0, 255, 136, 0.3) !important;
}
h1 {
font-size: 2.5em !important;
background: linear-gradient(45deg, #00ff88, #00cc66) !important;
-webkit-background-clip: text !important;
-webkit-text-fill-color: transparent !important;
}
/* Tabs styling */
.gradio-tab {
background: linear-gradient(145deg, #111111, #1a1a1a) !important;
color: #00ff88 !important;
border: 2px solid #00ff88 !important;
border-radius: 10px !important;
}
.gradio-tab.selected {
background: linear-gradient(145deg, #00ff88, #00cc66) !important;
color: #000000 !important;
}
/* Slider styling */
.gradio-slider input[type="range"] {
background: #00ff88 !important;
}
.gradio-slider .gradio-slider-track {
background: #333333 !important;
}
.gradio-slider .gradio-slider-thumb {
background: #00ff88 !important;
border: 2px solid #00cc66 !important;
}
/* File upload area */
.gradio-file {
border: 3px dashed #00ff88 !important;
background: rgba(0, 255, 136, 0.1) !important;
border-radius: 15px !important;
}
/* Progress bar */
.progress-bar {
background: linear-gradient(90deg, #00ff88, #00cc66) !important;
border-radius: 10px !important;
}
/* Accordion styling */
.gradio-accordion {
background: linear-gradient(145deg, #111111, #1a1a1a) !important;
border: 2px solid #00ff88 !important;
border-radius: 12px !important;
}
/* Scrollbar */
::-webkit-scrollbar {
width: 12px !important;
}
::-webkit-scrollbar-track {
background: #111111 !important;
}
::-webkit-scrollbar-thumb {
background: linear-gradient(145deg, #00ff88, #00cc66) !important;
border-radius: 6px !important;
}
/* Glowing effects */
.glow {
box-shadow: 0 0 20px rgba(0, 255, 136, 0.5) !important;
}
/* Custom animations */
@keyframes pulse {
0% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
50% { box-shadow: 0 0 30px rgba(0, 255, 136, 0.6); }
100% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
}
.pulse-effect {
animation: pulse 2s infinite !important;
}
"""
# ================== UI Build Function ==================
def build_ui():
    """Build and return the Gradio Blocks app (not yet launched).

    Layout: a left column with the PDF upload, API-key fields, model choice
    and the process button; a right column with summary / audio / diagram
    tabs; below them a question box, the answer box, a status box and a
    footer.

    Security: API keys found in the environment (``LLAMA``, ``DEEPGRAM``,
    ``HF_TOKEN``) are never placed into the textbox values, because a
    component's initial value is delivered to every browser that opens the
    app. A blank key field is instead resolved to the environment value on
    the server just before the handler runs.

    Returns:
        The configured ``gr.Blocks`` instance.
    """
    # Snapshot once at build time, like the values previously pre-filled.
    server_keys = {
        name: os.environ.get(name, "")
        for name in ("LLAMA", "DEEPGRAM", "HF_TOKEN")
    }
    server_key_hint = "Leave blank to use the server-configured key"

    def resolve_key(typed, env_name):
        """Prefer the key typed by the user; otherwise the server-side one."""
        typed = (typed or "").strip()
        return typed or server_keys[env_name]

    def run_pipeline(pdf_file, groq, deepgram, hf, model):
        """Resolve blank keys server-side, then run the processing pipeline."""
        return process_pdf_pipeline(
            pdf_file,
            resolve_key(groq, "LLAMA"),
            resolve_key(deepgram, "DEEPGRAM"),
            resolve_key(hf, "HF_TOKEN"),
            model,
        )

    def answer_question(question, groq, model):
        """Resolve a blank Groq key server-side, then answer the question."""
        return ask_pdf_question(question, resolve_key(groq, "LLAMA"), model)

    with gr.Blocks(css=GENZ_CSS, title="🔥 PDF AI Pipeline", theme=gr.themes.Base()) as demo:
        # Header - Centered
        gr.Markdown("""
<div style="text-align: center; margin: 20px 0;">
<h1 style="font-size: 3.5em; margin-bottom: 10px;">🔥 AI PDF PROCESSOR</h1>
<h2 style="font-size: 1.8em; margin-bottom: 10px;">Transform PDFs into Audio, Summaries & Interactive Q&A</h2>
<h3 style="font-size: 1.2em; font-style: italic; opacity: 0.9;"> PEC COHORT 3</h3>
</div>
""", elem_classes=["pulse-effect"])
        with gr.Row():
            # Left Column - Upload & API Settings
            with gr.Column(scale=1):
                with gr.Accordion("📁 UPLOAD PDF", open=True):
                    pdf_input = gr.File(
                        label="Drop your PDF here",
                        file_types=[".pdf"],
                        height=150
                    )
                with gr.Accordion("🔑 API KEYS", open=False):
                    gr.Markdown("*Keep your keys secure • Use env vars in production*")
                    groq_key = gr.Textbox(
                        label="🤖 Groq API Key",
                        type="password",
                        placeholder=server_key_hint if server_keys["LLAMA"] else "sk-..."
                    )
                    deepgram_key = gr.Textbox(
                        label="🎤 Deepgram API Key",
                        type="password",
                        placeholder=server_key_hint if server_keys["DEEPGRAM"] else "Enter Deepgram key"
                    )
                    hf_key = gr.Textbox(
                        label="🤗 HuggingFace Token",
                        type="password",
                        placeholder=server_key_hint if server_keys["HF_TOKEN"] else "hf_..."
                    )
                with gr.Accordion("⚙️ SETTINGS", open=False):
                    groq_model = gr.Dropdown(
                        label="🧠 AI Model",
                        choices=[
                            "llama-3.1-8b-instant",
                            "llama-3.1-70b-versatile",
                            "mixtral-8x7b-32768",
                            "gemma2-9b-it"
                        ],
                        value=DEFAULT_GROQ_MODEL
                    )
                # Main Process Button
                process_btn = gr.Button(
                    "🚀 PROCESS PDF",
                    variant="primary",
                    size="lg",
                    elem_classes=["pulse-effect"]
                )
            # Right Column - Results
            with gr.Column(scale=2):
                with gr.Tabs():
                    with gr.Tab("📝 SUMMARY"):
                        summary_output = gr.Textbox(
                            label="AI Generated Summary",
                            lines=12,
                            placeholder="Your PDF summary will appear here...",
                            interactive=False
                        )
                    with gr.Tab("🔊 AUDIO"):
                        audio_output = gr.Audio(
                            label="Listen to Summary",
                            type="filepath",
                            interactive=False
                        )
                    with gr.Tab("🎨 DIAGRAM"):
                        diagram_output = gr.Image(
                            label="Visual Representation",
                            interactive=False,
                            height=400
                        )
        # Separate Q&A Section
        gr.Markdown("---")
        gr.Markdown("## 💬 CHAT WITH YOUR PDF")
        with gr.Row():
            with gr.Column(scale=3):
                question_input = gr.Textbox(
                    label="Ask anything about your PDF",
                    placeholder="What are the main findings? • Who are the key people mentioned? • Summarize chapter 2...",
                    lines=2
                )
            with gr.Column(scale=1):
                ask_btn = gr.Button("📨 SEND", variant="secondary", size="lg")
        chat_output = gr.Textbox(
            label="🤖 AI Response",
            lines=8,
            placeholder="Upload and process a PDF first, then ask your questions!",
            interactive=False
        )
        # Status indicator
        status_output = gr.Textbox(
            label="📊 Status",
            value="Ready to process PDF...",
            interactive=False
        )
        # Footer
        gr.Markdown("""
---
**🔥 Pro Tips:**
• Upload PDFs with extractable text (not image-only)
• Questions work only after processing
• Audio generation takes ~30-60 seconds
• Diagrams may take longer depending on HF API load
*Built with ❤️ for the AI generation*
""")
        # Event handlers
        process_btn.click(
            fn=run_pipeline,
            inputs=[pdf_input, groq_key, deepgram_key, hf_key, groq_model],
            outputs=[summary_output, audio_output, diagram_output, status_output],
            show_progress=True
        )
        ask_btn.click(
            fn=answer_question,
            inputs=[question_input, groq_key, groq_model],
            outputs=[chat_output],
            show_progress=False
        )
        # Enter key support for questions
        question_input.submit(
            fn=answer_question,
            inputs=[question_input, groq_key, groq_model],
            outputs=[chat_output]
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the interface and serve it with a public
    # share link, debug mode and in-browser error display enabled.
    launch_options = {"share": True, "debug": True, "show_error": True}
    demo = build_ui()
    demo.launch(**launch_options)