""" PDF → Summary → Audio → Talk to PDF → Diagram - Summarization: Groq (LLaMA 3) - TTS: Deepgram (aura-asteria-en) - Talk to PDF: Groq chat completions - Diagram Generator: Stable Diffusion XL (Hugging Face Inference API) """ import os import tempfile import traceback import time from typing import List import requests import fitz # PyMuPDF import gradio as gr from groq import Groq # ================== Load API Keys ================== try: from google.colab import userdata if not os.environ.get("LLAMA"): val = userdata.get("LLAMA") if val: os.environ["LLAMA"] = val.strip() if not os.environ.get("DEEPGRAM"): val = userdata.get("DEEPGRAM") if val: os.environ["DEEPGRAM"] = val.strip() if not os.environ.get("HF_TOKEN"): val = userdata.get("HF_TOKEN") if val: os.environ["HF_TOKEN"] = val.strip() except Exception: pass # ================== Config ================== CHUNK_CHARS = 20000 DEFAULT_GROQ_MODEL = "llama-3.1-8b-instant" DEEPGRAM_TTS_MODEL = "aura-asteria-en" DEEPGRAM_ENCODING = "mp3" HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5" # Global variable to store PDF text for Q&A pdf_text_storage = {"text": "", "processed": False} # ================== Utils ================== def extract_text_from_pdf(file_path: str) -> str: doc = fitz.open(file_path) text = "\n\n".join(page.get_text("text") for page in doc) doc.close() return text.strip() def chunk_text(text: str, max_chars: int) -> List[str]: if not text: return [] parts, start, L = [], 0, len(text) while start < L: end = min(start + max_chars, L) if end < L: back = text.rfind("\n", start, end) if back == -1: back = text.rfind(" ", start, end) if back != -1 and back > start: end = back parts.append(text[start:end].strip()) start = end return parts # ================== Groq Summarization ================== def summarize_chunk_via_groq(chunk_text: str, groq_client: Groq, model: str) -> str: prompt = f"Summarize this text into a concise paragraph (~180 words max):\n\n{chunk_text}" resp = groq_client.chat.completions.create( model=model, messages=[{"role": "user", "content": prompt}], temperature=0.2, max_tokens=800, ) return resp.choices[0].message.content.strip() def summarize_document(extracted_text: str, groq_api_key: str, groq_model: str = DEFAULT_GROQ_MODEL) -> str: client = Groq(api_key=groq_api_key) if len(extracted_text) <= CHUNK_CHARS: return summarize_chunk_via_groq(extracted_text, client, groq_model) chunks = chunk_text(extracted_text, CHUNK_CHARS) summaries = [] for ch in chunks: try: summaries.append(summarize_chunk_via_groq(ch, client, groq_model)) except Exception as e: summaries.append(f"(error summarizing chunk: {str(e)})") final_prompt = "Combine and refine the following summaries into a single clear summary (200-300 words):\n\n" + " ".join(summaries) resp = client.chat.completions.create( model=groq_model, messages=[{"role": "user", "content": final_prompt}], temperature=0.2, max_tokens=900, ) return resp.choices[0].message.content.strip() # ================== Deepgram TTS ================== def deepgram_tts(summary_text: str, deepgram_api_key: str, model: str = DEEPGRAM_TTS_MODEL, encoding: str = DEEPGRAM_ENCODING) -> str: url = f"https://api.deepgram.com/v1/speak?model={model}&encoding={encoding}" headers = {"Authorization": f"Token {deepgram_api_key}"} payload = {"text": summary_text} resp = requests.post(url, headers=headers, json=payload, timeout=120) if resp.status_code >= 400: raise RuntimeError(f"Deepgram TTS failed ({resp.status_code}): {resp.text}") tmp = tempfile.NamedTemporaryFile(delete=False, suffix=f".{encoding}") tmp.write(resp.content) tmp.close() return tmp.name # ================== Talk to PDF (Separate Function) ================== def ask_pdf_question(question: str, groq_key: str, model: str = DEFAULT_GROQ_MODEL) -> str: if not pdf_text_storage["processed"]: return "❌ Please process a PDF first before asking questions!" if not question.strip(): return "❌ Please enter a question!" if not groq_key.strip(): return "❌ Please provide your Groq API key!" try: client = Groq(api_key=groq_key) prompt = f"Here is PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\nUser Question: {question}\n\nAnswer strictly based on PDF content. Be concise and specific." resp = client.chat.completions.create( model=model, messages=[{"role": "user", "content": prompt}], temperature=0, max_tokens=500, ) return f"🤖 {resp.choices[0].message.content.strip()}" except Exception as e: return f"❌ Error: {str(e)}" # ================== Diagram via HF (Fixed) ================== def generate_diagram(summary: str, hf_token: str, max_retries: int = 3) -> str: headers = {"Authorization": f"Bearer {hf_token}"} url = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}" prompt = f"detailed technical diagram, infographic style, clean illustration of: {summary[:500]}" payload = {"inputs": prompt} for attempt in range(max_retries): try: resp = requests.post(url, headers=headers, json=payload, timeout=60) if resp.status_code == 503: try: error_data = resp.json() if "loading" in error_data.get("error", "").lower(): estimated_time = error_data.get("estimated_time", 20) time.sleep(estimated_time) continue except: pass if resp.status_code == 200: content_type = resp.headers.get('content-type', '') if 'image' in content_type or len(resp.content) > 1000: tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png") tmp.write(resp.content) tmp.close() return tmp.name if attempt < max_retries - 1: wait_time = (attempt + 1) * 10 time.sleep(wait_time) except requests.exceptions.RequestException as e: if attempt < max_retries - 1: time.sleep((attempt + 1) * 5) alternative_models = [ "stabilityai/stable-diffusion-xl-base-1.0", "CompVis/stable-diffusion-v1-4" ] for alt_model in alternative_models: try: alt_url = f"https://api-inference.huggingface.co/models/{alt_model}" resp = requests.post(alt_url, headers=headers, json=payload, timeout=60) if resp.status_code == 200: content_type = resp.headers.get('content-type', '') if 'image' in content_type or len(resp.content) > 1000: tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png") tmp.write(resp.content) tmp.close() return tmp.name except Exception: continue return create_text_diagram_placeholder(summary) def create_text_diagram_placeholder(summary: str) -> str: try: from PIL import Image, ImageDraw, ImageFont width, height = 800, 600 img = Image.new('RGB', (width, height), color='#0a0a0a') draw = ImageDraw.Draw(img) try: font = ImageFont.truetype("arial.ttf", 16) title_font = ImageFont.truetype("arial.ttf", 20) except: font = ImageFont.load_default() title_font = ImageFont.load_default() draw.text((50, 50), "📊 Document Summary", fill='#00ff88', font=title_font) words = summary.split() lines = [] current_line = [] max_width = 45 for word in words: if len(' '.join(current_line + [word])) <= max_width: current_line.append(word) else: if current_line: lines.append(' '.join(current_line)) current_line = [word] if current_line: lines.append(' '.join(current_line)) y_offset = 100 for line in lines[:18]: draw.text((50, y_offset), line, fill='#ccffcc', font=font) y_offset += 25 draw.rectangle([25, 25, width-25, height-25], outline='#00ff88', width=3) tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png") img.save(tmp.name, "PNG") tmp.close() return tmp.name except Exception: tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".txt") tmp.write(f"Diagram generation failed. Summary: {summary[:200]}...".encode()) tmp.close() return tmp.name # ================== Main Pipeline ================== def process_pdf_pipeline(pdf_file, groq_key, deepgram_key, hf_token, groq_model): try: if not groq_key.strip(): return "❌ Missing Groq API key!", None, None, "Process a PDF first!" if not deepgram_key.strip(): return "❌ Missing Deepgram API key!", None, None, "Process a PDF first!" if not hf_token.strip(): return "❌ Missing HuggingFace token!", None, None, "Process a PDF first!" if pdf_file is None: return "❌ Please upload a PDF file!", None, None, "Process a PDF first!" pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file) # Extract and store text globally text = extract_text_from_pdf(pdf_path) if not text.strip(): return "❌ PDF contains no extractable text!", None, None, "Process a PDF first!" # Store text for Q&A pdf_text_storage["text"] = text pdf_text_storage["processed"] = True # Generate summary summary = summarize_document(text, groq_api_key=groq_key, groq_model=groq_model or DEFAULT_GROQ_MODEL) # Generate audio audio_path = deepgram_tts(summary, deepgram_api_key=deepgram_key) # Generate diagram diagram_path = generate_diagram(summary, hf_token) return summary, audio_path, diagram_path, "✅ PDF processed! You can now ask questions below." except Exception as e: pdf_text_storage["processed"] = False return f"❌ Error: {str(e)}", None, None, "Process a PDF first!" # ================== Gen-Z Dark Theme CSS ================== GENZ_CSS = """ /* Main container styling */ .gradio-container { background: linear-gradient(135deg, #000000 0%, #0a0a0a 100%) !important; color: #00ff88 !important; font-family: 'Segoe UI', 'Roboto', sans-serif !important; } body { background: #000000 !important; color: #00ff88 !important; } /* Input fields styling */ input, textarea, .gradio-textbox, .gradio-file, select { background: linear-gradient(145deg, #111111, #1a1a1a) !important; color: #00ff88 !important; border: 2px solid #00ff88 !important; border-radius: 12px !important; box-shadow: 0 4px 15px rgba(0, 255, 136, 0.2) !important; transition: all 0.3s ease !important; } input:focus, textarea:focus, .gradio-textbox:focus { border-color: #00ff00 !important; box-shadow: 0 0 25px rgba(0, 255, 136, 0.5) !important; transform: translateY(-2px) !important; } /* Button styling */ button { background: linear-gradient(145deg, #00ff88, #00cc66) !important; color: #000000 !important; border: none !important; border-radius: 15px !important; font-weight: bold !important; text-transform: uppercase !important; letter-spacing: 1px !important; box-shadow: 0 6px 20px rgba(0, 255, 136, 0.3) !important; transition: all 0.3s ease !important; } button:hover { background: linear-gradient(145deg, #00cc66, #00ff88) !important; transform: translateY(-3px) !important; box-shadow: 0 8px 25px rgba(0, 255, 136, 0.5) !important; } button:active { transform: translateY(1px) !important; } /* Headers and text */ h1, h2, h3, h4, .gradio-markdown { color: #00ff88 !important; text-shadow: 0 0 10px rgba(0, 255, 136, 0.3) !important; } h1 { font-size: 2.5em !important; background: linear-gradient(45deg, #00ff88, #00cc66) !important; -webkit-background-clip: text !important; -webkit-text-fill-color: transparent !important; } /* Tabs styling */ .gradio-tab { background: linear-gradient(145deg, #111111, #1a1a1a) !important; color: #00ff88 !important; border: 2px solid #00ff88 !important; border-radius: 10px !important; } .gradio-tab.selected { background: linear-gradient(145deg, #00ff88, #00cc66) !important; color: #000000 !important; } /* Slider styling */ .gradio-slider input[type="range"] { background: #00ff88 !important; } .gradio-slider .gradio-slider-track { background: #333333 !important; } .gradio-slider .gradio-slider-thumb { background: #00ff88 !important; border: 2px solid #00cc66 !important; } /* File upload area */ .gradio-file { border: 3px dashed #00ff88 !important; background: rgba(0, 255, 136, 0.1) !important; border-radius: 15px !important; } /* Progress bar */ .progress-bar { background: linear-gradient(90deg, #00ff88, #00cc66) !important; border-radius: 10px !important; } /* Accordion styling */ .gradio-accordion { background: linear-gradient(145deg, #111111, #1a1a1a) !important; border: 2px solid #00ff88 !important; border-radius: 12px !important; } /* Scrollbar */ ::-webkit-scrollbar { width: 12px !important; } ::-webkit-scrollbar-track { background: #111111 !important; } ::-webkit-scrollbar-thumb { background: linear-gradient(145deg, #00ff88, #00cc66) !important; border-radius: 6px !important; } /* Glowing effects */ .glow { box-shadow: 0 0 20px rgba(0, 255, 136, 0.5) !important; } /* Custom animations */ @keyframes pulse { 0% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); } 50% { box-shadow: 0 0 30px rgba(0, 255, 136, 0.6); } 100% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); } } .pulse-effect { animation: pulse 2s infinite !important; } """ # ================== UI Build Function ================== def build_ui(): env_groq = os.environ.get("LLAMA", "") env_deepgram = os.environ.get("DEEPGRAM", "") env_hf = os.environ.get("HF_TOKEN", "") with gr.Blocks(css=GENZ_CSS, title="🔥 PDF AI Pipeline", theme=gr.themes.Base()) as demo: # Header - Centered gr.Markdown("""

🔥 AI PDF PROCESSOR

Transform PDFs into Audio, Summaries & Interactive Q&A

PEC COHORT 3

""", elem_classes=["pulse-effect"]) with gr.Row(): # Left Column - Upload & API Settings with gr.Column(scale=1): with gr.Accordion("📁 UPLOAD PDF", open=True): pdf_input = gr.File( label="Drop your PDF here", file_types=[".pdf"], height=150 ) with gr.Accordion("🔑 API KEYS", open=False): gr.Markdown("*Keep your keys secure • Use env vars in production*") groq_key = gr.Textbox( label="🤖 Groq API Key", value=env_groq, type="password", placeholder="sk-..." ) deepgram_key = gr.Textbox( label="🎤 Deepgram API Key", value=env_deepgram, type="password", placeholder="Enter Deepgram key" ) hf_key = gr.Textbox( label="🤗 HuggingFace Token", value=env_hf, type="password", placeholder="hf_..." ) with gr.Accordion("⚙️ SETTINGS", open=False): groq_model = gr.Dropdown( label="🧠 AI Model", choices=[ "llama-3.1-8b-instant", "llama-3.1-70b-versatile", "mixtral-8x7b-32768", "gemma2-9b-it" ], value=DEFAULT_GROQ_MODEL ) # Main Process Button process_btn = gr.Button( "🚀 PROCESS PDF", variant="primary", size="lg", elem_classes=["pulse-effect"] ) # Right Column - Results with gr.Column(scale=2): with gr.Tabs(): with gr.Tab("📝 SUMMARY"): summary_output = gr.Textbox( label="AI Generated Summary", lines=12, placeholder="Your PDF summary will appear here...", interactive=False ) with gr.Tab("🔊 AUDIO"): audio_output = gr.Audio( label="Listen to Summary", type="filepath", interactive=False ) with gr.Tab("🎨 DIAGRAM"): diagram_output = gr.Image( label="Visual Representation", interactive=False, height=400 ) # Separate Q&A Section gr.Markdown("---") gr.Markdown("## 💬 CHAT WITH YOUR PDF") with gr.Row(): with gr.Column(scale=3): question_input = gr.Textbox( label="Ask anything about your PDF", placeholder="What are the main findings? • Who are the key people mentioned? • Summarize chapter 2...", lines=2 ) with gr.Column(scale=1): ask_btn = gr.Button("📨 SEND", variant="secondary", size="lg") chat_output = gr.Textbox( label="🤖 AI Response", lines=8, placeholder="Upload and process a PDF first, then ask your questions!", interactive=False ) # Status indicator status_output = gr.Textbox( label="📊 Status", value="Ready to process PDF...", interactive=False ) # Footer gr.Markdown(""" --- **🔥 Pro Tips:** • Upload PDFs with extractable text (not image-only) • Questions work only after processing • Audio generation takes ~30-60 seconds • Diagrams may take longer depending on HF API load *Built with ❤️ for the AI generation* """) # Event handlers process_btn.click( fn=process_pdf_pipeline, inputs=[pdf_input, groq_key, deepgram_key, hf_key, groq_model], outputs=[summary_output, audio_output, diagram_output, status_output], show_progress=True ) ask_btn.click( fn=ask_pdf_question, inputs=[question_input, groq_key, groq_model], outputs=[chat_output], show_progress=False ) # Enter key support for questions question_input.submit( fn=ask_pdf_question, inputs=[question_input, groq_key, groq_model], outputs=[chat_output] ) return demo if __name__ == "__main__": demo = build_ui() demo.launch( share=True, debug=True, show_error=True )