# NOTE(review): stray "Spaces: / Runtime error" banner text from the hosting
# UI was captured into this file; commented out so the module parses.
| """ | |
| PDF → Summary → Audio → Talk to PDF → Diagram | |
| - Summarization: Groq (LLaMA 3) | |
| - TTS: Deepgram (aura-asteria-en) | |
| - Talk to PDF: Groq chat completions | |
| - Diagram Generator: Stable Diffusion XL (Hugging Face Inference API) | |
| """ | |
| import os | |
| import tempfile | |
| import traceback | |
| import time | |
| from typing import List | |
| import requests | |
| import fitz # PyMuPDF | |
| import gradio as gr | |
| from groq import Groq | |
# ================== Load API Keys ==================
# On Google Colab, pull any missing API keys from the Colab secrets store
# into the environment; anywhere else the import fails and we fall through.
try:
    from google.colab import userdata  # only importable inside Colab

    # One loop instead of three copy-pasted branches per key name.
    for _key_name in ("LLAMA", "DEEPGRAM", "HF_TOKEN"):
        if not os.environ.get(_key_name):
            _val = userdata.get(_key_name)
            if _val:
                os.environ[_key_name] = _val.strip()
except Exception:
    # Not running on Colab (or secrets unavailable) — rely on existing env vars.
    pass
# ================== Config ==================
CHUNK_CHARS = 20000  # max characters per chunk sent to the summarizer
DEFAULT_GROQ_MODEL = "llama-3.1-8b-instant"  # default Groq chat model
DEEPGRAM_TTS_MODEL = "aura-asteria-en"  # Deepgram text-to-speech voice
DEEPGRAM_ENCODING = "mp3"  # audio encoding requested from Deepgram
# NOTE(review): runwayml/stable-diffusion-v1-5 has been removed from the HF
# Hub — verify this model id still resolves on the Inference API.
HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"
# Global variable to store PDF text for Q&A
# ("processed" flips to True once a PDF has been extracted successfully)
pdf_text_storage = {"text": "", "processed": False}
# ================== Utils ==================
def extract_text_from_pdf(file_path: str) -> str:
    """Extract all text from the PDF at *file_path* using PyMuPDF.

    Pages are joined with blank lines and the result is stripped.
    Fix: the document is now opened as a context manager so it is closed
    even if text extraction raises (the original leaked the handle then).
    """
    with fitz.open(file_path) as doc:
        text = "\n\n".join(page.get_text("text") for page in doc)
    return text.strip()
def chunk_text(text: str, max_chars: int) -> List[str]:
    """Split *text* into chunks of at most *max_chars* characters.

    Prefers to break at the last newline (then the last space) inside each
    window so words are not cut in half. Returns [] for empty input.

    Fixes vs. original:
    - whitespace-only spans no longer produce empty-string chunks;
    - max_chars <= 0 raises ValueError instead of looping forever.
    """
    if not text:
        return []
    if max_chars <= 0:
        raise ValueError("max_chars must be positive")
    parts: List[str] = []
    start, length = 0, len(text)
    while start < length:
        end = min(start + max_chars, length)
        if end < length:
            # Look for a natural break point inside the current window.
            brk = text.rfind("\n", start, end)
            if brk == -1:
                brk = text.rfind(" ", start, end)
            if brk > start:  # only accept a break that makes progress
                end = brk
        piece = text[start:end].strip()
        if piece:  # drop blank chunks (original appended empty strings)
            parts.append(piece)
        start = end
    return parts
# ================== Groq Summarization ==================
def summarize_chunk_via_groq(chunk: str, groq_client: Groq, model: str) -> str:
    """Summarize one chunk of text into a ~180-word paragraph via Groq.

    Fix: parameter renamed from ``chunk_text``, which shadowed the
    module-level ``chunk_text()`` helper. Both call sites in this file pass
    the argument positionally, so the rename is safe here.
    """
    prompt = f"Summarize this text into a concise paragraph (~180 words max):\n\n{chunk}"
    resp = groq_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,  # low temperature for stable, factual summaries
        max_tokens=800,
    )
    return resp.choices[0].message.content.strip()
def summarize_document(extracted_text: str, groq_api_key: str, groq_model: str = DEFAULT_GROQ_MODEL) -> str:
    """Produce a single summary of *extracted_text* with Groq.

    Documents that fit in one chunk are summarized directly; longer ones
    are chunked, summarized piece by piece, then the partial summaries are
    merged by a final model call. Per-chunk failures are reported inline
    rather than aborting the whole run.
    """
    client = Groq(api_key=groq_api_key)

    # Short document: a single summarization call is enough.
    if len(extracted_text) <= CHUNK_CHARS:
        return summarize_chunk_via_groq(extracted_text, client, groq_model)

    # Long document: map (summarize each chunk) then reduce (merge).
    partials = []
    for piece in chunk_text(extracted_text, CHUNK_CHARS):
        try:
            partials.append(summarize_chunk_via_groq(piece, client, groq_model))
        except Exception as exc:
            partials.append(f"(error summarizing chunk: {str(exc)})")

    merge_request = (
        "Combine and refine the following summaries into a single clear summary (200-300 words):\n\n"
        + " ".join(partials)
    )
    response = client.chat.completions.create(
        model=groq_model,
        messages=[{"role": "user", "content": merge_request}],
        temperature=0.2,
        max_tokens=900,
    )
    return response.choices[0].message.content.strip()
# ================== Deepgram TTS ==================
def deepgram_tts(summary_text: str, deepgram_api_key: str, model: str = DEEPGRAM_TTS_MODEL, encoding: str = DEEPGRAM_ENCODING) -> str:
    """Synthesize *summary_text* to speech via Deepgram's Speak API.

    Writes the returned audio bytes to a temp file (suffix matches the
    requested encoding) and returns its path. Raises RuntimeError on any
    HTTP error response.
    """
    response = requests.post(
        f"https://api.deepgram.com/v1/speak?model={model}&encoding={encoding}",
        headers={"Authorization": f"Token {deepgram_api_key}"},
        json={"text": summary_text},
        timeout=120,
    )
    if response.status_code >= 400:
        raise RuntimeError(f"Deepgram TTS failed ({response.status_code}): {response.text}")

    # delete=False so Gradio can serve the file after we return its path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{encoding}") as out:
        out.write(response.content)
    return out.name
# ================== Talk to PDF (Separate Function) ==================
def ask_pdf_question(question: str, groq_key: str, model: str = DEFAULT_GROQ_MODEL) -> str:
    """Answer *question* from the globally stored PDF text using Groq.

    Always returns a user-facing string; error conditions (no PDF processed,
    blank inputs, API failures) are reported inline rather than raised.
    """
    # Guard clauses: a PDF must already be processed and inputs non-blank.
    if not pdf_text_storage["processed"]:
        return "❌ Please process a PDF first before asking questions!"
    if not question.strip():
        return "❌ Please enter a question!"
    if not groq_key.strip():
        return "❌ Please provide your Groq API key!"

    try:
        client = Groq(api_key=groq_key)
        # Only the first 15k characters are sent to stay inside the context window.
        prompt = (
            f"Here is PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\n"
            f"User Question: {question}\n\n"
            "Answer strictly based on PDF content. Be concise and specific."
        )
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=500,
        )
        return f"🤖 {response.choices[0].message.content.strip()}"
    except Exception as exc:
        return f"❌ Error: {str(exc)}"
# ================== Diagram via HF (Fixed) ==================
def _write_png_bytes(data: bytes) -> str:
    """Persist raw image bytes to a temp .png file and return its path."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        tmp.write(data)
    return tmp.name


def _looks_like_image(resp) -> bool:
    """Heuristic: response carries an image content-type or a non-trivial body."""
    content_type = resp.headers.get('content-type', '')
    return 'image' in content_type or len(resp.content) > 1000


def generate_diagram(summary: str, hf_token: str, max_retries: int = 3) -> str:
    """Generate a diagram image for *summary* via the HF Inference API.

    Strategy: retry the primary model (honoring "model loading" waits),
    then try alternative models once each, and finally fall back to a
    locally rendered placeholder. Returns a path to the generated file.

    Fixes vs. original: bare ``except:`` narrowed to ``except Exception``;
    unused captured exception removed; duplicated save logic extracted into
    ``_write_png_bytes`` / ``_looks_like_image`` helpers.
    """
    headers = {"Authorization": f"Bearer {hf_token}"}
    prompt = f"detailed technical diagram, infographic style, clean illustration of: {summary[:500]}"
    payload = {"inputs": prompt}

    url = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}"
    for attempt in range(max_retries):
        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=60)
            if resp.status_code == 503:
                # Model may still be loading; honor the server's estimate.
                try:
                    error_data = resp.json()
                    if "loading" in error_data.get("error", "").lower():
                        time.sleep(error_data.get("estimated_time", 20))
                        continue
                except Exception:  # fix: was a bare except
                    pass
            if resp.status_code == 200 and _looks_like_image(resp):
                return _write_png_bytes(resp.content)
            if attempt < max_retries - 1:
                time.sleep((attempt + 1) * 10)  # linear backoff before retrying
        except requests.exceptions.RequestException:
            if attempt < max_retries - 1:
                time.sleep((attempt + 1) * 5)

    # Primary model exhausted — try each alternative model once.
    alternative_models = [
        "stabilityai/stable-diffusion-xl-base-1.0",
        "CompVis/stable-diffusion-v1-4"
    ]
    for alt_model in alternative_models:
        try:
            resp = requests.post(
                f"https://api-inference.huggingface.co/models/{alt_model}",
                headers=headers,
                json=payload,
                timeout=60,
            )
            if resp.status_code == 200 and _looks_like_image(resp):
                return _write_png_bytes(resp.content)
        except Exception:
            continue

    # Last resort: render the summary text onto a placeholder image.
    return create_text_diagram_placeholder(summary)
def create_text_diagram_placeholder(summary: str) -> str:
    """Render *summary* onto a simple dark-themed PNG as a diagram fallback.

    Returns the path of the generated .png. If Pillow is unavailable or any
    drawing step fails, a .txt file containing the summary is written instead
    (so a usable path is always returned).
    """
    import textwrap  # stdlib wrap replaces the original hand-rolled loop

    try:
        from PIL import Image, ImageDraw, ImageFont

        width, height = 800, 600
        img = Image.new('RGB', (width, height), color='#0a0a0a')
        draw = ImageDraw.Draw(img)
        try:
            font = ImageFont.truetype("arial.ttf", 16)
            title_font = ImageFont.truetype("arial.ttf", 20)
        except OSError:  # fix: was a bare except; truetype raises OSError
            font = ImageFont.load_default()
            title_font = ImageFont.load_default()

        draw.text((50, 50), "📊 Document Summary", fill='#00ff88', font=title_font)

        # Wrap to ~45 chars/line; keep long words intact like the original loop did.
        lines = textwrap.wrap(summary, width=45,
                              break_long_words=False, break_on_hyphens=False)

        y_offset = 100
        for line in lines[:18]:  # cap at 18 lines so text stays inside the frame
            draw.text((50, y_offset), line, fill='#ccffcc', font=font)
            y_offset += 25

        draw.rectangle([25, 25, width - 25, height - 25], outline='#00ff88', width=3)

        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
        img.save(tmp.name, "PNG")
        tmp.close()
        return tmp.name
    except Exception:
        # Pillow missing or rendering failed — fall back to a plain-text file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
            tmp.write(f"Diagram generation failed. Summary: {summary[:200]}...".encode())
        return tmp.name
# ================== Main Pipeline ==================
def process_pdf_pipeline(pdf_file, groq_key, deepgram_key, hf_token, groq_model):
    """Run the full pipeline: extract PDF text, summarize, TTS, diagram.

    Returns a 4-tuple ``(summary, audio_path, diagram_path, status)`` for the
    Gradio outputs; on any validation failure or exception the first element
    carries an error string and the paths are None.
    """
    try:
        # Validate inputs up front with guard clauses.
        if not groq_key.strip():
            return "❌ Missing Groq API key!", None, None, "Process a PDF first!"
        if not deepgram_key.strip():
            return "❌ Missing Deepgram API key!", None, None, "Process a PDF first!"
        if not hf_token.strip():
            return "❌ Missing HuggingFace token!", None, None, "Process a PDF first!"
        if pdf_file is None:
            return "❌ Please upload a PDF file!", None, None, "Process a PDF first!"

        # Gradio may hand us a file-like object or a plain path.
        pdf_path = getattr(pdf_file, "name", str(pdf_file))

        text = extract_text_from_pdf(pdf_path)
        if not text.strip():
            return "❌ PDF contains no extractable text!", None, None, "Process a PDF first!"

        # Make the extracted text available to the Q&A handler.
        pdf_text_storage["text"] = text
        pdf_text_storage["processed"] = True

        summary = summarize_document(text, groq_api_key=groq_key,
                                     groq_model=groq_model or DEFAULT_GROQ_MODEL)
        audio_path = deepgram_tts(summary, deepgram_api_key=deepgram_key)
        diagram_path = generate_diagram(summary, hf_token)
        return summary, audio_path, diagram_path, "✅ PDF processed! You can now ask questions below."
    except Exception as exc:
        pdf_text_storage["processed"] = False
        return f"❌ Error: {str(exc)}", None, None, "Process a PDF first!"
# ================== Gen-Z Dark Theme CSS ==================
# Injected via gr.Blocks(css=GENZ_CSS) in build_ui(): neon-green-on-black
# theme with glow/hover effects and a "pulse" animation class.
# NOTE(review): selectors like .gradio-tab / .gradio-slider depend on
# Gradio's internal DOM class names, which change between Gradio versions —
# confirm they still match the installed version.
GENZ_CSS = """
/* Main container styling */
.gradio-container {
background: linear-gradient(135deg, #000000 0%, #0a0a0a 100%) !important;
color: #00ff88 !important;
font-family: 'Segoe UI', 'Roboto', sans-serif !important;
}
body {
background: #000000 !important;
color: #00ff88 !important;
}
/* Input fields styling */
input, textarea, .gradio-textbox, .gradio-file, select {
background: linear-gradient(145deg, #111111, #1a1a1a) !important;
color: #00ff88 !important;
border: 2px solid #00ff88 !important;
border-radius: 12px !important;
box-shadow: 0 4px 15px rgba(0, 255, 136, 0.2) !important;
transition: all 0.3s ease !important;
}
input:focus, textarea:focus, .gradio-textbox:focus {
border-color: #00ff00 !important;
box-shadow: 0 0 25px rgba(0, 255, 136, 0.5) !important;
transform: translateY(-2px) !important;
}
/* Button styling */
button {
background: linear-gradient(145deg, #00ff88, #00cc66) !important;
color: #000000 !important;
border: none !important;
border-radius: 15px !important;
font-weight: bold !important;
text-transform: uppercase !important;
letter-spacing: 1px !important;
box-shadow: 0 6px 20px rgba(0, 255, 136, 0.3) !important;
transition: all 0.3s ease !important;
}
button:hover {
background: linear-gradient(145deg, #00cc66, #00ff88) !important;
transform: translateY(-3px) !important;
box-shadow: 0 8px 25px rgba(0, 255, 136, 0.5) !important;
}
button:active {
transform: translateY(1px) !important;
}
/* Headers and text */
h1, h2, h3, h4, .gradio-markdown {
color: #00ff88 !important;
text-shadow: 0 0 10px rgba(0, 255, 136, 0.3) !important;
}
h1 {
font-size: 2.5em !important;
background: linear-gradient(45deg, #00ff88, #00cc66) !important;
-webkit-background-clip: text !important;
-webkit-text-fill-color: transparent !important;
}
/* Tabs styling */
.gradio-tab {
background: linear-gradient(145deg, #111111, #1a1a1a) !important;
color: #00ff88 !important;
border: 2px solid #00ff88 !important;
border-radius: 10px !important;
}
.gradio-tab.selected {
background: linear-gradient(145deg, #00ff88, #00cc66) !important;
color: #000000 !important;
}
/* Slider styling */
.gradio-slider input[type="range"] {
background: #00ff88 !important;
}
.gradio-slider .gradio-slider-track {
background: #333333 !important;
}
.gradio-slider .gradio-slider-thumb {
background: #00ff88 !important;
border: 2px solid #00cc66 !important;
}
/* File upload area */
.gradio-file {
border: 3px dashed #00ff88 !important;
background: rgba(0, 255, 136, 0.1) !important;
border-radius: 15px !important;
}
/* Progress bar */
.progress-bar {
background: linear-gradient(90deg, #00ff88, #00cc66) !important;
border-radius: 10px !important;
}
/* Accordion styling */
.gradio-accordion {
background: linear-gradient(145deg, #111111, #1a1a1a) !important;
border: 2px solid #00ff88 !important;
border-radius: 12px !important;
}
/* Scrollbar */
::-webkit-scrollbar {
width: 12px !important;
}
::-webkit-scrollbar-track {
background: #111111 !important;
}
::-webkit-scrollbar-thumb {
background: linear-gradient(145deg, #00ff88, #00cc66) !important;
border-radius: 6px !important;
}
/* Glowing effects */
.glow {
box-shadow: 0 0 20px rgba(0, 255, 136, 0.5) !important;
}
/* Custom animations */
@keyframes pulse {
0% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
50% { box-shadow: 0 0 30px rgba(0, 255, 136, 0.6); }
100% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
}
.pulse-effect {
animation: pulse 2s infinite !important;
}
"""
# ================== UI Build Function ==================
def build_ui():
    """Build and return the Gradio Blocks interface.

    Layout: left column for upload / API keys / settings, right column for
    tabbed results (summary, audio, diagram), then a Q&A section and a
    status box. Buttons are wired to process_pdf_pipeline and
    ask_pdf_question defined above.
    """
    # Pre-fill the key fields from environment variables when present.
    env_groq = os.environ.get("LLAMA", "")
    env_deepgram = os.environ.get("DEEPGRAM", "")
    env_hf = os.environ.get("HF_TOKEN", "")
    with gr.Blocks(css=GENZ_CSS, title="🔥 PDF AI Pipeline", theme=gr.themes.Base()) as demo:
        # Header - Centered
        gr.Markdown("""
        <div style="text-align: center; margin: 20px 0;">
        <h1 style="font-size: 3.5em; margin-bottom: 10px;">🔥 AI PDF PROCESSOR</h1>
        <h2 style="font-size: 1.8em; margin-bottom: 10px;">Transform PDFs into Audio, Summaries & Interactive Q&A</h2>
        <h3 style="font-size: 1.2em; font-style: italic; opacity: 0.9;"> PEC COHORT 3</h3>
        </div>
        """, elem_classes=["pulse-effect"])
        with gr.Row():
            # Left Column - Upload & API Settings
            with gr.Column(scale=1):
                with gr.Accordion("📁 UPLOAD PDF", open=True):
                    pdf_input = gr.File(
                        label="Drop your PDF here",
                        file_types=[".pdf"],
                        height=150
                    )
                with gr.Accordion("🔑 API KEYS", open=False):
                    gr.Markdown("*Keep your keys secure • Use env vars in production*")
                    groq_key = gr.Textbox(
                        label="🤖 Groq API Key",
                        value=env_groq,
                        type="password",
                        placeholder="sk-..."
                    )
                    deepgram_key = gr.Textbox(
                        label="🎤 Deepgram API Key",
                        value=env_deepgram,
                        type="password",
                        placeholder="Enter Deepgram key"
                    )
                    hf_key = gr.Textbox(
                        label="🤗 HuggingFace Token",
                        value=env_hf,
                        type="password",
                        placeholder="hf_..."
                    )
                with gr.Accordion("⚙️ SETTINGS", open=False):
                    groq_model = gr.Dropdown(
                        label="🧠 AI Model",
                        choices=[
                            "llama-3.1-8b-instant",
                            "llama-3.1-70b-versatile",
                            "mixtral-8x7b-32768",
                            "gemma2-9b-it"
                        ],
                        value=DEFAULT_GROQ_MODEL
                    )
                # Main Process Button
                process_btn = gr.Button(
                    "🚀 PROCESS PDF",
                    variant="primary",
                    size="lg",
                    elem_classes=["pulse-effect"]
                )
            # Right Column - Results
            with gr.Column(scale=2):
                with gr.Tabs():
                    with gr.Tab("📝 SUMMARY"):
                        summary_output = gr.Textbox(
                            label="AI Generated Summary",
                            lines=12,
                            placeholder="Your PDF summary will appear here...",
                            interactive=False
                        )
                    with gr.Tab("🔊 AUDIO"):
                        audio_output = gr.Audio(
                            label="Listen to Summary",
                            type="filepath",
                            interactive=False
                        )
                    with gr.Tab("🎨 DIAGRAM"):
                        diagram_output = gr.Image(
                            label="Visual Representation",
                            interactive=False,
                            height=400
                        )
        # Separate Q&A Section
        gr.Markdown("---")
        gr.Markdown("## 💬 CHAT WITH YOUR PDF")
        with gr.Row():
            with gr.Column(scale=3):
                question_input = gr.Textbox(
                    label="Ask anything about your PDF",
                    placeholder="What are the main findings? • Who are the key people mentioned? • Summarize chapter 2...",
                    lines=2
                )
            with gr.Column(scale=1):
                ask_btn = gr.Button("📨 SEND", variant="secondary", size="lg")
        chat_output = gr.Textbox(
            label="🤖 AI Response",
            lines=8,
            placeholder="Upload and process a PDF first, then ask your questions!",
            interactive=False
        )
        # Status indicator
        status_output = gr.Textbox(
            label="📊 Status",
            value="Ready to process PDF...",
            interactive=False
        )
        # Footer
        gr.Markdown("""
        ---
        **🔥 Pro Tips:**
        • Upload PDFs with extractable text (not image-only)
        • Questions work only after processing
        • Audio generation takes ~30-60 seconds
        • Diagrams may take longer depending on HF API load
        *Built with ❤️ for the AI generation*
        """)
        # Event handlers
        process_btn.click(
            fn=process_pdf_pipeline,
            inputs=[pdf_input, groq_key, deepgram_key, hf_key, groq_model],
            outputs=[summary_output, audio_output, diagram_output, status_output],
            show_progress=True
        )
        ask_btn.click(
            fn=ask_pdf_question,
            inputs=[question_input, groq_key, groq_model],
            outputs=[chat_output],
            show_progress=False
        )
        # Enter key support for questions
        question_input.submit(
            fn=ask_pdf_question,
            inputs=[question_input, groq_key, groq_model],
            outputs=[chat_output]
        )
    return demo
if __name__ == "__main__":
    demo = build_ui()
    # share=True creates a public Gradio link (useful on Colab/Spaces);
    # debug/show_error surface tracebacks in the UI — disable for production.
    demo.launch(
        share=True,
        debug=True,
        show_error=True
    )