Spaces:

ChatBotsTA
/

pdf

Runtime error

pdf

File size: 20,471 Bytes

"""
PDF → Summary → Audio → Talk to PDF → Diagram
- Summarization: Groq (LLaMA 3.1 8B Instant by default; model selectable in the UI)
- TTS: Deepgram (aura-asteria-en)
- Talk to PDF: Groq chat completions
- Diagram Generator: Stable Diffusion v1.5 (Hugging Face Inference API), falling back to SDXL, SD v1.4, then a locally rendered text placeholder
"""

import os
import tempfile
import traceback
import time
from typing import List

import requests
import fitz  # PyMuPDF
import gradio as gr
from groq import Groq

# ================== Load API Keys ==================
try:
    # Only importable inside Google Colab; anywhere else the import error is
    # swallowed below and the environment is left untouched.
    from google.colab import userdata

    # Copy each Colab secret into the process environment, but never
    # overwrite a variable that already holds a non-empty value.
    for _secret_name in ("LLAMA", "DEEPGRAM", "HF_TOKEN"):
        if os.environ.get(_secret_name):
            continue
        val = userdata.get(_secret_name)
        if val:
            os.environ[_secret_name] = val.strip()
except Exception:
    pass

# ================== Config ==================
# Documents longer than this many characters are split into chunks of at most
# this size before summarization (see summarize_document).
CHUNK_CHARS = 20000
# Groq chat model used when the caller does not pick one.
DEFAULT_GROQ_MODEL = "llama-3.1-8b-instant"
# Deepgram voice model and audio encoding sent to the /v1/speak endpoint;
# the encoding is also used as the extension of the temp audio file.
DEEPGRAM_TTS_MODEL = "aura-asteria-en"
DEEPGRAM_ENCODING = "mp3"
# First Hugging Face model that generate_diagram tries.
HF_IMAGE_MODEL = "runwayml/stable-diffusion-v1-5"

# Global variable to store PDF text for Q&A.
# Written by process_pdf_pipeline and read by ask_pdf_question.
# NOTE(review): being module-level, this is presumably shared by every
# session served by the process — confirm whether per-session state is needed.
pdf_text_storage = {"text": "", "processed": False}

# ================== Utils ==================
def extract_text_from_pdf(file_path: str) -> str:
    """Return the plain text of every page of the PDF at *file_path*.

    Each page is extracted with ``page.get_text("text")``; pages are joined
    with a blank line and the result is stripped of surrounding whitespace.
    The document is closed even when extracting a page raises.
    """
    doc = fitz.open(file_path)
    try:
        return "\n\n".join(page.get_text("text") for page in doc).strip()
    finally:
        doc.close()

def chunk_text(text: str, max_chars: int) -> List[str]:
    """Split *text* into stripped, non-empty pieces of at most *max_chars* characters.

    A piece is cut at the last newline inside the window when there is one,
    otherwise at the last space, otherwise hard at ``max_chars``.

    Args:
        text: Text to split. An empty string yields an empty list.
        max_chars: Maximum length of a piece; must be positive.

    Returns:
        The pieces in document order. Pieces that contain only whitespace
        are dropped.

    Raises:
        ValueError: If *max_chars* is not positive (the loop could never
            advance).
    """
    if not text:
        return []
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")

    parts: List[str] = []
    start, total = 0, len(text)
    while start < total:
        end = min(start + max_chars, total)
        if end < total:
            # Search strictly after `start`: a separator sitting exactly at
            # `start` cannot be used as a cut point, and ignoring it lets the
            # space fallback still apply.
            cut = text.rfind("\n", start + 1, end)
            if cut == -1:
                cut = text.rfind(" ", start + 1, end)
            if cut != -1:
                end = cut
        piece = text[start:end].strip()
        if piece:
            parts.append(piece)
        start = end
    return parts

# ================== Groq Summarization ==================
def summarize_chunk_via_groq(chunk_text: str, groq_client: Groq, model: str) -> str:
    """Ask the Groq chat model for a short summary of one piece of text.

    Sends a single user message through ``groq_client.chat.completions`` and
    returns the first choice's message content with surrounding whitespace
    removed.
    """
    user_message = {
        "role": "user",
        "content": f"Summarize this text into a concise paragraph (~180 words max):\n\n{chunk_text}",
    }
    completion = groq_client.chat.completions.create(
        model=model,
        messages=[user_message],
        temperature=0.2,
        max_tokens=800,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

def summarize_document(extracted_text: str, groq_api_key: str, groq_model: str = DEFAULT_GROQ_MODEL) -> str:
    """Summarize a whole document with Groq, chunking it when it is long.

    Text of at most ``CHUNK_CHARS`` characters is summarized in one request.
    Longer text is split with ``chunk_text``; each chunk is summarized and the
    partial summaries are merged by a final request.

    Args:
        extracted_text: Full document text.
        groq_api_key: API key used to build the Groq client.
        groq_model: Chat model name for every request.

    Returns:
        The final summary text.

    Raises:
        RuntimeError: If the document was chunked and no chunk could be
            summarized.
        Exception: Whatever the Groq client raises for the single-request
            path or the final merge request.
    """
    client = Groq(api_key=groq_api_key)
    if len(extracted_text) <= CHUNK_CHARS:
        return summarize_chunk_via_groq(extracted_text, client, groq_model)

    chunks = chunk_text(extracted_text, CHUNK_CHARS)
    summaries = []
    last_error = None
    for ch in chunks:
        if not ch.strip():
            # Nothing to summarize; do not spend a request on it.
            continue
        try:
            summaries.append(summarize_chunk_via_groq(ch, client, groq_model))
        except Exception as e:
            # Best effort: skip the failed chunk instead of feeding an error
            # string to the merge prompt as though it were a summary.
            last_error = e

    if not summaries:
        raise RuntimeError(
            f"Could not summarize any of the {len(chunks)} chunks: {last_error}"
        ) from last_error

    final_prompt = "Combine and refine the following summaries into a single clear summary (200-300 words):\n\n" + " ".join(summaries)
    resp = client.chat.completions.create(
        model=groq_model,
        messages=[{"role": "user", "content": final_prompt}],
        temperature=0.2,
        max_tokens=900,
    )
    return resp.choices[0].message.content.strip()

# ================== Deepgram TTS ==================
def deepgram_tts(summary_text: str, deepgram_api_key: str, model: str = DEEPGRAM_TTS_MODEL, encoding: str = DEEPGRAM_ENCODING) -> str:
    """Convert *summary_text* to speech with Deepgram and return the audio file path.

    Args:
        summary_text: Text to speak.
        deepgram_api_key: Key sent in the ``Authorization: Token ...`` header.
        model: Deepgram voice model.
        encoding: Audio encoding; also used as the temp file's extension.

    Returns:
        Path of a temp file holding the response body. The file is not
        deleted automatically.

    Raises:
        RuntimeError: If Deepgram answers with a status code >= 400.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    resp = requests.post(
        "https://api.deepgram.com/v1/speak",
        # Let requests build the query string so the values are URL-encoded.
        params={"model": model, "encoding": encoding},
        headers={"Authorization": f"Token {deepgram_api_key}"},
        json={"text": summary_text},
        timeout=120,
    )
    if resp.status_code >= 400:
        raise RuntimeError(f"Deepgram TTS failed ({resp.status_code}): {resp.text}")

    # The context manager closes the handle even if the write fails;
    # delete=False keeps the file on disk for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=f".{encoding}") as tmp:
        tmp.write(resp.content)
    return tmp.name

# ================== Talk to PDF (Separate Function) ==================
def ask_pdf_question(question: str, groq_key: str, model: str = DEFAULT_GROQ_MODEL) -> str:
    """Answer *question* from the text of the most recently processed PDF.

    Reads the document text from the module-level ``pdf_text_storage``; only
    the first 15000 characters are sent to the model.

    Args:
        question: The user's question.
        groq_key: Groq API key.
        model: Chat model name; an empty value falls back to
            ``DEFAULT_GROQ_MODEL``, matching ``process_pdf_pipeline``.

    Returns:
        The answer prefixed with "🤖 ", or a message starting with "❌" when
        input is missing or the request fails. This function does not raise
        for request errors.
    """
    if not pdf_text_storage["processed"]:
        return "❌ Please process a PDF first before asking questions!"

    # `or ""` keeps a None value from crashing on .strip().
    if not (question or "").strip():
        return "❌ Please enter a question!"

    if not (groq_key or "").strip():
        return "❌ Please provide your Groq API key!"

    try:
        client = Groq(api_key=groq_key)
        prompt = f"Here is PDF content:\n\n{pdf_text_storage['text'][:15000]}\n\nUser Question: {question}\n\nAnswer strictly based on PDF content. Be concise and specific."
        resp = client.chat.completions.create(
            model=model or DEFAULT_GROQ_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=500,
        )
        return f"🤖 {resp.choices[0].message.content.strip()}"
    except Exception as e:
        return f"❌ Error: {str(e)}"

# ================== Diagram via HF (Fixed) ==================
def generate_diagram(summary: str, hf_token: str, max_retries: int = 3) -> str:
    """Generate an illustration for *summary* and return the image file path.

    Tries ``HF_IMAGE_MODEL`` up to *max_retries* times, then each alternative
    model once, and finally falls back to
    ``create_text_diagram_placeholder``.

    Args:
        summary: Summary text; only its first 500 characters go into the prompt.
        hf_token: Hugging Face token sent as a Bearer token.
        max_retries: Number of attempts against the primary model.

    Returns:
        Path of a temp file containing the generated image, or whatever the
        placeholder helper returns when every model fails.
    """
    headers = {"Authorization": f"Bearer {hf_token}"}
    prompt = f"detailed technical diagram, infographic style, clean illustration of: {summary[:500]}"
    payload = {"inputs": prompt}

    primary_url = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}"
    for attempt in range(max_retries):
        # Sleeping after the final attempt would only delay the fallback.
        has_retry_left = attempt < max_retries - 1
        try:
            resp = requests.post(primary_url, headers=headers, json=payload, timeout=60)
        except requests.exceptions.RequestException:
            if has_retry_left:
                time.sleep((attempt + 1) * 5)
            continue

        image_path = _save_image_response(resp)
        if image_path:
            return image_path

        if has_retry_left:
            # Prefer the server's own estimate while the model is loading,
            # otherwise back off linearly.
            loading_wait = _model_loading_wait(resp)
            time.sleep(loading_wait if loading_wait is not None else (attempt + 1) * 10)

    alternative_models = [
        "stabilityai/stable-diffusion-xl-base-1.0",
        "CompVis/stable-diffusion-v1-4"
    ]

    for alt_model in alternative_models:
        alt_url = f"https://api-inference.huggingface.co/models/{alt_model}"
        try:
            resp = requests.post(alt_url, headers=headers, json=payload, timeout=60)
            image_path = _save_image_response(resp)
        except (requests.exceptions.RequestException, OSError):
            # Best effort: move on to the next model.
            continue
        if image_path:
            return image_path

    return create_text_diagram_placeholder(summary)


# Upper bound for a server-suggested wait, so one response cannot block the
# request handler for an arbitrary time.
_MAX_MODEL_LOAD_WAIT_SECONDS = 60.0


def _save_image_response(resp):
    """Write an image response to a temp ``.png`` file and return its path.

    Returns ``None`` when the status is not 200 or the body does not pass the
    image check (image content type, or a body larger than 1000 bytes).
    """
    if resp.status_code != 200:
        return None
    content_type = resp.headers.get("content-type", "")
    if "image" not in content_type and len(resp.content) <= 1000:
        return None
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        tmp.write(resp.content)
    return tmp.name


def _model_loading_wait(resp):
    """Return seconds to wait for a 503 "loading" response, else ``None``."""
    if resp.status_code != 503:
        return None
    try:
        error_data = resp.json()
    except ValueError:
        # Body is not JSON; treat it as an ordinary failure.
        return None
    if not isinstance(error_data, dict):
        return None
    if "loading" not in str(error_data.get("error", "")).lower():
        return None
    try:
        wait = float(error_data.get("estimated_time", 20))
    except (TypeError, ValueError):
        wait = 20.0
    return min(max(wait, 0.0), _MAX_MODEL_LOAD_WAIT_SECONDS)

def create_text_diagram_placeholder(summary: str) -> str:
    """Render *summary* as a simple text image and return the file path.

    Draws a title, up to 18 word-wrapped lines (45 characters wide) and a
    border on an 800x600 dark canvas using Pillow.

    Returns:
        Path of a temp ``.png`` file. If anything goes wrong (including
        Pillow not being installed), the path of a temp ``.txt`` file with a
        short failure note is returned instead.
    """
    try:
        from PIL import Image, ImageDraw, ImageFont

        width, height = 800, 600
        img = Image.new('RGB', (width, height), color='#0a0a0a')
        draw = ImageDraw.Draw(img)

        try:
            font = ImageFont.truetype("arial.ttf", 16)
            title_font = ImageFont.truetype("arial.ttf", 20)
        except OSError:
            # Font file not available on this system.
            font = title_font = ImageFont.load_default()

        def draw_text_safe(position, text, fill, text_font):
            """Draw text, degrading characters the font cannot encode."""
            try:
                draw.text(position, text, fill=fill, font=text_font)
            except UnicodeEncodeError:
                # Pillow's bitmap default font rejects characters outside
                # Latin-1 (e.g. emoji); replace them rather than lose the image.
                degraded = text.encode("latin-1", "replace").decode("latin-1")
                draw.text(position, degraded, fill=fill, font=text_font)

        draw_text_safe((50, 50), "📊 Document Summary", '#00ff88', title_font)

        # Greedy word wrap: a word that does not fit starts a new line.
        max_width = 45
        lines = []
        current_line = ""
        for word in summary.split():
            candidate = f"{current_line} {word}" if current_line else word
            if len(candidate) <= max_width:
                current_line = candidate
            else:
                if current_line:
                    lines.append(current_line)
                current_line = word
        if current_line:
            lines.append(current_line)

        y_offset = 100
        for line in lines[:18]:
            draw_text_safe((50, y_offset), line, '#ccffcc', font)
            y_offset += 25

        draw.rectangle([25, 25, width-25, height-25], outline='#00ff88', width=3)

        # Save through the open handle instead of reopening the path while the
        # temp file is still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
            img.save(tmp, "PNG")
        return tmp.name

    except Exception:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
            tmp.write(f"Diagram generation failed. Summary: {summary[:200]}...".encode())
        return tmp.name

# ================== Main Pipeline ==================
def process_pdf_pipeline(pdf_file, groq_key, deepgram_key, hf_token, groq_model):
    """Run the full pipeline: extract text, summarize, synthesize audio, draw diagram.

    Args:
        pdf_file: Uploaded file; either an object with a ``name`` attribute
            holding the path, or a value convertible to a path string.
        groq_key: Groq API key.
        deepgram_key: Deepgram API key.
        hf_token: Hugging Face token.
        groq_model: Chat model name; empty falls back to ``DEFAULT_GROQ_MODEL``.

    Returns:
        A 4-tuple ``(summary_or_error, audio_path, diagram_path, status)``.
        On any failure the first item is a message starting with "❌", both
        paths are ``None`` and the status is "Process a PDF first!".

    Side effects:
        Updates the module-level ``pdf_text_storage`` used for Q&A. State from
        a previously processed PDF is cleared as soon as a new file starts
        processing, so questions are never answered from a stale document.
    """

    def failure(message):
        return message, None, None, "Process a PDF first!"

    try:
        # `or ""` keeps a None value from crashing on .strip().
        if not (groq_key or "").strip():
            return failure("❌ Missing Groq API key!")
        if not (deepgram_key or "").strip():
            return failure("❌ Missing Deepgram API key!")
        if not (hf_token or "").strip():
            return failure("❌ Missing HuggingFace token!")
        if pdf_file is None:
            return failure("❌ Please upload a PDF file!")

        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)

        # A new document is being processed: forget the previous one first so
        # a failure below cannot leave its text available for Q&A.
        pdf_text_storage["text"] = ""
        pdf_text_storage["processed"] = False

        text = extract_text_from_pdf(pdf_path)
        if not text.strip():
            return failure("❌ PDF contains no extractable text!")

        # Store text for Q&A
        pdf_text_storage["text"] = text
        pdf_text_storage["processed"] = True

        summary = summarize_document(text, groq_api_key=groq_key, groq_model=groq_model or DEFAULT_GROQ_MODEL)
        audio_path = deepgram_tts(summary, deepgram_api_key=deepgram_key)
        diagram_path = generate_diagram(summary, hf_token)

        return summary, audio_path, diagram_path, "✅ PDF processed! You can now ask questions below."

    except Exception as e:
        pdf_text_storage["processed"] = False
        return failure(f"❌ Error: {str(e)}")

# ================== Gen-Z Dark Theme CSS ==================
# Custom stylesheet passed to gr.Blocks(css=...) in build_ui: black background
# with green (#00ff88) accents. The .pulse-effect class defined at the end is
# attached to components through elem_classes in build_ui.
# NOTE(review): selectors such as .gradio-textbox / .gradio-tab / .gradio-slider
# are assumed to match class names emitted by Gradio — confirm against the
# installed Gradio version.
GENZ_CSS = """
/* Main container styling */
.gradio-container {
    background: linear-gradient(135deg, #000000 0%, #0a0a0a 100%) !important;
    color: #00ff88 !important;
    font-family: 'Segoe UI', 'Roboto', sans-serif !important;
}

body {
    background: #000000 !important;
    color: #00ff88 !important;
}

/* Input fields styling */
input, textarea, .gradio-textbox, .gradio-file, select {
    background: linear-gradient(145deg, #111111, #1a1a1a) !important;
    color: #00ff88 !important;
    border: 2px solid #00ff88 !important;
    border-radius: 12px !important;
    box-shadow: 0 4px 15px rgba(0, 255, 136, 0.2) !important;
    transition: all 0.3s ease !important;
}

input:focus, textarea:focus, .gradio-textbox:focus {
    border-color: #00ff00 !important;
    box-shadow: 0 0 25px rgba(0, 255, 136, 0.5) !important;
    transform: translateY(-2px) !important;
}

/* Button styling */
button {
    background: linear-gradient(145deg, #00ff88, #00cc66) !important;
    color: #000000 !important;
    border: none !important;
    border-radius: 15px !important;
    font-weight: bold !important;
    text-transform: uppercase !important;
    letter-spacing: 1px !important;
    box-shadow: 0 6px 20px rgba(0, 255, 136, 0.3) !important;
    transition: all 0.3s ease !important;
}

button:hover {
    background: linear-gradient(145deg, #00cc66, #00ff88) !important;
    transform: translateY(-3px) !important;
    box-shadow: 0 8px 25px rgba(0, 255, 136, 0.5) !important;
}

button:active {
    transform: translateY(1px) !important;
}

/* Headers and text */
h1, h2, h3, h4, .gradio-markdown {
    color: #00ff88 !important;
    text-shadow: 0 0 10px rgba(0, 255, 136, 0.3) !important;
}

h1 {
    font-size: 2.5em !important;
    background: linear-gradient(45deg, #00ff88, #00cc66) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
}

/* Tabs styling */
.gradio-tab {
    background: linear-gradient(145deg, #111111, #1a1a1a) !important;
    color: #00ff88 !important;
    border: 2px solid #00ff88 !important;
    border-radius: 10px !important;
}

.gradio-tab.selected {
    background: linear-gradient(145deg, #00ff88, #00cc66) !important;
    color: #000000 !important;
}

/* Slider styling */
.gradio-slider input[type="range"] {
    background: #00ff88 !important;
}

.gradio-slider .gradio-slider-track {
    background: #333333 !important;
}

.gradio-slider .gradio-slider-thumb {
    background: #00ff88 !important;
    border: 2px solid #00cc66 !important;
}

/* File upload area */
.gradio-file {
    border: 3px dashed #00ff88 !important;
    background: rgba(0, 255, 136, 0.1) !important;
    border-radius: 15px !important;
}

/* Progress bar */
.progress-bar {
    background: linear-gradient(90deg, #00ff88, #00cc66) !important;
    border-radius: 10px !important;
}

/* Accordion styling */
.gradio-accordion {
    background: linear-gradient(145deg, #111111, #1a1a1a) !important;
    border: 2px solid #00ff88 !important;
    border-radius: 12px !important;
}

/* Scrollbar */
::-webkit-scrollbar {
    width: 12px !important;
}

::-webkit-scrollbar-track {
    background: #111111 !important;
}

::-webkit-scrollbar-thumb {
    background: linear-gradient(145deg, #00ff88, #00cc66) !important;
    border-radius: 6px !important;
}

/* Glowing effects */
.glow {
    box-shadow: 0 0 20px rgba(0, 255, 136, 0.5) !important;
}

/* Custom animations */
@keyframes pulse {
    0% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
    50% { box-shadow: 0 0 30px rgba(0, 255, 136, 0.6); }
    100% { box-shadow: 0 0 20px rgba(0, 255, 136, 0.3); }
}

.pulse-effect {
    animation: pulse 2s infinite !important;
}
"""

# ================== UI Build Function ==================
def build_ui():
    """Build and return the Gradio Blocks app.

    Layout: an upload / API-key / settings column, a results column with
    summary, audio and diagram tabs, and a separate question-answer section.

    API keys found in the environment (``LLAMA``, ``DEEPGRAM``, ``HF_TOKEN``)
    are NOT placed in the textboxes: a component's initial value is sent to
    every browser that opens the page, which would expose the secrets.
    Instead, a blank textbox falls back to the environment value on the
    server when a handler runs.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    env_groq = os.environ.get("LLAMA", "")
    env_deepgram = os.environ.get("DEEPGRAM", "")
    env_hf = os.environ.get("HF_TOKEN", "")

    def resolve_key(entered, fallback):
        """Return the key typed by the user, or the server-side one if blank."""
        return entered if (entered or "").strip() else fallback

    def key_placeholder(env_value, default_hint):
        """Tell the user when a server-side key makes the field optional."""
        return "Leave blank to use the server-configured key" if env_value else default_hint

    def run_pipeline(pdf_file, groq_value, deepgram_value, hf_value, model):
        """Click handler: resolve keys server-side, then run the pipeline."""
        return process_pdf_pipeline(
            pdf_file,
            resolve_key(groq_value, env_groq),
            resolve_key(deepgram_value, env_deepgram),
            resolve_key(hf_value, env_hf),
            model,
        )

    def answer_question(question, groq_value, model):
        """Question handler: resolve the Groq key server-side, then ask."""
        return ask_pdf_question(
            question,
            resolve_key(groq_value, env_groq),
            model or DEFAULT_GROQ_MODEL,
        )

    with gr.Blocks(css=GENZ_CSS, title="🔥 PDF AI Pipeline", theme=gr.themes.Base()) as demo:

        # Header - Centered
        gr.Markdown("""
        <div style="text-align: center; margin: 20px 0;">
            <h1 style="font-size: 3.5em; margin-bottom: 10px;">🔥 AI PDF PROCESSOR</h1>
            <h2 style="font-size: 1.8em; margin-bottom: 10px;">Transform PDFs into Audio, Summaries & Interactive Q&A</h2>
            <h3 style="font-size: 1.2em; font-style: italic; opacity: 0.9;"> PEC COHORT 3</h3>
        </div>
        """, elem_classes=["pulse-effect"])

        with gr.Row():
            # Left Column - Upload & API Settings
            with gr.Column(scale=1):
                with gr.Accordion("📁 UPLOAD PDF", open=True):
                    pdf_input = gr.File(
                        label="Drop your PDF here",
                        file_types=[".pdf"],
                        height=150
                    )

                with gr.Accordion("🔑 API KEYS", open=False):
                    gr.Markdown("*Keep your keys secure • Use env vars in production*")
                    groq_key = gr.Textbox(
                        label="🤖 Groq API Key",
                        type="password",
                        placeholder=key_placeholder(env_groq, "sk-...")
                    )
                    deepgram_key = gr.Textbox(
                        label="🎤 Deepgram API Key",
                        type="password",
                        placeholder=key_placeholder(env_deepgram, "Enter Deepgram key")
                    )
                    hf_key = gr.Textbox(
                        label="🤗 HuggingFace Token",
                        type="password",
                        placeholder=key_placeholder(env_hf, "hf_...")
                    )

                with gr.Accordion("⚙️ SETTINGS", open=False):
                    groq_model = gr.Dropdown(
                        label="🧠 AI Model",
                        choices=[
                            "llama-3.1-8b-instant",
                            "llama-3.1-70b-versatile",
                            "mixtral-8x7b-32768",
                            "gemma2-9b-it"
                        ],
                        value=DEFAULT_GROQ_MODEL
                    )

                # Main Process Button
                process_btn = gr.Button(
                    "🚀 PROCESS PDF",
                    variant="primary",
                    size="lg",
                    elem_classes=["pulse-effect"]
                )

            # Right Column - Results
            with gr.Column(scale=2):
                with gr.Tabs():
                    with gr.Tab("📝 SUMMARY"):
                        summary_output = gr.Textbox(
                            label="AI Generated Summary",
                            lines=12,
                            placeholder="Your PDF summary will appear here...",
                            interactive=False
                        )

                    with gr.Tab("🔊 AUDIO"):
                        audio_output = gr.Audio(
                            label="Listen to Summary",
                            type="filepath",
                            interactive=False
                        )

                    with gr.Tab("🎨 DIAGRAM"):
                        diagram_output = gr.Image(
                            label="Visual Representation",
                            interactive=False,
                            height=400
                        )

        # Separate Q&A Section
        gr.Markdown("---")
        gr.Markdown("## 💬 CHAT WITH YOUR PDF")

        with gr.Row():
            with gr.Column(scale=3):
                question_input = gr.Textbox(
                    label="Ask anything about your PDF",
                    placeholder="What are the main findings? • Who are the key people mentioned? • Summarize chapter 2...",
                    lines=2
                )
            with gr.Column(scale=1):
                ask_btn = gr.Button("📨 SEND", variant="secondary", size="lg")

        chat_output = gr.Textbox(
            label="🤖 AI Response",
            lines=8,
            placeholder="Upload and process a PDF first, then ask your questions!",
            interactive=False
        )

        # Status indicator
        status_output = gr.Textbox(
            label="📊 Status",
            value="Ready to process PDF...",
            interactive=False
        )

        # Footer
        gr.Markdown("""
        ---
        **🔥 Pro Tips:**
        • Upload PDFs with extractable text (not image-only)
        • Questions work only after processing
        • Audio generation takes ~30-60 seconds
        • Diagrams may take longer depending on HF API load

        *Built with ❤️ for the AI generation*
        """)

        # Event handlers
        process_btn.click(
            fn=run_pipeline,
            inputs=[pdf_input, groq_key, deepgram_key, hf_key, groq_model],
            outputs=[summary_output, audio_output, diagram_output, status_output],
            show_progress=True
        )

        ask_btn.click(
            fn=answer_question,
            inputs=[question_input, groq_key, groq_model],
            outputs=[chat_output],
            show_progress=False
        )

        # Enter key support for questions
        question_input.submit(
            fn=answer_question,
            inputs=[question_input, groq_key, groq_model],
            outputs=[chat_output]
        )

    return demo

if __name__ == "__main__":
    # Script entry point: build the Blocks app and serve it with a share
    # link, debug mode and error display all switched on.
    demo = build_ui()
    launch_options = {"share": True, "debug": True, "show_error": True}
    demo.launch(**launch_options)