import gradio as gr
import os
import json
import hashlib
import shutil
import time
import re
import anthropic
from fpdf import FPDF
from pathlib import Path
from dotenv import load_dotenv
from google import genai
from google.genai import types
from pdf2image import convert_from_path
from PIL import Image
import io

# -----------------------------------------------------------------------------
# CONFIGURATION
# -----------------------------------------------------------------------------
# On HF Spaces, set this in "Settings" -> "Secrets"
load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY")
CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY")
ACCESS_PASSWORD = os.getenv("APP_PASSWORD")

# Pass 1 (vision scan) models: primary plus a fallback used on 429 rate limits.
SCANNER_MODEL = "gemini-3.1-pro-preview"
FALLBACK_MODEL = "gemini-2.5-pro"
# COACH_MODEL = "claude-sonnet-4-6"
COACH_MODEL = "claude-opus-4-6"

# Per-slide scan results are cached on disk, keyed by image content hash,
# so re-uploading the same deck does not re-spend API quota.
CACHE_DIR = Path("cache/slides")
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Two reviewer personas; each produces an independent critique in Pass 2.
COACH_PERSONAS = {
    "business": {
        "name": "Business Strategy Coach",
        "icon": "💼",
        "role": "You are a Senior Business Strategist and executive communication expert.",
        "focus": (
            "Evaluate through a BUSINESS LENS:\n"
            "- Is the business problem clearly articulated? Would a VP understand it?\n"
            "- Does the executive summary lead with the answer, not the methodology?\n"
            "- Is the value proposition compelling with specific ROI numbers?\n"
            "- Is the business impact quantified and positioned persuasively?\n"
            "- Would this presentation convince decision-makers to act?"
        )
    },
    "analytics": {
        "name": "Analytics & Methodology Coach",
        "icon": "📊",
        "role": "You are a Senior Data Scientist and ML methodology expert.",
        "focus": (
            "Evaluate through a TECHNICAL/ANALYTICAL LENS:\n"
            "- Is the data structure and preparation approach well-documented?\n"
            "- Are the target variables and evaluation metrics appropriate and justified?\n"
            "- Is model selection rigorous? Were enough candidates explored?\n"
            "- Is the HPO strategy systematic and well-explained?\n"
            "- Is validation thorough (holdout tests, cross-validation, confidence intervals)?\n"
            "- Are results reproducible from what is shown?"
        )
    }
}


# -----------------------------------------------------------------------------
# LOGIC: CONVERSION (PDF -> IMAGES)
# -----------------------------------------------------------------------------
def convert_to_images(file_path):
    """Render an uploaded PDF into per-slide JPEGs under temp_slides/.

    Returns a list of Paths (slide-01.jpg, slide-02.jpg, ...) in slide order.
    Raises ValueError for non-PDF uploads (PPTX is not supported in V1).
    """
    output_dir = Path("temp_slides")
    # Start fresh each run so stale slides from a previous deck never leak in.
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir()

    # Check extension
    ext = Path(file_path).suffix.lower()
    if ext == ".pdf":
        print("Converting PDF to images...")
        images = convert_from_path(file_path, dpi=300)
        image_paths = []
        for i, img in enumerate(images):
            path = output_dir / f"slide-{i+1:02d}.jpg"
            img.save(path, "JPEG", quality=85, optimize=True)
            image_paths.append(path)
        return image_paths
    else:
        # TODO: PPTX support requires LibreOffice/Aspose.
        # For V1, we ask users to upload PDF.
        raise ValueError("Please convert your PPTX to PDF before uploading.")


# -----------------------------------------------------------------------------
# LOGIC: PASS 1 (VISION SCANNER)
# -----------------------------------------------------------------------------
def scan_slides(client, image_paths):
    """Generator: scan each slide image with the Gemini vision model.

    Yields (status_message, None) progress tuples, then a final
    ("Scan Complete", (inventory, warnings)) tuple where `inventory` is a
    list of per-slide JSON dicts and `warnings` lists slide numbers that
    could not be scanned after retries/fallback.
    """
    inventory = []
    warnings = []
    total = len(image_paths)
    cache_hits = 0
    use_model = SCANNER_MODEL
    start = time.perf_counter()

    for i, img_path in enumerate(image_paths):
        slide_num = i + 1
        yield f"Reading Slide {slide_num}/{total}...", None

        with open(img_path, "rb") as f:
            img_bytes = f.read()

        # Check slide cache by image hash
        img_hash = hashlib.sha256(img_bytes).hexdigest()
        cache_path = CACHE_DIR / f"{img_hash}.json"
        if cache_path.exists():
            data = json.loads(cache_path.read_text())
            # Cached entry may have been scanned at a different position;
            # always stamp the current slide number.
            data["slide_number"] = slide_num
            inventory.append(data)
            cache_hits += 1
            print(f"  Slide {slide_num}: CACHE HIT")
            continue

        print(f"Scanning Slide {slide_num}...")

        # Rate Limiting: Sleep to respect API limits (avoid 429 errors)
        file_size_mb = len(img_bytes) / (1024 * 1024)
        if file_size_mb > 1.0:
            print(f"  Large file ({file_size_mb:.1f}MB). Pausing 10s to refill quota...")
            time.sleep(10)
        else:
            time.sleep(2)

        prompt = f"""
Analyze this slide (Slide {slide_num}).

INSTRUCTIONS:
1. **Title**: Extract the title. If text is embedded in an image (e.g. "Questions"), use that. If none, "Untitled".
2. **Visuals**: Describe the visual content (e.g. "Photo of oil rig", "Bar chart of accuracy").
3. **Busy**: boolean true if crowded.

OUTPUT STRICT JSON:
{{
    "slide_number": {slide_num},
    "title": "Extracted Title",
    "main_text_bullets": ["List of points"],
    "visual_elements": {{ "chart_count": Int, "screenshot_count": Int, "is_busy": Bool }},
    "visual_description": "Brief description of images/charts",
    "key_takeaway": "Summary sentence"
}}
"""

        max_retries = 3
        slide_ok = False
        # Try the current model first, then the fallback; stop as soon as
        # one of them produces a valid JSON dict for this slide.
        for model_name in [use_model, FALLBACK_MODEL]:
            if slide_ok:
                break
            for attempt in range(max_retries):
                try:
                    response = client.models.generate_content(
                        model=model_name,
                        contents=[
                            types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg"),
                            prompt
                        ],
                        config=types.GenerateContentConfig(
                            response_mime_type="application/json",
                            temperature=0.1
                        )
                    )
                    if response.text is None:
                        raise ValueError("Empty response from model (text is None)")

                    data = json.loads(response.text)
                    # Some responses arrive wrapped in a one-element list.
                    if isinstance(data, list):
                        if len(data) > 0 and isinstance(data[0], dict):
                            data = data[0]
                        else:
                            raise ValueError(f"Model returned a list without a dict: {data}")
                    if isinstance(data, dict):
                        inventory.append(data)
                        cache_path.write_text(json.dumps(data, indent=2))
                        slide_ok = True
                    else:
                        raise ValueError(f"Response is not a valid JSON dict: {data}")
                    break
                except Exception as e:
                    error_str = str(e)
                    is_rate_limit = ("429" in error_str or "RESOURCE_EXHAUSTED" in error_str)
                    is_retryable = (is_rate_limit
                                    or "Empty response" in error_str
                                    or "NoneType" in error_str)
                    if is_rate_limit and model_name == use_model:
                        # Switch to the fallback model for this and all
                        # subsequent slides (use_model is sticky).
                        print(f"  ⚠️ Slide {slide_num}: {model_name} rate limited. Falling back to {FALLBACK_MODEL}...")
                        yield f"⚠️ Rate limit hit — switching to fallback model for Slide {slide_num}...", None
                        use_model = FALLBACK_MODEL
                        time.sleep(2)
                        break
                    elif is_retryable and attempt < max_retries - 1:
                        wait_time = (attempt + 1) * 5  # linear backoff: 5s, 10s
                        print(f"  ⚠️ Slide {slide_num} attempt {attempt+1} failed: {e}. Retrying in {wait_time}s...")
                        yield f"⚠️ Retrying Slide {slide_num} ({attempt+1}/{max_retries})...", None
                        time.sleep(wait_time)
                    else:
                        print(f"  ❌ Slide {slide_num} failed on {model_name}: {e}")
                        break

        if not slide_ok:
            warnings.append(slide_num)
            yield f"⚠️ **Warning: Slide {slide_num} could not be scanned — skipped**", None

    print(f"  Cache: {cache_hits}/{total} slides cached, {total - cache_hits} scanned via API")
    if warnings:
        print(f"  ⚠️ Skipped slides: {warnings}")
    end = time.perf_counter()
    print(f"Elapsed Time: {end-start:.6f} seconds")
    yield "Scan Complete", (inventory, warnings)


def debug_inventory(inventory):
    """Print a sanity-check summary of the scanned inventory (debug only)."""
    print("\n--- DEBUG: INVENTORY SANITY CHECK ---")
    print(f"Total Slides Captured: {len(inventory)}")
    captured_nums = sorted([s.get("slide_number", -1) for s in inventory])
    print(f"Slide Numbers: {captured_nums}")
    # Check for empty content
    for s in inventory:
        if not s.get("title") and not s.get("key_takeaway"):
            print(f"⚠️ WARNING: Slide {s.get('slide_number')} has empty title/takeaway!")
    print("---------------------------------------\n")


# -----------------------------------------------------------------------------
# LOGIC: PASS 2 (COACH CRITIQUE)
# -----------------------------------------------------------------------------
def build_inventory_script(inventory):
    """Shared logic: filter appendices and build the text script from inventory."""

    def get_title(slide):
        if not isinstance(slide, dict):
            return ""
        t = slide.get("title")
        return t if t else ""

    # Appendix slides are excluded from the critique entirely.
    active = [s for s in inventory
              if isinstance(s, dict) and "appendix" not in get_title(s).lower()]
    print(f"DEBUG: Pass 2 using {len(active)} active slides (excluding appendices).")

    script = []
    for s in active:
        visuals = s.get("visual_elements", {})
        if not isinstance(visuals, dict):
            visuals = {}
        busy = "BUSY" if visuals.get("is_busy") else "OK"
        title = s.get('title', 'No Title')
        num = s.get('slide_number', '?')
        takeaway = s.get('key_takeaway', '')
        desc = s.get('visual_description', '')
        entry = f"Slide {num}: {title}\n- Content: {takeaway}\n- Visuals: {desc} [{busy}]"
        script.append(entry)
    return "\n".join(script)


def generate_critique(coach_client, inventory, persona, temperature=0.2):
    """Run one persona's Claude critique over the slide inventory.

    Returns the parsed critique dict with keys "overall_summary" and
    "structure_roadmap". On any failure, returns a dict whose summary
    carries the error message (never raises), so the UI keeps working.
    """
    start = time.perf_counter()
    try:
        full_text = build_inventory_script(inventory)

        prompt = f"""{persona['role']}
Your goal is to guide a Data Science student to professional excellence.

{persona['focus']}

SLIDE INVENTORY:
{full_text}

TASK: Coach this student based on the 8-Step Story Arc.

REQUIRED STORY ARC:
1. Executive Summary
2. Data Structure
3. Targets & Metrics
4. Candidate Models
5. HPO Strategy
6. Best Model Selection
7. Validation
8. Business Impact

INSTRUCTIONS:
1. **Fill the Roadmap**: For each of the 8 steps above, determine status (✅, ⚠️, ❓, ⭕).
2. **Check for Specifics**: If the student provides specific numbers (e.g. "$5,065 savings", "98% accuracy"), YOU MUST QUOTE THEM in the notes. Do not give generic advice if the specific data is present.
3. **Slide Refs**: Cite specific slide numbers in the notes.
4. **Tone**: Encouraging but precise.
5. **Summary**: Write a robust 2-paragraph summary (approx 150 words) from your perspective as {persona['name']}.

OUTPUT STRICT JSON (no markdown fences, no extra text):
{{
    "overall_summary": "Encouraging feedback (2 paragraphs).",
    "structure_roadmap": [
        {{ "step_name": "String (e.g. '1. Exec Summary')", "status_icon": "String (✅, ⚠️, ❓, ⭕)", "coach_notes": "String" }}
    ]
}}"""

        response = coach_client.messages.create(
            model=COACH_MODEL,
            max_tokens=4096,
            temperature=temperature,
            messages=[{"role": "user", "content": prompt}]
        )
        raw_text = response.content[0].text
        print(f"DEBUG: {persona['name']} response received from {COACH_MODEL}.")

        # Despite "no markdown fences" the model sometimes wraps the JSON
        # anyway; strip a ```json ... ``` fence if present.
        cleaned = raw_text.strip()
        fence_match = re.search(r"```(?:json)?\s*\n?(.*?)```", cleaned, re.DOTALL)
        if fence_match:
            cleaned = fence_match.group(1).strip()

        critique = json.loads(cleaned)
        if isinstance(critique, list):
            if len(critique) > 0 and isinstance(critique[0], dict):
                critique = critique[0]
            else:
                raise ValueError(f"Coach returned a list, expected a dictionary. Output: {critique}")

        end = time.perf_counter()
        print(f"Elapsed Time: {end-start:.6f} seconds")
        return critique
    except Exception as e:
        print(f"CRITICAL ERROR in Pass 2 ({persona['name']}): {e}")
        return {
            "overall_summary": f"Error generating critique: {e}",
            "structure_roadmap": [],
        }


# -----------------------------------------------------------------------------
# GRADIO INTERFACE
# -----------------------------------------------------------------------------
def format_roadmap_table(critique):
    """Build a markdown table from a critique's structure_roadmap."""
    table_md = (
        "| STEP "
        "| FLAG "
        "| COACH NOTES |\n|---|:---:|---|\n"
    )
    for item in critique.get("structure_roadmap", []):
        icon = item.get('status_icon', '❓')
        step = item.get('step_name', 'Step')
        note = item.get('coach_notes', '')
        table_md += f"| **{step}** | {icon} | {note} |\n"
    return table_md


def extract_student_name(inventory, fallback):
    """Extract student name from title slide. Checks bullets, key_takeaway, and description."""
    if not inventory or not isinstance(inventory[0], dict):
        return fallback
    slide1 = inventory[0]

    # Check short bullets on slide 1 — name is usually a short entry
    for bullet in slide1.get("main_text_bullets", []):
        if isinstance(bullet, str) and 3 < len(bullet) < 40:
            # Skip entries that look like dates, universities, or titles
            lower = bullet.lower()
            if any(skip in lower for skip in ["university", "capstone", "project", "201", "202"]):
                continue
            return bullet

    # Check key_takeaway for "by [Name]" or "presented by [Name]"
    takeaway = slide1.get("key_takeaway", "")
    for pattern in [r"presented by ([A-Z][a-z]+ [A-Z][a-z]+)",
                    r"by ([A-Z][a-z]+ [A-Z][a-z]+)"]:
        match = re.search(pattern, takeaway)
        if match:
            return match.group(1)

    print("  Note: Could not extract student name from slide 1, using filename.")
    return fallback


def generate_pdf_report(filename, student_name, persona, critique, title_slide_path=None):
    """Write one persona's critique to a PDF file at `filename`.

    Uses the DejaVu TTF fonts (Unicode) so summary text renders correctly;
    roadmap status emoji are mapped to ASCII tags since the font table cells
    render them poorly.
    """
    ICON_MAP = {'✅': '[PASS]', '⚠️': '[WARN]', '❓': '[UNCLEAR]', '⭕': '[MISSING]'}
    FONT_DIR = "/usr/share/fonts/truetype/dejavu"

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_font("DejaVu", "", f"{FONT_DIR}/DejaVuSans.ttf")
    pdf.add_font("DejaVu", "B", f"{FONT_DIR}/DejaVuSans-Bold.ttf")
    pdf.add_page()

    # Title
    pdf.set_font("DejaVu", "B", 18)
    pdf.cell(0, 12, f"Dr. Jones Feedback: {student_name}", new_x="LMARGIN", new_y="NEXT")
    pdf.ln(2)
    pdf.set_font("DejaVu", "", 12)
    pdf.cell(0, 8, persona['name'], new_x="LMARGIN", new_y="NEXT")
    pdf.ln(4)

    # Title slide image
    if title_slide_path and os.path.exists(str(title_slide_path)):
        page_width = pdf.w - pdf.l_margin - pdf.r_margin
        pdf.image(str(title_slide_path), w=page_width)
        pdf.ln(6)

    # Summary
    pdf.set_font("DejaVu", "B", 12)
    pdf.cell(0, 8, "Coach Summary", new_x="LMARGIN", new_y="NEXT")
    pdf.ln(2)
    pdf.set_font("DejaVu", "", 10)
    summary = critique.get("overall_summary", "")
    pdf.multi_cell(0, 5, summary)

    pdf.add_page()

    # Roadmap table
    pdf.set_font("DejaVu", "B", 12)
    pdf.cell(0, 8, "Story Roadmap", new_x="LMARGIN", new_y="NEXT")
    pdf.ln(2)
    table_width = pdf.w - pdf.l_margin - pdf.r_margin
    col_widths = (table_width * 0.20, table_width * 0.10, table_width * 0.70)
    with pdf.table(col_widths=col_widths, text_align="LEFT") as table:
        header = table.row()
        pdf.set_font("DejaVu", "B", 9)
        header.cell("STEP")
        header.cell("FLAG")
        header.cell("COACH NOTES")
        pdf.set_font("DejaVu", "", 8)
        for item in critique.get("structure_roadmap", []):
            icon = item.get('status_icon', '?')
            flag = ICON_MAP.get(icon, icon)
            step = item.get('step_name', 'Step')
            note = item.get('coach_notes', '')
            row = table.row()
            row.cell(step)
            row.cell(flag)
            row.cell(note)

    pdf.output(filename)
    # FIX: the f-string previously had no placeholder and printed a literal
    # "(unknown)" — interpolate the actual output path.
    print(f"  Saved PDF to {filename}")


# One placeholder per output component after the status message
# (biz summary, biz table, ana summary, ana table, image, 2 PDFs, progress).
EMPTY_OUTPUTS = ("", "", "", "", None, None, None, "")


def process_presentation(file_obj, email, password):
    """Gradio generator callback: validate, convert, scan, critique, report.

    Yields 9-tuples matching the `outputs` list on the REVIEW button:
    (status, biz_summary, biz_table, ana_summary, ana_table,
     title_image, biz_pdf, ana_pdf, progress).
    """
    temperature = 0.2
    print("--- NEW JOB STARTED ---")
    if file_obj is None:
        yield ("❌ Error: No file uploaded",) + EMPTY_OUTPUTS
        return

    # Validate TAMU email domain
    if not email or not re.match(r'^[^@]+@(\w+\.)?tamu\.edu$', email.strip(), re.IGNORECASE):
        yield ("❌ Please enter a valid tamu.edu email address",) + EMPTY_OUTPUTS
        return
    if password != ACCESS_PASSWORD:
        yield ("❌ Incorrect Password",) + EMPTY_OUTPUTS
        return
    print(f"  User: {email.strip()}")

    if not API_KEY:
        yield ("❌ Server Error: Google API Key missing",) + EMPTY_OUTPUTS
        return
    if not CLAUDE_API_KEY:
        yield ("❌ Server Error: Claude API Key missing",) + EMPTY_OUTPUTS
        return

    scanner_client = genai.Client(api_key=API_KEY)
    coach_client = anthropic.Anthropic(api_key=CLAUDE_API_KEY)

    try:
        # 1. Convert
        print("Step 1: Converting PDF...")
        yield ("⏳ **Converting PDF to images...**",) + EMPTY_OUTPUTS
        images = convert_to_images(file_obj.name)
        print(f"  Converted {len(images)} slides.")

        # 2. Scan (Pass 1 - Gemini Flash)
        yield (f"⏳ **Scanning {len(images)} slides...**",) + EMPTY_OUTPUTS
        print("Step 2: Scanning Slides (Pass 1)...")
        scanner = scan_slides(scanner_client, images)
        inventory = []
        scan_warnings = []
        # Forward scanner progress to the UI; the final yield carries the result.
        for msg, result in scanner:
            if result is None:
                yield (f"⏳ **{msg}**",) + EMPTY_OUTPUTS
            else:
                inventory, scan_warnings = result
        print("  Scan Complete.")

        # Save Inventory
        original_stem = Path(file_obj.name).stem
        target_dir = Path("slides_images") / original_stem
        target_dir.mkdir(parents=True, exist_ok=True)
        inventory_filename = target_dir / f"{original_stem}_Inventory.json"
        with open(inventory_filename, "w") as f:
            json.dump(inventory, f, indent=4)
        print(f"  Saved Inventory to {inventory_filename}")

        # 3. Coach (Pass 2 - Sonnet 4.6, two personas)
        debug_inventory(inventory)
        biz_persona = COACH_PERSONAS["business"]
        ana_persona = COACH_PERSONAS["analytics"]

        yield (f"⏳ **💼 {biz_persona['name']} reviewing...**",) + EMPTY_OUTPUTS
        print(f"Step 3a: {biz_persona['name']} [Temp: {temperature}]...")
        biz_critique = generate_critique(coach_client, inventory, biz_persona, temperature)
        print(f"  {biz_persona['name']} done.")

        yield (f"⏳ **📊 {ana_persona['name']} reviewing...**",) + EMPTY_OUTPUTS
        print(f"Step 3b: {ana_persona['name']} [Temp: {temperature}]...")
        ana_critique = generate_critique(coach_client, inventory, ana_persona, temperature)
        print(f"  {ana_persona['name']} done.")

        # 4. Format Output
        biz_summary = biz_critique.get("overall_summary", "")
        biz_table = format_roadmap_table(biz_critique)
        ana_summary = ana_critique.get("overall_summary", "")
        ana_table = format_roadmap_table(ana_critique)

        # Create separate PDF reports
        student_name = extract_student_name(inventory, original_stem)
        title_slide = images[0] if images else None
        biz_pdf = f"{original_stem}_Business_Review.pdf"
        ana_pdf = f"{original_stem}_Analytics_Review.pdf"
        generate_pdf_report(biz_pdf, student_name, biz_persona, biz_critique, title_slide)
        generate_pdf_report(ana_pdf, student_name, ana_persona, ana_critique, title_slide)

        done_msg = "✅ Done!"
        if scan_warnings:
            skipped = ", ".join(str(s) for s in scan_warnings)
            done_msg += f" ⚠️ **Warning: Slide(s) {skipped} could not be scanned and were excluded from the review.**"

        yield done_msg, biz_summary, biz_table, ana_summary, ana_table, \
            images[0], biz_pdf, ana_pdf, ""

    except Exception as e:
        print(f"CRITICAL ERROR: {e}")
        yield (f"❌ Error: {str(e)}",) + EMPTY_OUTPUTS


# Define a custom maroon color palette
maroon = gr.themes.Color(
    c50="#fdf2f2",
    c100="#fbe5e5",
    c200="#f7c8c8",
    c300="#f09e9e",
    c400="#e66a6a",
    c500="#d63d3d",
    c600="#800000",  # Core Maroon
    c700="#800000",
    c800="#800000",  # Deep Maroon
    c900="#701a1a",
    c950="#450a0a",
)

with gr.Blocks(title="Dr. Jones AI Coach",
               theme=gr.themes.Default(primary_hue=maroon, text_size="lg")) as demo:
    gr.Markdown("# 🎓 Capstone Slide Review")
    gr.Markdown("Upload your slides (PDF) for feedback from your AI coaching committee.")

    with gr.Row():
        with gr.Column(scale=3):
            file_input = gr.File(label="Upload PDF Slides",
                                 file_types=[".pdf", "application/pdf"],
                                 type="filepath", height=150)
        with gr.Column(scale=1):
            email_input = gr.Textbox(label="Email Address", placeholder="you@tamu.edu")
            pass_input = gr.Textbox(label="Password", type="password")

    status = gr.Markdown("**Status**: Ready")
    btn = gr.Button("REVIEW PRESENTATION", scale=1, variant="primary")

    with gr.Row():
        with gr.Column(scale=1):
            preview_img = gr.Image(label="Title Slide", interactive=False)
            with gr.Row():
                download_biz = gr.File(label="💼 Business (PDF)")
                download_ana = gr.File(label="📊 Analytics (PDF)")
            progress_status = gr.Markdown(value="")
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("💼 Business Strategy Coach"):
                    biz_summary_display = gr.Textbox(label="Business Summary",
                                                     show_label=False, lines=6,
                                                     interactive=False)
                with gr.TabItem("📊 Analytics & Methodology Coach"):
                    ana_summary_display = gr.Textbox(label="Analytics Summary",
                                                     show_label=False, lines=6,
                                                     interactive=False)
            with gr.Tabs():
                with gr.TabItem("💼 Business Roadmap"):
                    biz_roadmap_display = gr.Markdown()
                with gr.TabItem("📊 Analytics Roadmap"):
                    ana_roadmap_display = gr.Markdown()

    btn.click(
        fn=process_presentation,
        inputs=[file_input, email_input, pass_input],
        outputs=[status, biz_summary_display, biz_roadmap_display,
                 ana_summary_display, ana_roadmap_display,
                 preview_img, download_biz, download_ana, progress_status]
    )

if __name__ == "__main__":
    demo.queue()  # Enable queuing for generators
    demo.launch(debug=True)  # Debug mode on