import huggingface_hub
# Compatibility shim: make sure ``huggingface_hub.HfFolder`` exists before the
# imports below run, by installing a minimal stand-in when the installed
# huggingface_hub does not expose that attribute.
try:
    huggingface_hub.HfFolder
except AttributeError:
    class HfFolder:
        """Minimal stand-in that only provides ``get_token``."""

        @staticmethod
        def get_token():
            """Return the token reported by ``huggingface_hub.get_token()``."""
            return huggingface_hub.get_token()

    huggingface_hub.HfFolder = HfFolder

import spaces
import gradio as gr
import torch
import faiss
import numpy as np
import json
import re
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification

# Startup banner, printed once when the module is imported.
print("===============================================")
print("🚀 BOOTING VIORRA TRIO FOR HUGGINGFACE ZEROGPU")
print("===============================================\n")

# Global variables for Lazy Loading.
# Everything below starts empty and is filled in by load_models_lazily() the
# first time it runs; `is_loaded` guards against loading twice.
is_loaded = False      # set to True once every resource below has been loaded
dataset = None         # result of load_dataset(...) for the reference essays
corpus_texts = []      # the dataset's 'Essay' column
corpus_feedback = []   # the dataset's 'Feedback_cleaned' column
embedder = None        # SentenceTransformer used for corpus and query embeddings
index = None           # FAISS IndexFlatL2 built over the corpus embeddings
aes_tokenizer = None   # tokenizer for the essay-scoring (AES) model
aes_model = None       # sequence-classification model used for the base score
llm_tokenizer = None   # tokenizer for the generative model
llm = None             # causal language model that writes the evaluation

def load_models_lazily():
    """Load the reference corpus, FAISS index and all models on first call.

    Populates the module-level globals (``dataset``, ``corpus_texts``,
    ``corpus_feedback``, ``embedder``, ``index``, ``aes_tokenizer``,
    ``aes_model``, ``llm_tokenizer``, ``llm``) and then sets ``is_loaded``.
    Later calls return immediately. ``is_loaded`` is only set after every
    step has succeeded, so a failed load is retried on the next call.

    Raises:
        Whatever the underlying loaders raise (dataset/model download or
        initialisation errors) is propagated unchanged.
    """
    global is_loaded, dataset, corpus_texts, corpus_feedback, embedder, index
    global aes_tokenizer, aes_model, llm_tokenizer, llm

    if is_loaded:
        return

    print("--> [1/3] Loading Vector Database from published Viorra Dataset...")
    dataset = load_dataset('qsardor/viorra-admissions-essays', split='train')

    # Materialise the columns as plain Python lists. Depending on the
    # installed `datasets` version, column access may return a lazy column
    # object instead of a list; a real list guarantees that encoding,
    # integer indexing with FAISS results, and slicing all behave the same.
    corpus_texts = list(dataset['Essay'])
    corpus_feedback = list(dataset['Feedback_cleaned'])

    embedder = SentenceTransformer('all-MiniLM-L6-v2', device="cpu")
    corpus_embeddings = embedder.encode(corpus_texts, convert_to_numpy=True, show_progress_bar=False)
    # FAISS requires a C-contiguous float32 matrix; enforce it explicitly
    # rather than relying on the encoder's default output dtype/layout.
    corpus_embeddings = np.ascontiguousarray(corpus_embeddings, dtype=np.float32)
    dimension = corpus_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(corpus_embeddings)

    print("--> [2/3] Lazy Loading AES Scoring Model...")
    aes_model_id = "Kevintu/Engessay_grading_ML"
    aes_tokenizer = AutoTokenizer.from_pretrained(aes_model_id)
    aes_model = AutoModelForSequenceClassification.from_pretrained(aes_model_id)

    print("--> [3/3] Lazy Loading The Brain (google/gemma-4-E2B-it in float16)...")
    llm_id = "google/gemma-4-E2B-it"
    llm_tokenizer = AutoTokenizer.from_pretrained(llm_id, trust_remote_code=True)
    llm = AutoModelForCausalLM.from_pretrained(
        llm_id,
        torch_dtype=torch.float16,
        trust_remote_code=True
    ).eval()

    # Flip the flag last so a partially failed load is never treated as done.
    is_loaded = True
    print("\n✅ MODELS LOADED LAZILY ON FIRST RUN.")

def _fill_prompt_template(template, values):
    """Replace every ``[[NAME]]`` placeholder in *template* in a single pass.

    A single regex pass means substituted text is never rescanned, so a
    placeholder token that happens to appear inside the student's essay or
    the retrieved examples is left untouched instead of being expanded.
    """
    if not values:
        return template
    names = "|".join(re.escape(name) for name in values)
    return re.sub(
        r"\[\[(" + names + r")\]\]",
        lambda match: values[match.group(1)],
        template,
    )


def _render_report(data):
    """Turn the parsed LLM JSON object into the Markdown evaluation report."""
    clarity = data.get("scores", {}).get("clarity_meter", 0)
    motivation = data.get("scores", {}).get("motivation_index", 0)
    fit = data.get("scores", {}).get("institutional_fit", 0)

    parts = [f"""
### 📊 Final Evaluation Scores
* **Clarity Meter:** {clarity}/100
* **Motivation Index:** {motivation}/100
* **Institutional Fit:** {fit}/100

### 🧠 Expert Diagnostics
"""]
    for diag in data.get("diagnostics", []):
        parts.append(
            f"- **{diag.get('title', 'Issue')}**: \"*{diag.get('quote', '')}*\" \n  - **Feedback**: {diag.get('feedback', '')}\n\n"
        )
    return "".join(parts)


@spaces.GPU(duration=120)
def analyze_essay(test_text, progress=gr.Progress()):
    """Score a personal statement and produce a Markdown feedback report.

    Pipeline: validate the input, lazily load the models, compute a base
    score with the AES classifier, retrieve the two nearest reference essays
    from the FAISS index, fill the prompt template, generate with the LLM,
    and render the JSON found in the LLM output as Markdown.

    Args:
        test_text: The essay text submitted by the user.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        A ``(message, update)`` tuple: the Markdown report (or a validation /
        parse-failure message) and a ``gr.update(visible=True)`` for the
        second output component.
    """
    # Validate first: rejected submissions must not pay for model loading,
    # which would otherwise eat into the GPU time window.
    if not test_text or not test_text.strip():
        return "Please enter a personal statement to analyze.", gr.update(visible=True)

    word_count = len(test_text.split())
    if word_count < 150:
        return f"Invalid Submission: Your essay is only {word_count} words. The Common App personal statement has a strict minimum of 150 words. Do not submit fragments or test strings.", gr.update(visible=True)
    if word_count > 1000:
        return f"Invalid Submission: Your essay is {word_count} words, which exceeds the strict 1,000-word maximum limit of the VIORRA. Please edit it down before requesting Ivy League analysis.", gr.update(visible=True)

    progress(0.1, desc="Loading embedding model & vector DB...")
    load_models_lazily()

    # CUDA EXECUTION
    device = torch.device("cuda")

    # Move models to GPU
    embedder.to(device)
    aes_model.to(device)
    llm.to(device)

    # 1. Base Score (AES) -- computed exactly once.
    progress(0.3, desc="Calculating base grammar & syntax scores...")
    # NOTE: input is truncated to 512 tokens, so long essays are only
    # partially scored.
    aes_inputs = aes_tokenizer(test_text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        aes_outputs = aes_model(**aes_inputs)
        # Probability-weighted mean of the class indices.
        # NOTE(review): assumes the logits are per-class scores -- confirm
        # against the model card.
        probs = torch.nn.functional.softmax(aes_outputs.logits[0], dim=-1)
        classes = torch.arange(len(probs), dtype=torch.float32, device=device)
        raw_score = (probs * classes).sum().item()
    final_aes_score = round(max(1.0, min(5.0, raw_score)), 2)

    # 2. RAG Context Retrieval
    progress(0.5, desc="Retrieving relevant historical examples...")
    query_embedding = embedder.encode([test_text], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding, 2)
    # FAISS pads missing neighbours with -1; drop them so they are not
    # misread as "last row". int() turns NumPy integers into plain ints.
    neighbour_ids = [int(idx) for idx in indices[0] if idx >= 0]
    rag_parts = []
    for rank, idx in enumerate(neighbour_ids, start=1):
        rag_parts.append(f"\n--- SIMILAR ADMISSIONS ESSAY {rank} ---\n")
        rag_parts.append("EXCERPT: " + corpus_texts[idx][:300] + "...\n")
        rag_parts.append("ADMISSIONS FEEDBACK: " + corpus_feedback[idx] + "\n")
    rag_examples = "".join(rag_parts)

    # 3. Prompt assembly
    try:
        with open("system_prompt.txt", "r", encoding="utf-8") as f:
            sys_prompt = f.read()
    except FileNotFoundError:
        sys_prompt = "[STUDENT ESSAY]\n\"[[TEST_TEXT]]\"\n[INSTRUCTIONS]\nOutput empty JSON."

    sys_prompt = _fill_prompt_template(sys_prompt, {
        "TEST_TEXT": test_text,
        "AES_SCORE": str(final_aes_score),
        "NARRATIVE_HOOK": str(int((final_aes_score/5.0)*100)),
        "RAG_EXAMPLES": rag_examples,
    })

    messages = [{"role": "user", "content": sys_prompt}]
    try:
        prompt_text = llm_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except Exception:
        # Deliberate best effort: fall back to the raw prompt when the chat
        # template cannot be applied.
        prompt_text = f"{sys_prompt}\n"

    inputs = llm_tokenizer(prompt_text, return_tensors="pt").to(device)

    # 4. Generation
    progress(0.7, desc="Analyzing Statement (this may take up to 40 seconds)...")
    with torch.no_grad():
        outputs = llm.generate(
            **inputs,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.7
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    output_text = llm_tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)

    # 5. Parse JSON
    progress(0.9, desc="Formatting evaluation report...")
    try:
        # Take the span from the first "{" to the last "}" so surrounding
        # chatter or code fences do not break parsing.
        match = re.search(r'\{[\s\S]*\}', output_text)
        json_str = match.group(0) if match else output_text
        data = json.loads(json_str)
        return _render_report(data), gr.update(visible=True)
    except Exception as e:
        # UI boundary: any malformed or unexpected LLM output is shown to
        # the user rather than raised.
        return f"Failed to parse JSON. Raw output: \n{output_text}\n\nError: {str(e)}", gr.update(visible=True)

# Gradio theme for the app: the default theme with a red primary hue and an
# Inter-first font stack that falls back to generic sans-serif fonts.
ivy_theme = gr.themes.Default(
    primary_hue="red",
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
)

# Custom stylesheet passed to gr.Blocks(css=...). Most rules are duplicated
# under a `.dark` selector and marked `!important`; per the stylesheet's own
# comments this is meant to pin the cream-white look over Gradio's dark mode.
# The `.viorra-*` classes style the HTML snippets rendered with gr.HTML.
css = """
/* ===== VIORRA PREMIUM REDESIGN ===== */

/* Global: lock to cream-white, override Gradio dark mode completely */
body, .dark, .gradio-container, .dark .gradio-container,
.main, .dark .main, .contain, .dark .contain {
    background-color: #faf8f5 !important;
    color: #1e293b !important;
}

/* All block/panel backgrounds */
.block, .dark .block, .panel, .dark .panel,
.form, .dark .form, .wrap, .dark .wrap {
    background-color: transparent !important;
    border: none !important;
    box-shadow: none !important;
}

/* Main column: min height to prevent footer jump */
.gradio-column, .dark .gradio-column {
    min-height: 50vh !important;
}

/* Textarea — warm cream, not stark white */
textarea, .dark textarea {
    background-color: #faf8f5 !important;
    color: #1e293b !important;
    border: 1.5px solid #d4cdc3 !important;
    border-radius: 12px !important;
    padding: 1.25rem !important;
    font-size: 1rem !important;
    line-height: 1.7 !important;
    font-family: 'Inter', sans-serif !important;
    transition: border-color 0.2s ease, box-shadow 0.2s ease !important;
}
textarea:focus, .dark textarea:focus {
    border-color: #8b0000 !important;
    box-shadow: 0 0 0 3px rgba(139, 0, 0, 0.08) !important;
    outline: none !important;
}
textarea::placeholder {
    color: #9ca3af !important;
    font-style: italic !important;
}

/* Primary button */
button.primary, .dark button.primary {
    background: linear-gradient(135deg, #8b0000, #a31515) !important;
    color: #ffffff !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 0.9rem 2rem !important;
    font-size: 1.05rem !important;
    font-weight: 600 !important;
    letter-spacing: 0.02em !important;
    cursor: pointer !important;
    transition: all 0.25s ease !important;
    box-shadow: 0 2px 8px rgba(139, 0, 0, 0.15) !important;
}
button.primary:hover, .dark button.primary:hover {
    background: linear-gradient(135deg, #a31515, #c0392b) !important;
    box-shadow: 0 4px 16px rgba(139, 0, 0, 0.25) !important;
    transform: translateY(-1px) !important;
}

/* Secondary button */
button.secondary, .dark button.secondary {
    background-color: #faf8f5 !important;
    color: #8b0000 !important;
    border: 1.5px solid #8b0000 !important;
    border-radius: 12px !important;
    padding: 0.9rem 2rem !important;
    font-size: 1.05rem !important;
    font-weight: 600 !important;
    cursor: pointer !important;
    transition: all 0.25s ease !important;
}
button.secondary:hover, .dark button.secondary:hover {
    background-color: #fef2f2 !important;
    box-shadow: 0 2px 8px rgba(139, 0, 0, 0.1) !important;
}

/* Markdown / prose output */
.prose, .dark .prose,
.prose *, .dark .prose *,
.markdown-text, .dark .markdown-text {
    color: #1e293b !important;
}
.prose strong, .dark .prose strong {
    color: #0f172a !important;
}
.prose h1, .prose h2, .prose h3,
.dark .prose h1, .dark .prose h2, .dark .prose h3 {
    color: #0f172a !important;
    font-weight: 700 !important;
}
.prose em, .dark .prose em {
    color: #1e293b !important;
}
.prose li, .dark .prose li {
    color: #1e293b !important;
}

/* Copy button — styled nicely */
button[title="copy"], .dark button[title="copy"],
.copy-btn, .dark .copy-btn {
    background-color: #f1ece4 !important;
    color: #8b0000 !important;
    border: 1px solid #d4cdc3 !important;
    border-radius: 8px !important;
    padding: 6px 10px !important;
    transition: all 0.2s ease !important;
}
button[title="copy"]:hover, .dark button[title="copy"]:hover,
.copy-btn:hover, .dark .copy-btn:hover {
    background-color: #8b0000 !important;
    color: #ffffff !important;
    border-color: #8b0000 !important;
}

/* Header */
.viorra-hero {
    text-align: center;
    padding: 3rem 1rem 2rem;
    margin-bottom: 0.5rem;
}
.viorra-hero h1 {
    font-size: 3.8rem;
    font-weight: 800;
    color: #8b0000;
    letter-spacing: 6px;
    text-transform: uppercase;
    margin: 0 0 0.6rem 0;
    line-height: 1;
}
.viorra-hero .tagline {
    font-size: 1.05rem;
    color: #64748b;
    font-weight: 400;
    letter-spacing: 0.5px;
}
.viorra-hero .divider {
    width: 60px;
    height: 3px;
    background: linear-gradient(90deg, #8b0000, #c0392b);
    margin: 1.2rem auto 0;
    border-radius: 2px;
}

/* Footer */
.viorra-footer {
    text-align: center;
    margin-top: 3rem;
    padding: 2rem 1rem;
    border-top: 1px solid #e2ddd5;
}
.viorra-footer .team-label {
    font-size: 0.95rem;
    color: #475569;
    margin-bottom: 0.75rem;
    font-weight: 500;
}
.viorra-footer .team-label strong {
    color: #1e293b;
}
.viorra-footer .members {
    font-size: 0.85rem;
    color: #64748b;
    line-height: 1.8;
}
.viorra-footer .members strong {
    color: #334155;
    font-weight: 600;
}
.viorra-footer .members .role {
    color: #94a3b8;
    font-size: 0.8rem;
}

/* Progress bar */
.progress-bar, .dark .progress-bar {
    background-color: #8b0000 !important;
}
.progress-text, .dark .progress-text {
    color: #1e293b !important;
}
.eta-bar {
    background: linear-gradient(135deg, #8b0000, #c0392b) !important;
}

/* Warning Box */
.viorra-warning {
    background-color: #fef2f2;
    border: 1px solid #f87171;
    color: #991b1b;
    padding: 1rem;
    border-radius: 8px;
    text-align: center;
    margin: 0 auto 2rem auto;
    max-width: 800px;
    font-size: 0.95rem;
    line-height: 1.5;
}
.viorra-warning strong {
    color: #7f1d1d;
}

/* Remove Gradio default borders and shadows on containers */
.gradio-container .contain, .dark .gradio-container .contain {
    box-shadow: none !important;
}
"""

# UI definition: hero/warning banner, an input view (textbox + submit), an
# output view (report + reset button), the event wiring that switches between
# the two views, and a credits footer.
with gr.Blocks(theme=ivy_theme, css=css) as demo:
    gr.HTML('''
    <div class="viorra-hero">
        <h1>VIORRA</h1>
        <p class="tagline">Smart Personal Statement Checker &bull; Fast, Accurate, and Reliable</p>
        <div class="divider"></div>
    </div>
    
    <div class="viorra-warning">
        <strong>⚠️ LIMITED DEMO VERSION</strong><br>
        This is a lightweight cloud preview. For the complete VIORRA experience—including 100% offline privacy, interactive chat follow-ups, and session history—<a href="https://github.com/qsardor/VIORRA" target="_blank" style="color: #8b0000; text-decoration: underline; font-weight: 600;">install the full application from GitHub</a>.
    </div>
    ''')

    with gr.Column():
        # Input view: visible on load.
        with gr.Column(visible=True) as input_container:
            essay_input = gr.Textbox(
                lines=12,
                placeholder="Paste the student's Personal Statement here.\n\nVIORRA will instantly analyze clarity, score the document, and provide expert-level diagnostics...",
                label="Personal Statement",
                show_label=False,
                container=False
            )
            submit_btn = gr.Button("Analyze Personal Statement", variant="primary", size="lg")

        # Output view: hidden until the first submission.
        with gr.Column(visible=False) as output_container:
            report_output = gr.Markdown(
                label="Evaluation Report",
                value="*Analyzing... Please wait.*",
                show_copy_button=True
            )
            reset_btn = gr.Button("Analyze Another Statement", variant="secondary", size="lg", visible=False)

    def show_loading():
        """Switch to the output view and show a placeholder while analysis runs.

        Returns updates, in order, for: essay_input, submit_btn,
        output_container, report_output, reset_btn.
        """
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(value="*Analyzing your statement... This may take up to 40 seconds.*"),
            gr.update(visible=False)
        )

    def reset_ui():
        """Return to the input view with an emptied textbox.

        Returns updates, in order, for: essay_input, submit_btn,
        output_container. The textbox is re-shown and cleared in a single
        update so each output component is listed exactly once.
        """
        return (
            gr.update(visible=True, value=""),
            gr.update(visible=True),
            gr.update(visible=False),
        )

    # Step 1 swaps the views immediately (queue=False); step 2 runs the
    # analysis and then fills the report and reveals the reset button.
    submit_btn.click(
        fn=show_loading,
        inputs=[],
        outputs=[essay_input, submit_btn, output_container, report_output, reset_btn],
        queue=False
    ).then(
        fn=analyze_essay,
        inputs=essay_input,
        outputs=[report_output, reset_btn]
    )

    reset_btn.click(
        fn=reset_ui,
        inputs=[],
        outputs=[essay_input, submit_btn, output_container],
        queue=False
    )

    gr.HTML("""
    <div class="viorra-footer">
        <p class="team-label">Created with ❤️ by <strong>Team Violets</strong></p>
        <p class="members">
            <strong>Azizakhan Rustamova</strong> <span class="role">Founder &amp; Marketing</span> &bull; 
            <strong>Sardor Qurbonov</strong> <span class="role">Main Developer of Software</span><br>
            <strong>Ruhshona Farhodova</strong> <span class="role">Business Developer</span> &bull; 
            <strong>Damirbek Xolnazarov</strong> <span class="role">Full Stack Developer</span>
        </p>
    </div>
    """)

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()