"""VIORRA demo app: scores a personal statement and renders an LLM-written report.

The module builds a Gradio UI (``demo``) around ``analyze_essay``. Models and
the retrieval corpus are loaded lazily on the first valid submission.
"""
import huggingface_hub

# Compatibility shim: when the installed huggingface_hub has no `HfFolder`,
# provide a minimal stand-in that forwards to `huggingface_hub.get_token()`.
# NOTE(review): presumably one of the libraries imported below still expects
# `HfFolder` -- confirm which one before removing this.
if not hasattr(huggingface_hub, 'HfFolder'):
    class HfFolder:
        @staticmethod
        def get_token():
            return huggingface_hub.get_token()

    huggingface_hub.HfFolder = HfFolder

# Import order below is preserved from the original file (shim first).
import spaces
import gradio as gr
import torch
import faiss
import numpy as np
import json
import re
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification

print("===============================================")
print("🚀 BOOTING VIORRA TRIO FOR HUGGINGFACE ZEROGPU")
print("===============================================\n")

# Word-count bounds enforced on submissions before any model work happens.
MIN_WORDS = 150
MAX_WORDS = 1000

# Matches template placeholders such as [[TEST_TEXT]].
_PLACEHOLDER_RE = re.compile(r"\[\[([A-Z_]+)\]\]")

# Global variables for Lazy Loading
is_loaded = False
dataset = None
corpus_texts = []
corpus_feedback = []
embedder = None
index = None
aes_tokenizer = None
aes_model = None
llm_tokenizer = None
llm = None


def load_models_lazily():
    """Populate the module-level corpus, index and models on first call.

    Later calls return immediately once ``is_loaded`` is True. The flag is
    only set after every load step succeeded, so a failed load is retried on
    the next call.
    """
    global is_loaded, dataset, corpus_texts, corpus_feedback, embedder, index
    global aes_tokenizer, aes_model, llm_tokenizer, llm

    if is_loaded:
        return

    print("--> [1/3] Loading Vector Database from published Viorra Dataset...")
    dataset = load_dataset('qsardor/viorra-admissions-essays', split='train')
    corpus_texts = dataset['Essay']
    corpus_feedback = dataset['Feedback_cleaned']

    # The corpus is embedded on CPU here; analyze_essay moves the embedder to
    # the GPU later for the query embedding.
    embedder = SentenceTransformer('all-MiniLM-L6-v2', device="cpu")
    corpus_embeddings = embedder.encode(corpus_texts, convert_to_numpy=True, show_progress_bar=False)
    dimension = corpus_embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(corpus_embeddings)

    print("--> [2/3] Lazy Loading AES Scoring Model...")
    aes_model_id = "Kevintu/Engessay_grading_ML"
    aes_tokenizer = AutoTokenizer.from_pretrained(aes_model_id)
    aes_model = AutoModelForSequenceClassification.from_pretrained(aes_model_id)

    print("--> [3/3] Lazy Loading The Brain (google/gemma-4-E2B-it in float16)...")
    llm_id = "google/gemma-4-E2B-it"
    llm_tokenizer = AutoTokenizer.from_pretrained(llm_id, trust_remote_code=True)
    llm = AutoModelForCausalLM.from_pretrained(
        llm_id,
        torch_dtype=torch.float16,
        trust_remote_code=True
    ).eval()

    is_loaded = True
    print("\n✅ MODELS LOADED LAZILY ON FIRST RUN.")


def _score_essay(text, device):
    """Return the AES score for *text*: expected class index, clamped to [1, 5]."""
    aes_inputs = aes_tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        aes_outputs = aes_model(**aes_inputs)
        probs = torch.nn.functional.softmax(aes_outputs.logits[0], dim=-1)
        classes = torch.arange(len(probs), dtype=torch.float32, device=device)
        # Probability-weighted mean of the class indices.
        raw_score = (probs * classes).sum().item()
    return round(max(1.0, min(5.0, raw_score)), 2)


def _retrieve_examples(text, k=2):
    """Return a prompt section describing the *k* corpus essays nearest to *text*."""
    query_embedding = embedder.encode([text], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding, k)

    examples = []
    for raw_idx in indices[0]:
        idx = int(raw_idx)
        if idx < 0:
            # FAISS reports -1 for result slots it could not fill.
            continue
        excerpt = (corpus_texts[idx] or "")[:300]
        feedback = corpus_feedback[idx] or ""
        examples.append(
            f"\n--- SIMILAR ADMISSIONS ESSAY {len(examples) + 1} ---\n"
            f"EXCERPT: {excerpt}...\n"
            f"ADMISSIONS FEEDBACK: {feedback}\n"
        )
    return "".join(examples)


def _load_prompt_template():
    """Read ``system_prompt.txt``; fall back to a minimal built-in template."""
    try:
        with open("system_prompt.txt", "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return "[STUDENT ESSAY]\n\"[[TEST_TEXT]]\"\n[INSTRUCTIONS]\nOutput empty JSON."


def _render_prompt(template, values):
    """Fill ``[[NAME]]`` placeholders in *template* from *values* in one pass.

    A single pass means placeholder-looking text inside a substituted value
    (for example inside the student's essay) is never expanded again.
    Placeholders without an entry in *values* are left untouched.
    """
    return _PLACEHOLDER_RE.sub(lambda m: values.get(m.group(1), m.group(0)), template)


def _format_report(output_text):
    """Turn the model's JSON reply into the Markdown report.

    Raises whatever ``json.loads`` or the dict accessors raise when the reply
    is not the expected JSON object; the caller reports that to the user.
    """
    # Take the outermost {...} span so text around the JSON is ignored.
    match = re.search(r'\{[\s\S]*\}', output_text)
    json_str = match.group(0) if match else output_text
    data = json.loads(json_str)

    scores = data.get("scores", {})
    clarity = scores.get("clarity_meter", 0)
    motivation = scores.get("motivation_index", 0)
    fit = scores.get("institutional_fit", 0)

    # Each heading and bullet needs its own line for Markdown to render it.
    report = [
        "\n### 📊 Final Evaluation Scores\n",
        f"* **Clarity Meter:** {clarity}/100\n",
        f"* **Motivation Index:** {motivation}/100\n",
        f"* **Institutional Fit:** {fit}/100\n",
        "\n### 🧠 Expert Diagnostics\n",
    ]
    for diag in data.get("diagnostics", []):
        title = diag.get('title', 'Issue')
        quote = diag.get('quote', '')
        feedback = diag.get('feedback', '')
        report.append(f"- **{title}**: \"*{quote}*\" \n - **Feedback**: {feedback}\n\n")
    return "".join(report)


@spaces.GPU(duration=120)
def analyze_essay(test_text, progress=gr.Progress()):
    """Analyze a personal statement and return the report plus a UI update.

    Args:
        test_text: The essay text from the textbox.
        progress: Gradio progress tracker (the default instance is how Gradio
            injects it; it is not shared mutable state to be avoided).

    Returns:
        A ``(markdown, update)`` pair: the report or an error message, and a
        ``gr.update`` that makes the reset button visible.
    """
    # Validate before touching any model so that rejected submissions do not
    # trigger the dataset/model load.
    if not test_text or not test_text.strip():
        return "Please enter a personal statement to analyze.", gr.update(visible=True)

    word_count = len(test_text.split())
    if word_count < MIN_WORDS:
        return f"Invalid Submission: Your essay is only {word_count} words. The Common App personal statement has a strict minimum of {MIN_WORDS} words. Do not submit fragments or test strings.", gr.update(visible=True)
    if word_count > MAX_WORDS:
        return f"Invalid Submission: Your essay is {word_count} words, which exceeds the strict {MAX_WORDS:,}-word maximum limit of the VIORRA. Please edit it down before requesting Ivy League analysis.", gr.update(visible=True)

    progress(0.1, desc="Loading embedding model & vector DB...")
    load_models_lazily()

    # CUDA EXECUTION
    device = torch.device("cuda")

    # Move models to GPU
    embedder.to(device)
    aes_model.to(device)
    llm.to(device)

    # 1. Base Score (AES) -- computed once and reused for both placeholders.
    progress(0.3, desc="Calculating base grammar & syntax scores...")
    final_aes_score = _score_essay(test_text, device)

    # 2. RAG Context Retrieval
    progress(0.5, desc="Retrieving relevant historical examples...")
    rag_examples = _retrieve_examples(test_text)

    sys_prompt = _render_prompt(
        _load_prompt_template(),
        {
            "TEST_TEXT": test_text,
            "AES_SCORE": str(final_aes_score),
            "NARRATIVE_HOOK": str(int((final_aes_score / 5.0) * 100)),
            "RAG_EXAMPLES": rag_examples,
        },
    )

    messages = [{"role": "user", "content": sys_prompt}]
    try:
        prompt_text = llm_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except Exception:
        # Deliberate best-effort: fall back to the raw prompt when the
        # tokenizer cannot apply a chat template.
        prompt_text = f"{sys_prompt}\n"

    inputs = llm_tokenizer(prompt_text, return_tensors="pt").to(device)

    progress(0.7, desc="Analyzing Statement (this may take up to 40 seconds)...")
    with torch.no_grad():
        outputs = llm.generate(
            **inputs,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.7
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    output_text = llm_tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)

    # Parse JSON
    progress(0.9, desc="Formatting evaluation report...")
    try:
        md_report = _format_report(output_text)
    except Exception as e:
        # UI boundary: show the raw model output instead of failing the event.
        return f"Failed to parse JSON. Raw output: \n{output_text}\n\nError: {str(e)}", gr.update(visible=True)
    return md_report, gr.update(visible=True)


ivy_theme = gr.themes.Default(
    primary_hue="red",
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
)

css = """
/* ===== VIORRA PREMIUM REDESIGN ===== */
/* Global: lock to cream-white, override Gradio dark mode completely */
body, .dark, .gradio-container, .dark .gradio-container, .main, .dark .main, .contain, .dark .contain { background-color: #faf8f5 !important; color: #1e293b !important; }
/* All block/panel backgrounds */
.block, .dark .block, .panel, .dark .panel, .form, .dark .form, .wrap, .dark .wrap { background-color: transparent !important; border: none !important; box-shadow: none !important; }
/* Main column: min height to prevent footer jump */
.gradio-column, .dark .gradio-column { min-height: 50vh !important; }
/* Textarea — warm cream, not stark white */
textarea, .dark textarea { background-color: #faf8f5 !important; color: #1e293b !important; border: 1.5px solid #d4cdc3 !important; border-radius: 12px !important; padding: 1.25rem !important; font-size: 1rem !important; line-height: 1.7 !important; font-family: 'Inter', sans-serif !important; transition: border-color 0.2s ease, box-shadow 0.2s ease !important; }
textarea:focus, .dark textarea:focus { border-color: #8b0000 !important; box-shadow: 0 0 0 3px rgba(139, 0, 0, 0.08) !important; outline: none !important; }
textarea::placeholder { color: #9ca3af !important; font-style: italic !important; }
/* Primary button */
button.primary, .dark button.primary { background: linear-gradient(135deg, #8b0000, #a31515) !important; color: #ffffff !important; border: none !important; border-radius: 12px !important; padding: 0.9rem 2rem !important; font-size: 1.05rem !important; font-weight: 600 !important; letter-spacing: 0.02em !important; cursor: pointer !important; transition: all 0.25s ease !important; box-shadow: 0 2px 8px rgba(139, 0, 0, 0.15) !important; }
button.primary:hover, .dark button.primary:hover { background: linear-gradient(135deg, #a31515, #c0392b) !important; box-shadow: 0 4px 16px rgba(139, 0, 0, 0.25) !important; transform: translateY(-1px) !important; }
/* Secondary button */
button.secondary, .dark button.secondary { background-color: #faf8f5 !important; color: #8b0000 !important; border: 1.5px solid #8b0000 !important; border-radius: 12px !important; padding: 0.9rem 2rem !important; font-size: 1.05rem !important; font-weight: 600 !important; cursor: pointer !important; transition: all 0.25s ease !important; }
button.secondary:hover, .dark button.secondary:hover { background-color: #fef2f2 !important; box-shadow: 0 2px 8px rgba(139, 0, 0, 0.1) !important; }
/* Markdown / prose output */
.prose, .dark .prose, .prose *, .dark .prose *, .markdown-text, .dark .markdown-text { color: #1e293b !important; }
.prose strong, .dark .prose strong { color: #0f172a !important; }
.prose h1, .prose h2, .prose h3, .dark .prose h1, .dark .prose h2, .dark .prose h3 { color: #0f172a !important; font-weight: 700 !important; }
.prose em, .dark .prose em { color: #1e293b !important; }
.prose li, .dark .prose li { color: #1e293b !important; }
/* Copy button — styled nicely */
button[title="copy"], .dark button[title="copy"], .copy-btn, .dark .copy-btn { background-color: #f1ece4 !important; color: #8b0000 !important; border: 1px solid #d4cdc3 !important; border-radius: 8px !important; padding: 6px 10px !important; transition: all 0.2s ease !important; }
button[title="copy"]:hover, .dark button[title="copy"]:hover, .copy-btn:hover, .dark .copy-btn:hover { background-color: #8b0000 !important; color: #ffffff !important; border-color: #8b0000 !important; }
/* Header */
.viorra-hero { text-align: center; padding: 3rem 1rem 2rem; margin-bottom: 0.5rem; }
.viorra-hero h1 { font-size: 3.8rem; font-weight: 800; color: #8b0000; letter-spacing: 6px; text-transform: uppercase; margin: 0 0 0.6rem 0; line-height: 1; }
.viorra-hero .tagline { font-size: 1.05rem; color: #64748b; font-weight: 400; letter-spacing: 0.5px; }
.viorra-hero .divider { width: 60px; height: 3px; background: linear-gradient(90deg, #8b0000, #c0392b); margin: 1.2rem auto 0; border-radius: 2px; }
/* Footer */
.viorra-footer { text-align: center; margin-top: 3rem; padding: 2rem 1rem; border-top: 1px solid #e2ddd5; }
.viorra-footer .team-label { font-size: 0.95rem; color: #475569; margin-bottom: 0.75rem; font-weight: 500; }
.viorra-footer .team-label strong { color: #1e293b; }
.viorra-footer .members { font-size: 0.85rem; color: #64748b; line-height: 1.8; }
.viorra-footer .members strong { color: #334155; font-weight: 600; }
.viorra-footer .members .role { color: #94a3b8; font-size: 0.8rem; }
/* Progress bar */
.progress-bar, .dark .progress-bar { background-color: #8b0000 !important; }
.progress-text, .dark .progress-text { color: #1e293b !important; }
.eta-bar { background: linear-gradient(135deg, #8b0000, #c0392b) !important; }
/* Warning Box */
.viorra-warning { background-color: #fef2f2; border: 1px solid #f87171; color: #991b1b; padding: 1rem; border-radius: 8px; text-align: center; margin: 0 auto 2rem auto; max-width: 800px; font-size: 0.95rem; line-height: 1.5; }
.viorra-warning strong { color: #7f1d1d; }
/* Remove Gradio default borders and shadows on containers */
.gradio-container .contain, .dark .gradio-container .contain { box-shadow: none !important; }
"""

with gr.Blocks(theme=ivy_theme, css=css) as demo:
    # NOTE(review): the CSS above targets .viorra-hero, .viorra-warning and
    # .viorra-footer, but the HTML strings in this file contain no elements
    # with those classes -- confirm the markup was not lost before shipping.
    gr.HTML('''

VIORRA

Smart Personal Statement Checker • Fast, Accurate, and Reliable

⚠️ LIMITED DEMO VERSION
This is a lightweight cloud preview. For the complete VIORRA experience—including 100% offline privacy, interactive chat follow-ups, and session history—install the full application from GitHub.
''')

    with gr.Column():
        with gr.Column(visible=True) as input_container:
            essay_input = gr.Textbox(
                lines=12,
                placeholder="Paste the student's Personal Statement here.\n\nVIORRA will instantly analyze clarity, score the document, and provide expert-level diagnostics...",
                label="Personal Statement",
                show_label=False,
                container=False
            )
            submit_btn = gr.Button("Analyze Personal Statement", variant="primary", size="lg")

        with gr.Column(visible=False) as output_container:
            report_output = gr.Markdown(
                label="Evaluation Report",
                value="*Analyzing... Please wait.*",
                show_copy_button=True
            )
            # Hidden until analyze_essay returns; it lives inside
            # output_container so hiding the container hides it again.
            reset_btn = gr.Button("Analyze Another Statement", variant="secondary", size="lg", visible=False)

    def show_loading():
        """Swap the input widgets for the report area with a waiting message."""
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(value="*Analyzing your statement... This may take up to 40 seconds.*"),
            gr.update(visible=False)
        )

    def reset_ui():
        """Show an emptied input box and the submit button; hide the report area."""
        # One update per output component: the textbox is both re-shown and
        # cleared in a single update instead of being listed twice.
        return gr.update(visible=True, value=""), gr.update(visible=True), gr.update(visible=False)

    submit_btn.click(
        fn=show_loading,
        inputs=[],
        outputs=[essay_input, submit_btn, output_container, report_output, reset_btn],
        queue=False
    ).then(
        fn=analyze_essay,
        inputs=essay_input,
        outputs=[report_output, reset_btn]
    )

    reset_btn.click(
        fn=reset_ui,
        inputs=[],
        outputs=[essay_input, submit_btn, output_container],
        queue=False
    )

    gr.HTML(""" """)

if __name__ == "__main__":
    demo.launch()