Spaces:

qsardor
/

VIORRA

Running on Zero

App Files Files Community

VIORRA / app.py

qsardor

Upload folder using huggingface_hub

0c52635 verified about 3 hours ago

raw

history blame contribute delete

16.7 kB

	import huggingface_hub
	# Compatibility shim: when the installed huggingface_hub does not expose
	# `HfFolder`, install a minimal stand-in whose only member is `get_token`.
	# NOTE(review): presumably needed by a dependency that still looks up
	# `huggingface_hub.HfFolder` -- confirm which one.
	try:
	    huggingface_hub.HfFolder
	except AttributeError:
	    class HfFolder:
	        """Minimal stand-in exposing only ``get_token``."""

	        @staticmethod
	        def get_token():
	            """Return the result of ``huggingface_hub.get_token()``."""
	            return huggingface_hub.get_token()

	    huggingface_hub.HfFolder = HfFolder

	import spaces
	import gradio as gr
	import torch
	import faiss
	import numpy as np
	import json
	import re
	from datasets import load_dataset
	from sentence_transformers import SentenceTransformer
	from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification

	# Startup banner, emitted once when the module is imported.
	print(
	    "===============================================",
	    "🚀 BOOTING VIORRA TRIO FOR HUGGINGFACE ZEROGPU",
	    "===============================================\n",
	    sep="\n",
	)

	# Module-level slots that load_models_lazily() fills in on its first call.
	is_loaded = False                       # flipped to True once everything is loaded
	dataset = embedder = index = None       # HF dataset, sentence embedder, FAISS index
	corpus_texts, corpus_feedback = [], []  # parallel columns read from the dataset
	aes_tokenizer = aes_model = None        # essay-scoring classifier and its tokenizer
	llm_tokenizer = llm = None              # causal LM and its tokenizer

	def load_models_lazily():
	    """Load the dataset, vector index and all three models on first call.

	    Fills the module-level globals (`dataset`, `corpus_texts`,
	    `corpus_feedback`, `embedder`, `index`, `aes_tokenizer`, `aes_model`,
	    `llm_tokenizer`, `llm`) and then sets `is_loaded` so that every later
	    call returns immediately. `is_loaded` is only set after the last model
	    has loaded, so a failure part-way through leaves it False.
	    """
	    global is_loaded, dataset, corpus_texts, corpus_feedback, embedder, index
	    global aes_tokenizer, aes_model, llm_tokenizer, llm

	    if is_loaded:
	        return

	    # --- 1. Retrieval corpus: essays + feedback, embedded into a flat L2 index.
	    print("--> [1/3] Loading Vector Database from published Viorra Dataset...")
	    dataset = load_dataset('qsardor/viorra-admissions-essays', split='train')
	    corpus_texts = dataset['Essay']
	    corpus_feedback = dataset['Feedback_cleaned']

	    embedder = SentenceTransformer('all-MiniLM-L6-v2', device="cpu")
	    essay_vectors = embedder.encode(
	        corpus_texts,
	        convert_to_numpy=True,
	        show_progress_bar=False,
	    )
	    # The index width is taken from the embedding matrix's second axis.
	    index = faiss.IndexFlatL2(essay_vectors.shape[1])
	    index.add(essay_vectors)

	    # --- 2. Sequence-classification model used for the base essay score.
	    print("--> [2/3] Lazy Loading AES Scoring Model...")
	    scorer_repo = "Kevintu/Engessay_grading_ML"
	    aes_tokenizer = AutoTokenizer.from_pretrained(scorer_repo)
	    aes_model = AutoModelForSequenceClassification.from_pretrained(scorer_repo)

	    # --- 3. Causal LM that writes the JSON evaluation, loaded in float16.
	    print("--> [3/3] Lazy Loading The Brain (google/gemma-4-E2B-it in float16)...")
	    brain_repo = "google/gemma-4-E2B-it"
	    llm_tokenizer = AutoTokenizer.from_pretrained(brain_repo, trust_remote_code=True)
	    brain = AutoModelForCausalLM.from_pretrained(
	        brain_repo,
	        torch_dtype=torch.float16,
	        trust_remote_code=True,
	    )
	    llm = brain.eval()

	    is_loaded = True
	    print("\n✅ MODELS LOADED LAZILY ON FIRST RUN.")

	def _fill_prompt_template(template, values):
	    """Replace each ``[[NAME]]`` placeholder in *template* in one pass.

	    A single regex pass is used instead of chained ``str.replace`` calls so
	    that text inserted for one placeholder (e.g. the student's essay) is
	    never re-scanned for the remaining placeholders.

	    Args:
	        template: Prompt text containing ``[[NAME]]`` markers.
	        values: Mapping of placeholder name (without brackets) to the
	            replacement string.

	    Returns:
	        The template with every known placeholder substituted; unknown
	        markers are left untouched.
	    """
	    if not values:
	        return template
	    names = "|".join(re.escape(name) for name in values)
	    # A callable replacement avoids backslash-escape processing of the value.
	    return re.sub(
	        r"\[\[(" + names + r")\]\]",
	        lambda m: values[m.group(1)],
	        template,
	    )


	def _format_report(output_text):
	    """Convert the LLM's raw reply into the Markdown evaluation report.

	    The first ``{ ... }`` span (greedy) is parsed as JSON; when no brace
	    span is found the whole reply is parsed. Any failure while parsing or
	    reading the expected keys yields a message containing the raw output
	    and the error instead of raising.
	    """
	    try:
	        match = re.search(r'\{[\s\S]*\}', output_text)
	        json_str = match.group(0) if match else output_text
	        data = json.loads(json_str)

	        scores = data.get("scores", {})
	        clarity = scores.get("clarity_meter", 0)
	        motivation = scores.get("motivation_index", 0)
	        fit = scores.get("institutional_fit", 0)

	        # Built from explicit pieces so no source-code indentation leaks
	        # into the Markdown (indented lines would render as a code block).
	        parts = [
	            "\n### 📊 Final Evaluation Scores\n",
	            f"* Clarity Meter: {clarity}/100\n",
	            f"* Motivation Index: {motivation}/100\n",
	            f"* Institutional Fit: {fit}/100\n",
	            "\n### 🧠 Expert Diagnostics\n",
	        ]
	        for diag in data.get("diagnostics", []):
	            parts.append(
	                f"- {diag.get('title', 'Issue')}: \"{diag.get('quote', '')}\" \n - Feedback: {diag.get('feedback', '')}\n\n"
	            )
	        return "".join(parts)
	    except Exception as e:
	        # Deliberate catch-all: the model's output shape is not guaranteed,
	        # and the user should see the raw text rather than a stack trace.
	        return f"Failed to parse JSON. Raw output: \n{output_text}\n\nError: {str(e)}"


	@spaces.GPU(duration=120)
	def analyze_essay(test_text, progress=gr.Progress()):
	    """Score a personal statement and return a Markdown report.

	    Steps: validate the input, make sure the models are loaded, compute the
	    base score with the classification model, retrieve the two nearest
	    corpus essays from the FAISS index, fill the prompt template, generate
	    with the causal LM and format its JSON reply.

	    Args:
	        test_text: The essay text from the UI textbox.
	        progress: Gradio progress tracker used to report stage updates.

	    Returns:
	        A 2-tuple ``(message_or_report, gr.update(visible=True))``. The
	        first item is either a validation message, the Markdown report, or
	        a parse-failure message; the second is the update for the
	        component wired as the second output.
	    """
	    if not test_text or not test_text.strip():
	        return "Please enter a personal statement to analyze.", gr.update(visible=True)

	    # Validate length BEFORE touching any model so that rejected
	    # submissions do not pay for the (slow) first-run model load.
	    word_count = len(test_text.split())
	    if word_count < 150:
	        return f"Invalid Submission: Your essay is only {word_count} words. The Common App personal statement has a strict minimum of 150 words. Do not submit fragments or test strings.", gr.update(visible=True)
	    if word_count > 1000:
	        return f"Invalid Submission: Your essay is {word_count} words, which exceeds the strict 1,000-word maximum limit of the VIORRA. Please edit it down before requesting Ivy League analysis.", gr.update(visible=True)

	    progress(0.1, desc="Loading embedding model & vector DB...")
	    load_models_lazily()

	    # CUDA execution: move all three models onto the GPU for this call.
	    device = torch.device("cuda")
	    embedder.to(device)
	    aes_model.to(device)
	    llm.to(device)

	    # 1. Base score (AES): expected class index under the softmax
	    #    distribution, clamped to [1.0, 5.0]. Computed once.
	    progress(0.3, desc="Calculating base grammar & syntax scores...")
	    aes_inputs = aes_tokenizer(test_text, return_tensors="pt", truncation=True, max_length=512).to(device)
	    with torch.no_grad():
	        aes_outputs = aes_model(**aes_inputs)
	    probs = torch.nn.functional.softmax(aes_outputs.logits[0], dim=-1)
	    classes = torch.arange(len(probs), dtype=torch.float32, device=device)
	    raw_score = (probs * classes).sum().item()
	    final_aes_score = round(max(1.0, min(5.0, raw_score)), 2)

	    # 2. RAG context retrieval: two nearest essays plus their feedback.
	    progress(0.5, desc="Retrieving relevant historical examples...")
	    query_embedding = embedder.encode([test_text], convert_to_numpy=True)
	    _distances, indices = index.search(query_embedding, 2)
	    rag_parts = []
	    for i, idx in enumerate(indices[0]):
	        idx = int(idx)
	        if idx < 0:
	            # FAISS pads with -1 when fewer than k neighbours exist; a
	            # negative index would otherwise silently pick the last row.
	            continue
	        rag_parts.append(f"\n--- SIMILAR ADMISSIONS ESSAY {i+1} ---\n")
	        rag_parts.append("EXCERPT: " + corpus_texts[idx][:300] + "...\n")
	        rag_parts.append("ADMISSIONS FEEDBACK: " + corpus_feedback[idx] + "\n")
	    rag_examples = "".join(rag_parts)

	    # 3. Prompt assembly: template from disk, with a built-in fallback.
	    try:
	        with open("system_prompt.txt", "r", encoding="utf-8") as f:
	            sys_prompt = f.read()
	    except FileNotFoundError:
	        sys_prompt = "[STUDENT ESSAY]\n\"[[TEST_TEXT]]\"\n[INSTRUCTIONS]\nOutput empty JSON."

	    sys_prompt = _fill_prompt_template(sys_prompt, {
	        "TEST_TEXT": test_text,
	        "AES_SCORE": str(final_aes_score),
	        "NARRATIVE_HOOK": str(int((final_aes_score/5.0)*100)),
	        "RAG_EXAMPLES": rag_examples,
	    })

	    messages = [{"role": "user", "content": sys_prompt}]
	    try:
	        prompt_text = llm_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	        used_chat_template = True
	    except Exception:
	        # Tokenizer has no usable chat template: fall back to the bare prompt.
	        prompt_text = f"{sys_prompt}\n"
	        used_chat_template = False

	    # A rendered chat template already contains its special tokens, so
	    # adding them again at tokenization time would duplicate them.
	    inputs = llm_tokenizer(
	        prompt_text,
	        return_tensors="pt",
	        add_special_tokens=not used_chat_template,
	    ).to(device)

	    # 4. Generation.
	    progress(0.7, desc="Analyzing Statement (this may take up to 40 seconds)...")
	    with torch.no_grad():
	        outputs = llm.generate(
	            **inputs,
	            max_new_tokens=2048,
	            do_sample=True,
	            temperature=0.7
	        )

	    # Decode only the newly generated tokens (everything after the prompt).
	    output_text = llm_tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)

	    # 5. Parse JSON and render the report.
	    progress(0.9, desc="Formatting evaluation report...")
	    return _format_report(output_text), gr.update(visible=True)

	# Gradio's default theme with a red primary hue; "Inter" (fetched as a
	# Google font) leads the font stack, followed by generic sans-serif fallbacks.
	ivy_theme = gr.themes.Default(
	    font=[
	        gr.themes.GoogleFont("Inter"),
	        "ui-sans-serif",
	        "system-ui",
	        "sans-serif",
	    ],
	    primary_hue="red",
	)

	# Custom stylesheet handed to gr.Blocks(css=css). The rules pin a cream
	# background / dark text palette (also under Gradio's `.dark` class), restyle
	# textareas, buttons, Markdown output and the progress bar, and style the
	# custom `viorra-hero`, `viorra-warning` and `viorra-footer` HTML blocks.
	# NOTE(review): in the "Markdown / prose output" rule the second selector line
	# (`.prose , .dark .prose ,`) repeats the first; possibly a `.prose *`
	# selector whose `*` was lost -- confirm against the intended stylesheet.
	css = """
	/* ===== VIORRA PREMIUM REDESIGN ===== */

	/* Global: lock to cream-white, override Gradio dark mode completely */
	body, .dark, .gradio-container, .dark .gradio-container,
	.main, .dark .main, .contain, .dark .contain {
	background-color: #faf8f5 !important;
	color: #1e293b !important;
	}

	/* All block/panel backgrounds */
	.block, .dark .block, .panel, .dark .panel,
	.form, .dark .form, .wrap, .dark .wrap {
	background-color: transparent !important;
	border: none !important;
	box-shadow: none !important;
	}

	/* Main column: min height to prevent footer jump */
	.gradio-column, .dark .gradio-column {
	min-height: 50vh !important;
	}

	/* Textarea — warm cream, not stark white */
	textarea, .dark textarea {
	background-color: #faf8f5 !important;
	color: #1e293b !important;
	border: 1.5px solid #d4cdc3 !important;
	border-radius: 12px !important;
	padding: 1.25rem !important;
	font-size: 1rem !important;
	line-height: 1.7 !important;
	font-family: 'Inter', sans-serif !important;
	transition: border-color 0.2s ease, box-shadow 0.2s ease !important;
	}
	textarea:focus, .dark textarea:focus {
	border-color: #8b0000 !important;
	box-shadow: 0 0 0 3px rgba(139, 0, 0, 0.08) !important;
	outline: none !important;
	}
	textarea::placeholder {
	color: #9ca3af !important;
	font-style: italic !important;
	}

	/* Primary button */
	button.primary, .dark button.primary {
	background: linear-gradient(135deg, #8b0000, #a31515) !important;
	color: #ffffff !important;
	border: none !important;
	border-radius: 12px !important;
	padding: 0.9rem 2rem !important;
	font-size: 1.05rem !important;
	font-weight: 600 !important;
	letter-spacing: 0.02em !important;
	cursor: pointer !important;
	transition: all 0.25s ease !important;
	box-shadow: 0 2px 8px rgba(139, 0, 0, 0.15) !important;
	}
	button.primary:hover, .dark button.primary:hover {
	background: linear-gradient(135deg, #a31515, #c0392b) !important;
	box-shadow: 0 4px 16px rgba(139, 0, 0, 0.25) !important;
	transform: translateY(-1px) !important;
	}

	/* Secondary button */
	button.secondary, .dark button.secondary {
	background-color: #faf8f5 !important;
	color: #8b0000 !important;
	border: 1.5px solid #8b0000 !important;
	border-radius: 12px !important;
	padding: 0.9rem 2rem !important;
	font-size: 1.05rem !important;
	font-weight: 600 !important;
	cursor: pointer !important;
	transition: all 0.25s ease !important;
	}
	button.secondary:hover, .dark button.secondary:hover {
	background-color: #fef2f2 !important;
	box-shadow: 0 2px 8px rgba(139, 0, 0, 0.1) !important;
	}

	/* Markdown / prose output */
	.prose, .dark .prose,
	.prose , .dark .prose ,
	.markdown-text, .dark .markdown-text {
	color: #1e293b !important;
	}
	.prose strong, .dark .prose strong {
	color: #0f172a !important;
	}
	.prose h1, .prose h2, .prose h3,
	.dark .prose h1, .dark .prose h2, .dark .prose h3 {
	color: #0f172a !important;
	font-weight: 700 !important;
	}
	.prose em, .dark .prose em {
	color: #1e293b !important;
	}
	.prose li, .dark .prose li {
	color: #1e293b !important;
	}

	/* Copy button — styled nicely */
	button[title="copy"], .dark button[title="copy"],
	.copy-btn, .dark .copy-btn {
	background-color: #f1ece4 !important;
	color: #8b0000 !important;
	border: 1px solid #d4cdc3 !important;
	border-radius: 8px !important;
	padding: 6px 10px !important;
	transition: all 0.2s ease !important;
	}
	button[title="copy"]:hover, .dark button[title="copy"]:hover,
	.copy-btn:hover, .dark .copy-btn:hover {
	background-color: #8b0000 !important;
	color: #ffffff !important;
	border-color: #8b0000 !important;
	}

	/* Header */
	.viorra-hero {
	text-align: center;
	padding: 3rem 1rem 2rem;
	margin-bottom: 0.5rem;
	}
	.viorra-hero h1 {
	font-size: 3.8rem;
	font-weight: 800;
	color: #8b0000;
	letter-spacing: 6px;
	text-transform: uppercase;
	margin: 0 0 0.6rem 0;
	line-height: 1;
	}
	.viorra-hero .tagline {
	font-size: 1.05rem;
	color: #64748b;
	font-weight: 400;
	letter-spacing: 0.5px;
	}
	.viorra-hero .divider {
	width: 60px;
	height: 3px;
	background: linear-gradient(90deg, #8b0000, #c0392b);
	margin: 1.2rem auto 0;
	border-radius: 2px;
	}

	/* Footer */
	.viorra-footer {
	text-align: center;
	margin-top: 3rem;
	padding: 2rem 1rem;
	border-top: 1px solid #e2ddd5;
	}
	.viorra-footer .team-label {
	font-size: 0.95rem;
	color: #475569;
	margin-bottom: 0.75rem;
	font-weight: 500;
	}
	.viorra-footer .team-label strong {
	color: #1e293b;
	}
	.viorra-footer .members {
	font-size: 0.85rem;
	color: #64748b;
	line-height: 1.8;
	}
	.viorra-footer .members strong {
	color: #334155;
	font-weight: 600;
	}
	.viorra-footer .members .role {
	color: #94a3b8;
	font-size: 0.8rem;
	}

	/* Progress bar */
	.progress-bar, .dark .progress-bar {
	background-color: #8b0000 !important;
	}
	.progress-text, .dark .progress-text {
	color: #1e293b !important;
	}
	.eta-bar {
	background: linear-gradient(135deg, #8b0000, #c0392b) !important;
	}

	/* Warning Box */
	.viorra-warning {
	background-color: #fef2f2;
	border: 1px solid #f87171;
	color: #991b1b;
	padding: 1rem;
	border-radius: 8px;
	text-align: center;
	margin: 0 auto 2rem auto;
	max-width: 800px;
	font-size: 0.95rem;
	line-height: 1.5;
	}
	.viorra-warning strong {
	color: #7f1d1d;
	}

	/* Remove Gradio default borders and shadows on containers */
	.gradio-container .contain, .dark .gradio-container .contain {
	box-shadow: none !important;
	}
	"""

	# UI definition. NOTE(review): the nesting of the layout containers below is
	# reconstructed from statement order -- confirm it matches the intended layout.
	with gr.Blocks(theme=ivy_theme, css=css) as demo:
	    # Hero header plus the "limited demo" notice.
	    gr.HTML('''
	<div class="viorra-hero">
	<h1>VIORRA</h1>
	<p class="tagline">Smart Personal Statement Checker • Fast, Accurate, and Reliable</p>
	<div class="divider"></div>
	</div>

	<div class="viorra-warning">
	<strong>⚠️ LIMITED DEMO VERSION</strong><br>
	This is a lightweight cloud preview. For the complete VIORRA experience—including 100% offline privacy, interactive chat follow-ups, and session history—<a href="https://github.com/qsardor/VIORRA" target="_blank" style="color: #8b0000; text-decoration: underline; font-weight: 600;">install the full application from GitHub</a>.
	</div>
	''')

	    with gr.Column():
	        # Input view: essay textbox and the submit button.
	        with gr.Column(visible=True) as input_container:
	            essay_input = gr.Textbox(
	                lines=12,
	                placeholder="Paste the student's Personal Statement here.\n\nVIORRA will instantly analyze clarity, score the document, and provide expert-level diagnostics...",
	                label="Personal Statement",
	                show_label=False,
	                container=False
	            )
	            submit_btn = gr.Button("Analyze Personal Statement", variant="primary", size="lg")

	        # Output view: hidden until a submission starts.
	        with gr.Column(visible=False) as output_container:
	            report_output = gr.Markdown(
	                label="Evaluation Report",
	                value="Analyzing... Please wait.",
	                show_copy_button=True
	            )
	            reset_btn = gr.Button("Analyze Another Statement", variant="secondary", size="lg", visible=False)

	    def show_loading():
	        """Switch to the output view and show a waiting message.

	        Returns one update per component in the first click handler's
	        `outputs`: essay_input, submit_btn, output_container,
	        report_output, reset_btn.
	        """
	        return (
	            gr.update(visible=False),
	            gr.update(visible=False),
	            gr.update(visible=True),
	            gr.update(value="Analyzing your statement... This may take up to 40 seconds."),
	            gr.update(visible=False)
	        )

	    def reset_ui():
	        """Return to the input view with an emptied textbox.

	        Returns one update per component in the reset handler's `outputs`:
	        essay_input (shown and cleared in a single update), submit_btn,
	        output_container.
	        """
	        return (
	            gr.update(visible=True, value=""),
	            gr.update(visible=True),
	            gr.update(visible=False),
	        )

	    # Two-step submit: flip the UI immediately (unqueued), then run the
	    # analysis, which fills the report and reveals the reset button.
	    submit_btn.click(
	        fn=show_loading,
	        inputs=[],
	        outputs=[essay_input, submit_btn, output_container, report_output, reset_btn],
	        queue=False
	    ).then(
	        fn=analyze_essay,
	        inputs=essay_input,
	        outputs=[report_output, reset_btn]
	    )

	    # Each component appears once in `outputs`; the textbox's visibility and
	    # value are carried by one combined update instead of two separate slots.
	    reset_btn.click(
	        fn=reset_ui,
	        inputs=[],
	        outputs=[essay_input, submit_btn, output_container],
	        queue=False
	    )

	    # Team credits footer.
	    gr.HTML("""
	<div class="viorra-footer">
	<p class="team-label">Created with ❤️ by <strong>Team Violets</strong></p>
	<p class="members">
	<strong>Azizakhan Rustamova</strong> <span class="role">Founder & Marketing</span> •
	<strong>Sardor Qurbonov</strong> <span class="role">Main Developer of Software</span><br>
	<strong>Ruhshona Farhodova</strong> <span class="role">Business Developer</span> •
	<strong>Damirbek Xolnazarov</strong> <span class="role">Full Stack Developer</span>
	</p>
	</div>
	""")

	# Start the Gradio server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()