Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +233 -159
- requirements.txt +1 -0
app.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import json
|
|
|
|
| 4 |
import shutil
|
| 5 |
import time
|
|
|
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
from google import genai
|
|
@@ -17,9 +20,43 @@ import io
|
|
| 17 |
# On HF Spaces, set this in "Settings" -> "Secrets"
|
| 18 |
load_dotenv()
|
| 19 |
API_KEY = os.getenv("GOOGLE_API_KEY")
|
|
|
|
| 20 |
ACCESS_PASSWORD = os.getenv("APP_PASSWORD")
|
| 21 |
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# -----------------------------------------------------------------------------
|
| 25 |
# LOGIC: CONVERSION (PDF -> IMAGES)
|
|
@@ -35,11 +72,11 @@ def convert_to_images(file_path):
|
|
| 35 |
|
| 36 |
if ext == ".pdf":
|
| 37 |
print("Converting PDF to images...")
|
| 38 |
-
images = convert_from_path(file_path, dpi=
|
| 39 |
image_paths = []
|
| 40 |
for i, img in enumerate(images):
|
| 41 |
-
path = output_dir / f"slide-{i+1:02d}.
|
| 42 |
-
img.save(path, "
|
| 43 |
image_paths.append(path)
|
| 44 |
return image_paths
|
| 45 |
else:
|
|
@@ -54,25 +91,37 @@ def scan_slides(client, image_paths):
|
|
| 54 |
inventory = []
|
| 55 |
total = len(image_paths)
|
| 56 |
|
| 57 |
-
|
|
|
|
| 58 |
for i, img_path in enumerate(image_paths):
|
| 59 |
slide_num = i + 1
|
| 60 |
yield f"Reading Slide {slide_num}/{total}...", None
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
print(f"Scanning Slide {slide_num}...")
|
| 63 |
|
| 64 |
# Rate Limiting: Sleep to respect API limits (avoid 429 errors)
|
| 65 |
-
|
| 66 |
-
file_size_mb = os.path.getsize(img_path) / (1024 * 1024)
|
| 67 |
|
| 68 |
if file_size_mb > 1.0:
|
| 69 |
print(f" Large file ({file_size_mb:.1f}MB). Pausing 10s to refill quota...")
|
| 70 |
-
time.sleep(10)
|
| 71 |
else:
|
| 72 |
-
time.sleep(2)
|
| 73 |
-
|
| 74 |
-
with open(img_path, "rb") as f:
|
| 75 |
-
img_bytes = f.read()
|
| 76 |
|
| 77 |
prompt = f"""
|
| 78 |
Analyze this slide (Slide {slide_num}).
|
|
@@ -98,9 +147,9 @@ def scan_slides(client, image_paths):
|
|
| 98 |
for attempt in range(max_retries):
|
| 99 |
try:
|
| 100 |
response = client.models.generate_content(
|
| 101 |
-
model=
|
| 102 |
contents=[
|
| 103 |
-
types.Part.from_bytes(data=img_bytes, mime_type="image/
|
| 104 |
prompt
|
| 105 |
],
|
| 106 |
config=types.GenerateContentConfig(
|
|
@@ -110,7 +159,6 @@ def scan_slides(client, image_paths):
|
|
| 110 |
)
|
| 111 |
data = json.loads(response.text)
|
| 112 |
|
| 113 |
-
# Robustness: Handle case where model returns a list [ { ... } ] instead of { ... }
|
| 114 |
if isinstance(data, list):
|
| 115 |
if len(data) > 0 and isinstance(data[0], dict):
|
| 116 |
data = data[0]
|
|
@@ -120,22 +168,24 @@ def scan_slides(client, image_paths):
|
|
| 120 |
|
| 121 |
if isinstance(data, dict):
|
| 122 |
inventory.append(data)
|
|
|
|
| 123 |
else:
|
| 124 |
print(f"Warning: Slide {slide_num} did not return a valid JSON dictionary. Data: {data}")
|
| 125 |
|
| 126 |
-
# If successful, break retry loop
|
| 127 |
break
|
| 128 |
|
| 129 |
except Exception as e:
|
| 130 |
error_str = str(e)
|
| 131 |
if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
|
| 132 |
-
wait_time = (attempt + 1) * 5
|
| 133 |
print(f" β οΈ Rate Limit (429) on Slide {slide_num}. Retrying in {wait_time}s...")
|
| 134 |
yield f"β οΈ High Traffic. Retrying Slide {slide_num} in {wait_time}s...", None
|
| 135 |
time.sleep(wait_time)
|
| 136 |
else:
|
| 137 |
print(f"Error scanning slide {slide_num}: {e}")
|
| 138 |
-
break
|
|
|
|
|
|
|
| 139 |
|
| 140 |
yield "Scan Complete", inventory
|
| 141 |
|
|
@@ -154,200 +204,217 @@ def debug_inventory(inventory):
|
|
| 154 |
# -----------------------------------------------------------------------------
|
| 155 |
# LOGIC: PASS 2 (COACH CRITIQUE)
|
| 156 |
# -----------------------------------------------------------------------------
|
| 157 |
-
def
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
for s in active:
|
| 172 |
-
visuals = s.get("visual_elements", {})
|
| 173 |
-
if not isinstance(visuals, dict): visuals = {} # Safety
|
| 174 |
-
|
| 175 |
-
busy = "BUSY" if visuals.get("is_busy") else "OK"
|
| 176 |
-
title = s.get('title', 'No Title')
|
| 177 |
-
num = s.get('slide_number', '?')
|
| 178 |
-
takeaway = s.get('key_takeaway', '')
|
| 179 |
-
desc = s.get('visual_description', '')
|
| 180 |
-
|
| 181 |
-
entry = f"Slide {num}: {title}\n- Content: {takeaway}\n- Visuals: {desc} [{busy}]"
|
| 182 |
-
script.append(entry)
|
| 183 |
-
|
| 184 |
-
full_text = "\n".join(script)
|
| 185 |
-
|
| 186 |
-
# THE MENTOR PROMPT (Synced with run_pass_2_gemini.py)
|
| 187 |
-
prompt = f"""
|
| 188 |
-
You are Dr. Jones, an expert Data Science Mentor.
|
| 189 |
-
Your goal is to guide a student to professional excellence.
|
| 190 |
-
|
| 191 |
-
SLIDE INVENTORY:
|
| 192 |
-
{full_text}
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
| 196 |
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
2. Data Structure
|
| 200 |
-
3. Targets & Metrics
|
| 201 |
-
4. Candidate Models
|
| 202 |
-
5. HPO Strategy
|
| 203 |
-
6. Best Model Selection
|
| 204 |
-
7. Validation
|
| 205 |
-
8. Business Impact
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
5. **Summary**: Write a robust 2-paragraph summary (approx 150 words) explaining the overall impression and main areas for improvement.
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
}}
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
)
|
| 234 |
)
|
| 235 |
-
print("DEBUG: Received Pass 2 Response from Gemini.")
|
| 236 |
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
-
# Robustness: Handle list return from Coach
|
| 240 |
if isinstance(critique, list):
|
| 241 |
if len(critique) > 0 and isinstance(critique[0], dict):
|
| 242 |
critique = critique[0]
|
| 243 |
else:
|
| 244 |
-
# If it's a list but not a list of dicts, or empty, fail gracefully
|
| 245 |
raise ValueError(f"Coach returned a list, expected a dictionary. Output: {critique}")
|
| 246 |
|
| 247 |
return critique
|
| 248 |
|
| 249 |
except Exception as e:
|
| 250 |
-
print(f"CRITICAL ERROR in Pass 2: {e}")
|
| 251 |
-
# Return a fallback critique so the UI doesn't hang
|
| 252 |
return {
|
| 253 |
"overall_summary": f"Error generating critique: {e}",
|
| 254 |
"structure_roadmap": [],
|
| 255 |
-
"slide_tips": []
|
| 256 |
}
|
| 257 |
|
| 258 |
# -----------------------------------------------------------------------------
|
| 259 |
# GRADIO INTERFACE
|
| 260 |
# -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
def process_presentation(file_obj, password, temperature):
|
| 262 |
print("--- NEW JOB STARTED ---")
|
| 263 |
if file_obj is None:
|
| 264 |
-
return "β Error: No file uploaded",
|
| 265 |
|
| 266 |
if password != ACCESS_PASSWORD:
|
| 267 |
-
yield
|
| 268 |
-
return "β Incorrect Password",
|
| 269 |
|
| 270 |
if not API_KEY:
|
| 271 |
-
return "β Server Error: API Key missing",
|
|
|
|
|
|
|
| 272 |
|
| 273 |
-
|
|
|
|
| 274 |
|
| 275 |
try:
|
| 276 |
# 1. Convert
|
| 277 |
print("Step 1: Converting PDF...")
|
| 278 |
-
yield "Converting PDF...",
|
| 279 |
images = convert_to_images(file_obj.name)
|
| 280 |
print(f" Converted {len(images)} slides.")
|
| 281 |
|
| 282 |
-
# 2. Scan
|
| 283 |
-
yield f"Scanning {len(images)} slides...
|
| 284 |
-
"", "", None, None, "Starting Scan..."
|
| 285 |
print("Step 2: Scanning Slides (Pass 1)...")
|
| 286 |
|
| 287 |
-
|
| 288 |
-
scanner = scan_slides(client, images)
|
| 289 |
inventory = []
|
| 290 |
|
| 291 |
for msg, result in scanner:
|
| 292 |
if result is None:
|
| 293 |
-
|
| 294 |
-
yield msg, "", "", None, None, msg
|
| 295 |
else:
|
| 296 |
-
# Scan complete, result is the inventory
|
| 297 |
inventory = result
|
| 298 |
|
| 299 |
print(" Scan Complete.")
|
| 300 |
|
| 301 |
-
# Save Inventory
|
| 302 |
original_stem = Path(file_obj.name).stem
|
| 303 |
-
|
| 304 |
-
# Save to 'slides_images/[Stem]' to match project structure
|
| 305 |
target_dir = Path("slides_images") / original_stem
|
| 306 |
target_dir.mkdir(parents=True, exist_ok=True)
|
| 307 |
|
| 308 |
inventory_filename = target_dir / f"{original_stem}_Inventory.json"
|
| 309 |
-
|
| 310 |
with open(inventory_filename, "w") as f:
|
| 311 |
json.dump(inventory, f, indent=4)
|
| 312 |
print(f" Saved Inventory to {inventory_filename}")
|
| 313 |
|
| 314 |
-
# 3. Coach
|
| 315 |
-
debug_inventory(inventory)
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
-
yield "
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
# 4. Format Output
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
for item in critique.get("structure_roadmap", []): # Updated key to match schema
|
| 329 |
-
icon = item.get('status_icon', 'β')
|
| 330 |
-
step = item.get('step_name', 'Step')
|
| 331 |
-
note = item.get('coach_notes', '')
|
| 332 |
-
table_md += f"| **{step}** | <span style='font-size: 1.5em'>{icon}</span> | {note} |\n"
|
| 333 |
|
| 334 |
-
# Create Report File
|
| 335 |
-
# original_stem is already defined above
|
| 336 |
report_filename = f"{original_stem}_Review.md"
|
| 337 |
-
|
| 338 |
with open(report_filename, "w") as f:
|
| 339 |
f.write(f"# Dr. Jones Feedback for {original_stem}\n\n")
|
| 340 |
-
f.write("##
|
| 341 |
-
f.write(
|
| 342 |
-
f.write("##
|
| 343 |
-
f.write(
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
except Exception as e:
|
| 349 |
print(f"CRITICAL ERROR: {e}")
|
| 350 |
-
yield f"β Error: {str(e)}",
|
| 351 |
|
| 352 |
# Define a custom maroon color palette
|
| 353 |
maroon = gr.themes.Color(
|
|
@@ -367,13 +434,13 @@ maroon = gr.themes.Color(
|
|
| 367 |
with gr.Blocks(title="Dr. Jones AI Coach",
|
| 368 |
theme=gr.themes.Default(primary_hue=maroon, text_size="lg")) as demo:
|
| 369 |
gr.Markdown("# π Capstone Slide Review")
|
| 370 |
-
gr.Markdown("Upload your slides (PDF) for feedback
|
| 371 |
|
| 372 |
with gr.Row():
|
| 373 |
with gr.Column(scale=3):
|
| 374 |
file_input = gr.File(label="Upload PDF Slides",
|
| 375 |
-
file_types=[".pdf"
|
| 376 |
-
height=150)
|
| 377 |
with gr.Column(scale=1):
|
| 378 |
pass_input = gr.Textbox(label="Password", type="password")
|
| 379 |
temp_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Coach Temperature")
|
|
@@ -382,28 +449,35 @@ with gr.Blocks(title="Dr. Jones AI Coach",
|
|
| 382 |
|
| 383 |
status = gr.Markdown("**Status**: Ready")
|
| 384 |
|
| 385 |
-
# Results Area
|
| 386 |
with gr.Row():
|
| 387 |
with gr.Column(scale=1):
|
| 388 |
preview_img = gr.Image(label="Title Slide", interactive=False)
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
with gr.Column(scale=1):
|
| 392 |
-
progress_status = gr.Markdown(value="") # Temporary status next to download
|
| 393 |
-
|
| 394 |
-
with gr.Column(scale=2):
|
| 395 |
-
gr.Markdown("### π¨βπ« Coach Summary")
|
| 396 |
-
summary_display = gr.Textbox(label="", show_label=False, lines=7, interactive=False)
|
| 397 |
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
btn.click(
|
| 403 |
fn=process_presentation,
|
| 404 |
inputs=[file_input, pass_input, temp_input],
|
| 405 |
-
outputs=[status,
|
| 406 |
-
|
|
|
|
| 407 |
)
|
| 408 |
|
| 409 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import json
|
| 4 |
+
import hashlib
|
| 5 |
import shutil
|
| 6 |
import time
|
| 7 |
+
import re
|
| 8 |
+
import anthropic
|
| 9 |
from pathlib import Path
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
from google import genai
|
|
|
|
| 20 |
# On HF Spaces, set this in "Settings" -> "Secrets"
|
| 21 |
load_dotenv()
|
| 22 |
API_KEY = os.getenv("GOOGLE_API_KEY")
|
| 23 |
+
CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY")
|
| 24 |
ACCESS_PASSWORD = os.getenv("APP_PASSWORD")
|
| 25 |
|
| 26 |
+
SCANNER_MODEL = "gemini-2.0-flash"
|
| 27 |
+
COACH_MODEL = "claude-sonnet-4-6"
|
| 28 |
+
CACHE_DIR = Path("cache/slides")
|
| 29 |
+
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
| 30 |
+
|
| 31 |
+
COACH_PERSONAS = {
|
| 32 |
+
"business": {
|
| 33 |
+
"name": "Business Strategy Coach",
|
| 34 |
+
"icon": "πΌ",
|
| 35 |
+
"role": "You are a Senior Business Strategist and executive communication expert.",
|
| 36 |
+
"focus": (
|
| 37 |
+
"Evaluate through a BUSINESS LENS:\n"
|
| 38 |
+
"- Is the business problem clearly articulated? Would a VP understand it?\n"
|
| 39 |
+
"- Does the executive summary lead with the answer, not the methodology?\n"
|
| 40 |
+
"- Is the value proposition compelling with specific ROI numbers?\n"
|
| 41 |
+
"- Is the business impact quantified and positioned persuasively?\n"
|
| 42 |
+
"- Would this presentation convince decision-makers to act?"
|
| 43 |
+
)
|
| 44 |
+
},
|
| 45 |
+
"analytics": {
|
| 46 |
+
"name": "Analytics & Methodology Coach",
|
| 47 |
+
"icon": "π",
|
| 48 |
+
"role": "You are a Senior Data Scientist and ML methodology expert.",
|
| 49 |
+
"focus": (
|
| 50 |
+
"Evaluate through a TECHNICAL/ANALYTICAL LENS:\n"
|
| 51 |
+
"- Is the data structure and preparation approach well-documented?\n"
|
| 52 |
+
"- Are the target variables and evaluation metrics appropriate and justified?\n"
|
| 53 |
+
"- Is model selection rigorous? Were enough candidates explored?\n"
|
| 54 |
+
"- Is the HPO strategy systematic and well-explained?\n"
|
| 55 |
+
"- Is validation thorough (holdout tests, cross-validation, confidence intervals)?\n"
|
| 56 |
+
"- Are results reproducible from what is shown?"
|
| 57 |
+
)
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
|
| 61 |
# -----------------------------------------------------------------------------
|
| 62 |
# LOGIC: CONVERSION (PDF -> IMAGES)
|
|
|
|
| 72 |
|
| 73 |
if ext == ".pdf":
|
| 74 |
print("Converting PDF to images...")
|
| 75 |
+
images = convert_from_path(file_path, dpi=150)
|
| 76 |
image_paths = []
|
| 77 |
for i, img in enumerate(images):
|
| 78 |
+
path = output_dir / f"slide-{i+1:02d}.jpg"
|
| 79 |
+
img.save(path, "JPEG", quality=85, optimize=True)
|
| 80 |
image_paths.append(path)
|
| 81 |
return image_paths
|
| 82 |
else:
|
|
|
|
| 91 |
inventory = []
|
| 92 |
total = len(image_paths)
|
| 93 |
|
| 94 |
+
cache_hits = 0
|
| 95 |
+
|
| 96 |
for i, img_path in enumerate(image_paths):
|
| 97 |
slide_num = i + 1
|
| 98 |
yield f"Reading Slide {slide_num}/{total}...", None
|
| 99 |
|
| 100 |
+
with open(img_path, "rb") as f:
|
| 101 |
+
img_bytes = f.read()
|
| 102 |
+
|
| 103 |
+
# Check slide cache by image hash
|
| 104 |
+
img_hash = hashlib.sha256(img_bytes).hexdigest()
|
| 105 |
+
cache_path = CACHE_DIR / f"{img_hash}.json"
|
| 106 |
+
|
| 107 |
+
if cache_path.exists():
|
| 108 |
+
data = json.loads(cache_path.read_text())
|
| 109 |
+
data["slide_number"] = slide_num
|
| 110 |
+
inventory.append(data)
|
| 111 |
+
cache_hits += 1
|
| 112 |
+
print(f" Slide {slide_num}: CACHE HIT")
|
| 113 |
+
continue
|
| 114 |
+
|
| 115 |
print(f"Scanning Slide {slide_num}...")
|
| 116 |
|
| 117 |
# Rate Limiting: Sleep to respect API limits (avoid 429 errors)
|
| 118 |
+
file_size_mb = len(img_bytes) / (1024 * 1024)
|
|
|
|
| 119 |
|
| 120 |
if file_size_mb > 1.0:
|
| 121 |
print(f" Large file ({file_size_mb:.1f}MB). Pausing 10s to refill quota...")
|
| 122 |
+
time.sleep(10)
|
| 123 |
else:
|
| 124 |
+
time.sleep(2)
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
prompt = f"""
|
| 127 |
Analyze this slide (Slide {slide_num}).
|
|
|
|
| 147 |
for attempt in range(max_retries):
|
| 148 |
try:
|
| 149 |
response = client.models.generate_content(
|
| 150 |
+
model=SCANNER_MODEL,
|
| 151 |
contents=[
|
| 152 |
+
types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg"),
|
| 153 |
prompt
|
| 154 |
],
|
| 155 |
config=types.GenerateContentConfig(
|
|
|
|
| 159 |
)
|
| 160 |
data = json.loads(response.text)
|
| 161 |
|
|
|
|
| 162 |
if isinstance(data, list):
|
| 163 |
if len(data) > 0 and isinstance(data[0], dict):
|
| 164 |
data = data[0]
|
|
|
|
| 168 |
|
| 169 |
if isinstance(data, dict):
|
| 170 |
inventory.append(data)
|
| 171 |
+
cache_path.write_text(json.dumps(data, indent=2))
|
| 172 |
else:
|
| 173 |
print(f"Warning: Slide {slide_num} did not return a valid JSON dictionary. Data: {data}")
|
| 174 |
|
|
|
|
| 175 |
break
|
| 176 |
|
| 177 |
except Exception as e:
|
| 178 |
error_str = str(e)
|
| 179 |
if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
|
| 180 |
+
wait_time = (attempt + 1) * 5
|
| 181 |
print(f" β οΈ Rate Limit (429) on Slide {slide_num}. Retrying in {wait_time}s...")
|
| 182 |
yield f"β οΈ High Traffic. Retrying Slide {slide_num} in {wait_time}s...", None
|
| 183 |
time.sleep(wait_time)
|
| 184 |
else:
|
| 185 |
print(f"Error scanning slide {slide_num}: {e}")
|
| 186 |
+
break
|
| 187 |
+
|
| 188 |
+
print(f" Cache: {cache_hits}/{total} slides cached, {total - cache_hits} scanned via API")
|
| 189 |
|
| 190 |
yield "Scan Complete", inventory
|
| 191 |
|
|
|
|
| 204 |
# -----------------------------------------------------------------------------
|
| 205 |
# LOGIC: PASS 2 (COACH CRITIQUE)
|
| 206 |
# -----------------------------------------------------------------------------
|
| 207 |
+
def build_inventory_script(inventory):
    """Render the slide inventory as the plain-text script shown to the coach.

    Entries that are not dicts are ignored, and any slide whose title
    contains "appendix" (case-insensitive) is left out.

    Args:
        inventory: Iterable of per-slide dicts. ``None`` is treated as an
            empty inventory.

    Returns:
        One three-line entry per remaining slide (number/title, content,
        visuals with a BUSY/OK flag), joined with newlines. Returns an
        empty string when no slide remains.
    """

    def text_or(value, fallback=""):
        # ``dict.get(key, default)`` only covers *missing* keys, so a JSON
        # null would otherwise be printed as the literal "None". Everything
        # else is stringified so a numeric title cannot break ``.lower()``.
        if value is None or value == "":
            return fallback
        return str(value)

    active = [
        slide
        for slide in (inventory or [])
        if isinstance(slide, dict)
        and "appendix" not in text_or(slide.get("title")).lower()
    ]
    print(f"DEBUG: Pass 2 using {len(active)} active slides (excluding appendices).")

    script = []
    for slide in active:
        visuals = slide.get("visual_elements")
        if not isinstance(visuals, dict):
            # The scanner output is model-generated; tolerate a missing or
            # malformed visual_elements field.
            visuals = {}

        busy = "BUSY" if visuals.get("is_busy") else "OK"
        title = text_or(slide.get("title"), "No Title")
        num = text_or(slide.get("slide_number"), "?")
        takeaway = text_or(slide.get("key_takeaway"))
        desc = text_or(slide.get("visual_description"))

        script.append(
            f"Slide {num}: {title}\n- Content: {takeaway}\n- Visuals: {desc} [{busy}]"
        )

    return "\n".join(script)
|
| 232 |
+
|
| 233 |
+
def _build_coach_prompt(persona, full_text):
    """Assemble the Pass 2 prompt for one coach persona."""
    # NOTE(review): the four status glyphs were mis-encoded in the reviewed
    # copy of this file; they are restored here as check / warning / cross /
    # question mark. Confirm against the repository version.
    return f"""{persona['role']}
Your goal is to guide a Data Science student to professional excellence.

{persona['focus']}

SLIDE INVENTORY:
{full_text}

TASK:
Coach this student based on the 8-Step Story Arc.

REQUIRED STORY ARC:
1. Executive Summary
2. Data Structure
3. Targets & Metrics
4. Candidate Models
5. HPO Strategy
6. Best Model Selection
7. Validation
8. Business Impact

INSTRUCTIONS:
1. **Fill the Roadmap**: For each of the 8 steps above, determine status (✅, ⚠️, ❌, ❓).
2. **Check for Specifics**: If the student provides specific numbers (e.g. "$5,065 savings", "98% accuracy"), YOU MUST QUOTE THEM in the notes. Do not give generic advice if the specific data is present.
3. **Slide Refs**: Cite specific slide numbers in the notes.
4. **Tone**: Encouraging but precise.
5. **Summary**: Write a robust 2-paragraph summary (approx 150 words) from your perspective as {persona['name']}.

OUTPUT STRICT JSON (no markdown fences, no extra text):
{{
    "overall_summary": "Encouraging feedback (2 paragraphs).",
    "structure_roadmap": [
        {{
            "step_name": "String (e.g. '1. Exec Summary')",
            "status_icon": "String (✅, ⚠️, ❌, ❓)",
            "coach_notes": "String"
        }}
    ]
}}"""


def _parse_coach_json(raw_text):
    """Extract the critique dict from the coach's raw reply.

    Raises:
        ValueError: if the reply does not contain a JSON object
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    cleaned = raw_text.strip()

    # The prompt forbids markdown fences, but strip one if it shows up anyway.
    fence_match = re.search(r"```(?:json)?\s*\n?(.*?)```", cleaned, re.DOTALL)
    if fence_match:
        cleaned = fence_match.group(1).strip()

    try:
        critique = json.loads(cleaned)
    except json.JSONDecodeError:
        # Last resort: the reply has prose around the object, so parse the
        # outermost {...} span instead of giving up.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        critique = json.loads(cleaned[start:end + 1])

    if isinstance(critique, list):
        if critique and isinstance(critique[0], dict):
            critique = critique[0]
        else:
            raise ValueError(f"Coach returned a list, expected a dictionary. Output: {critique}")

    # A bare string/number is valid JSON but would crash callers that use
    # ``critique.get(...)``, so reject it here.
    if not isinstance(critique, dict):
        raise ValueError(
            f"Coach returned {type(critique).__name__}, expected a dictionary. Output: {critique}"
        )
    return critique


def generate_critique(coach_client, inventory, persona, temperature=0.2):
    """Ask the coach model to critique the deck from one persona's viewpoint.

    Args:
        coach_client: Client exposing ``messages.create(...)``; the caller in
            this file passes an ``anthropic.Anthropic`` instance.
        inventory: Per-slide dicts from the scan pass.
        persona: Dict with ``role``, ``focus`` and ``name`` keys.
        temperature: Sampling temperature forwarded to the model.

    Returns:
        A dict with ``overall_summary`` and ``structure_roadmap``. This
        function never raises: on any failure it returns a fallback dict
        whose summary carries the error text and whose roadmap is empty.
    """
    try:
        full_text = build_inventory_script(inventory)
        prompt = _build_coach_prompt(persona, full_text)

        response = coach_client.messages.create(
            model=COACH_MODEL,
            max_tokens=4096,
            temperature=temperature,
            messages=[{"role": "user", "content": prompt}],
        )

        # Join every text block instead of assuming the first content block
        # exists and is text.
        raw_text = "".join(
            getattr(block, "text", "") or "" for block in (response.content or [])
        )
        if not raw_text.strip():
            raise ValueError("Coach returned no text content.")
        print(f"DEBUG: {persona['name']} response received from {COACH_MODEL}.")

        return _parse_coach_json(raw_text)

    except Exception as e:
        # Top-level boundary for the coach call: the UI needs a dict either
        # way. Read the persona name defensively so the handler cannot raise.
        label = persona.get("name", "coach") if isinstance(persona, dict) else "coach"
        print(f"CRITICAL ERROR in Pass 2 ({label}): {e}")
        return {
            "overall_summary": f"Error generating critique: {e}",
            "structure_roadmap": [],
        }
|
| 308 |
|
| 309 |
# -----------------------------------------------------------------------------
|
| 310 |
# GRADIO INTERFACE
|
| 311 |
# -----------------------------------------------------------------------------
|
| 312 |
+
def format_roadmap_table(critique):
    """Build a markdown table from a critique's ``structure_roadmap``.

    Args:
        critique: Dict that may hold a ``structure_roadmap`` list of dicts
            with ``step_name``, ``status_icon`` and ``coach_notes`` keys.

    Returns:
        The table header followed by one row per valid roadmap item. With
        no usable roadmap, only the header is returned.
    """

    def cell(value, fallback=""):
        # Cell text is model-generated: a raw "|" would start a new column
        # and a newline would end the row, so escape / replace both.
        text = fallback if value is None else str(value)
        text = text.replace("|", "\\|")
        return "<br>".join(text.splitlines())

    header = (
        "| <span style='display:inline-block; min-width:180px'>STEP</span> "
        "| <span style='display:inline-block; min-width:60px'>FLAG</span> "
        "| COACH NOTES |\n|---|:---:|---|\n"
    )

    roadmap = critique.get("structure_roadmap") if isinstance(critique, dict) else None
    if not isinstance(roadmap, list):
        roadmap = []

    rows = []
    for item in roadmap:
        if not isinstance(item, dict):
            continue  # skip malformed entries instead of crashing on .get
        # NOTE(review): the default icon was mis-encoded in the reviewed copy;
        # restored as a question mark glyph — confirm against the repository.
        icon = cell(item.get("status_icon"), "❓")
        step = cell(item.get("step_name"), "Step")
        note = cell(item.get("coach_notes"))
        rows.append(
            f"| **{step}** | <span style='font-size: 1.5em'>{icon}</span> | {note} |\n"
        )

    return header + "".join(rows)
|
| 325 |
+
|
| 326 |
+
# Placeholder values for the seven outputs that follow the status message:
# four text panes, the preview image, the report file and the progress note.
EMPTY_OUTPUTS = ("", "", "", "", None, None, "")


def process_presentation(file_obj, password, temperature):
    """Run the convert -> scan -> coach pipeline, streaming UI updates.

    This is a generator. Every yielded value is an 8-tuple: a status message
    followed by seven output values in the order business summary, business
    roadmap, analytics summary, analytics roadmap, preview image, report
    file, progress note.

    Args:
        file_obj: The uploaded file. Either a path (the file input is
            declared with ``type="filepath"``) or an object with a ``.name``
            attribute holding the path.
        password: Password typed by the user, compared to ACCESS_PASSWORD.
        temperature: Sampling temperature forwarded to both coach calls.

    Yields:
        Progress tuples, then either the final results or one error tuple.
        Errors are reported through the status message, never raised.
    """
    print("--- NEW JOB STARTED ---")

    # A ``return <value>`` inside a generator only sets StopIteration.value,
    # which callers iterating the generator never see. Every early exit
    # therefore yields its message first and then returns bare.
    # NOTE(review): the status emoji were mis-encoded in the reviewed copy;
    # restored as cross / check marks — confirm against the repository.
    if file_obj is None:
        yield ("❌ Error: No file uploaded",) + EMPTY_OUTPUTS
        return

    if password != ACCESS_PASSWORD:
        yield ("❌ Incorrect Password",) + EMPTY_OUTPUTS
        return

    if not API_KEY:
        yield ("❌ Server Error: Google API Key missing",) + EMPTY_OUTPUTS
        return
    if not CLAUDE_API_KEY:
        yield ("❌ Server Error: Claude API Key missing",) + EMPTY_OUTPUTS
        return

    try:
        scanner_client = genai.Client(api_key=API_KEY)
        coach_client = anthropic.Anthropic(api_key=CLAUDE_API_KEY)

        # A path string has no ``.name`` attribute, so only read ``.name``
        # from file-like wrappers.
        if isinstance(file_obj, (str, Path)):
            source_path = str(file_obj)
        else:
            source_path = file_obj.name

        # 1. Convert
        print("Step 1: Converting PDF...")
        yield ("Converting PDF...",) + EMPTY_OUTPUTS
        images = convert_to_images(source_path)
        if not images:
            # Guards the images[0] preview below.
            yield ("❌ Error: No slides could be extracted from the file",) + EMPTY_OUTPUTS
            return
        print(f" Converted {len(images)} slides.")

        # 2. Scan (Pass 1 - Gemini Flash)
        yield (f"Scanning {len(images)} slides...",) + EMPTY_OUTPUTS
        print("Step 2: Scanning Slides (Pass 1)...")

        inventory = []
        for msg, result in scan_slides(scanner_client, images):
            if result is None:
                # Progress message only; forward it to the UI.
                yield (msg,) + EMPTY_OUTPUTS
            else:
                inventory = result

        print(" Scan Complete.")

        # Save Inventory
        original_stem = Path(source_path).stem
        target_dir = Path("slides_images") / original_stem
        target_dir.mkdir(parents=True, exist_ok=True)

        inventory_filename = target_dir / f"{original_stem}_Inventory.json"
        with open(inventory_filename, "w", encoding="utf-8") as f:
            json.dump(inventory, f, indent=4)
        print(f" Saved Inventory to {inventory_filename}")

        # 3. Coach (Pass 2 - two personas)
        debug_inventory(inventory)

        biz_persona = COACH_PERSONAS["business"]
        ana_persona = COACH_PERSONAS["analytics"]

        # The persona's own icon is used in the status line so the emoji is
        # defined in one place only.
        yield (f"{biz_persona['icon']} {biz_persona['name']} reviewing...",) + EMPTY_OUTPUTS
        print(f"Step 3a: {biz_persona['name']} [Temp: {temperature}]...")
        biz_critique = generate_critique(coach_client, inventory, biz_persona, temperature)
        print(f" {biz_persona['name']} done.")

        yield (f"{ana_persona['icon']} {ana_persona['name']} reviewing...",) + EMPTY_OUTPUTS
        print(f"Step 3b: {ana_persona['name']} [Temp: {temperature}]...")
        ana_critique = generate_critique(coach_client, inventory, ana_persona, temperature)
        print(f" {ana_persona['name']} done.")

        # 4. Format Output
        biz_summary = biz_critique.get("overall_summary", "")
        biz_table = format_roadmap_table(biz_critique)
        ana_summary = ana_critique.get("overall_summary", "")
        ana_table = format_roadmap_table(ana_critique)

        # Create Combined Report File. The report contains emoji, so the
        # encoding is pinned rather than left to the platform default.
        report_filename = f"{original_stem}_Review.md"
        with open(report_filename, "w", encoding="utf-8") as f:
            f.write(f"# Dr. Jones Feedback for {original_stem}\n\n")
            f.write(f"## {biz_persona['icon']} {biz_persona['name']}\n\n")
            f.write(biz_summary + "\n\n")
            f.write("### Business Roadmap\n")
            f.write(biz_table + "\n\n")
            f.write(f"## {ana_persona['icon']} {ana_persona['name']}\n\n")
            f.write(ana_summary + "\n\n")
            f.write("### Analytics Roadmap\n")
            f.write(ana_table)

        yield (
            "✅ Done!",
            biz_summary,
            biz_table,
            ana_summary,
            ana_table,
            images[0],
            report_filename,
            "",
        )

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure in the
        # status pane instead of letting the exception escape.
        print(f"CRITICAL ERROR: {e}")
        yield (f"❌ Error: {str(e)}",) + EMPTY_OUTPUTS
|
| 418 |
|
| 419 |
# Define a custom maroon color palette
|
| 420 |
maroon = gr.themes.Color(
|
|
|
|
| 434 |
with gr.Blocks(title="Dr. Jones AI Coach",
|
| 435 |
theme=gr.themes.Default(primary_hue=maroon, text_size="lg")) as demo:
|
| 436 |
gr.Markdown("# π Capstone Slide Review")
|
| 437 |
+
gr.Markdown("Upload your slides (PDF) for feedback from your AI coaching committee.")
|
| 438 |
|
| 439 |
with gr.Row():
|
| 440 |
with gr.Column(scale=3):
|
| 441 |
file_input = gr.File(label="Upload PDF Slides",
|
| 442 |
+
file_types=[".pdf", "application/pdf"],
|
| 443 |
+
type="filepath", height=150)
|
| 444 |
with gr.Column(scale=1):
|
| 445 |
pass_input = gr.Textbox(label="Password", type="password")
|
| 446 |
temp_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Coach Temperature")
|
|
|
|
| 449 |
|
| 450 |
status = gr.Markdown("**Status**: Ready")
|
| 451 |
|
|
|
|
| 452 |
with gr.Row():
|
| 453 |
with gr.Column(scale=1):
|
| 454 |
preview_img = gr.Image(label="Title Slide", interactive=False)
|
| 455 |
+
download_btn = gr.File(label="Download Full Report")
|
| 456 |
+
progress_status = gr.Markdown(value="")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
|
| 458 |
+
with gr.Column(scale=2):
|
| 459 |
+
with gr.Tabs():
|
| 460 |
+
with gr.TabItem("πΌ Business Strategy Coach"):
|
| 461 |
+
biz_summary_display = gr.Textbox(label="Business Summary",
|
| 462 |
+
show_label=False, lines=6, interactive=False)
|
| 463 |
+
|
| 464 |
+
with gr.TabItem("π Analytics & Methodology Coach"):
|
| 465 |
+
ana_summary_display = gr.Textbox(label="Analytics Summary",
|
| 466 |
+
show_label=False, lines=6, interactive=False)
|
| 467 |
+
|
| 468 |
+
with gr.Tabs():
|
| 469 |
+
with gr.TabItem("πΌ Business Roadmap"):
|
| 470 |
+
biz_roadmap_display = gr.Markdown()
|
| 471 |
+
|
| 472 |
+
with gr.TabItem("π Analytics Roadmap"):
|
| 473 |
+
ana_roadmap_display = gr.Markdown()
|
| 474 |
|
| 475 |
btn.click(
|
| 476 |
fn=process_presentation,
|
| 477 |
inputs=[file_input, pass_input, temp_input],
|
| 478 |
+
outputs=[status, biz_summary_display, biz_roadmap_display,
|
| 479 |
+
ana_summary_display, ana_roadmap_display,
|
| 480 |
+
preview_img, download_btn, progress_status]
|
| 481 |
)
|
| 482 |
|
| 483 |
if __name__ == "__main__":
|
requirements.txt
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
gradio
|
| 2 |
google-genai
|
|
|
|
| 3 |
python-dotenv
|
| 4 |
pdf2image
|
| 5 |
pillow
|
|
|
|
| 1 |
gradio
|
| 2 |
google-genai
|
| 3 |
+
anthropic
|
| 4 |
python-dotenv
|
| 5 |
pdf2image
|
| 6 |
pillow
|