anly656 committed on
Commit
dfdc7ba
·
verified ·
1 Parent(s): de07280

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +233 -159
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,8 +1,11 @@
1
  import gradio as gr
2
  import os
3
  import json
 
4
  import shutil
5
  import time
 
 
6
  from pathlib import Path
7
  from dotenv import load_dotenv
8
  from google import genai
@@ -17,9 +20,43 @@ import io
17
  # On HF Spaces, set this in "Settings" -> "Secrets"
18
  load_dotenv()
19
  API_KEY = os.getenv("GOOGLE_API_KEY")
 
20
  ACCESS_PASSWORD = os.getenv("APP_PASSWORD")
21
 
22
- MODEL_NAME = "gemini-2.0-flash"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # -----------------------------------------------------------------------------
25
  # LOGIC: CONVERSION (PDF -> IMAGES)
@@ -35,11 +72,11 @@ def convert_to_images(file_path):
35
 
36
  if ext == ".pdf":
37
  print("Converting PDF to images...")
38
- images = convert_from_path(file_path, dpi=200) # 200 is fine for Gemini Vision
39
  image_paths = []
40
  for i, img in enumerate(images):
41
- path = output_dir / f"slide-{i+1:02d}.png"
42
- img.save(path, "PNG")
43
  image_paths.append(path)
44
  return image_paths
45
  else:
@@ -54,25 +91,37 @@ def scan_slides(client, image_paths):
54
  inventory = []
55
  total = len(image_paths)
56
 
57
- # Process 1-by-1 to preserve context and allow progress updates
 
58
  for i, img_path in enumerate(image_paths):
59
  slide_num = i + 1
60
  yield f"Reading Slide {slide_num}/{total}...", None
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  print(f"Scanning Slide {slide_num}...")
63
 
64
  # Rate Limiting: Sleep to respect API limits (avoid 429 errors)
65
- # Check file size to adjust sleep time
66
- file_size_mb = os.path.getsize(img_path) / (1024 * 1024)
67
 
68
  if file_size_mb > 1.0:
69
  print(f" Large file ({file_size_mb:.1f}MB). Pausing 10s to refill quota...")
70
- time.sleep(10) # Wait longer for big files
71
  else:
72
- time.sleep(2) # Standard pause for normal files
73
-
74
- with open(img_path, "rb") as f:
75
- img_bytes = f.read()
76
 
77
  prompt = f"""
78
  Analyze this slide (Slide {slide_num}).
@@ -98,9 +147,9 @@ def scan_slides(client, image_paths):
98
  for attempt in range(max_retries):
99
  try:
100
  response = client.models.generate_content(
101
- model=MODEL_NAME,
102
  contents=[
103
- types.Part.from_bytes(data=img_bytes, mime_type="image/png"),
104
  prompt
105
  ],
106
  config=types.GenerateContentConfig(
@@ -110,7 +159,6 @@ def scan_slides(client, image_paths):
110
  )
111
  data = json.loads(response.text)
112
 
113
- # Robustness: Handle case where model returns a list [ { ... } ] instead of { ... }
114
  if isinstance(data, list):
115
  if len(data) > 0 and isinstance(data[0], dict):
116
  data = data[0]
@@ -120,22 +168,24 @@ def scan_slides(client, image_paths):
120
 
121
  if isinstance(data, dict):
122
  inventory.append(data)
 
123
  else:
124
  print(f"Warning: Slide {slide_num} did not return a valid JSON dictionary. Data: {data}")
125
 
126
- # If successful, break retry loop
127
  break
128
 
129
  except Exception as e:
130
  error_str = str(e)
131
  if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
132
- wait_time = (attempt + 1) * 5 # 5, 10, 15, 20...
133
  print(f" ⚠️ Rate Limit (429) on Slide {slide_num}. Retrying in {wait_time}s...")
134
  yield f"⚠️ High Traffic. Retrying Slide {slide_num} in {wait_time}s...", None
135
  time.sleep(wait_time)
136
  else:
137
  print(f"Error scanning slide {slide_num}: {e}")
138
- break # Don't retry other errors
 
 
139
 
140
  yield "Scan Complete", inventory
141
 
@@ -154,200 +204,217 @@ def debug_inventory(inventory):
154
  # -----------------------------------------------------------------------------
155
  # LOGIC: PASS 2 (COACH CRITIQUE)
156
  # -----------------------------------------------------------------------------
157
- def generate_critique(client, inventory, temperature=0.2):
158
- try:
159
- # Filter Appendix (Robustly)
160
- def get_title(slide):
161
- if not isinstance(slide, dict): return ""
162
- t = slide.get("title")
163
- return t if t else ""
164
-
165
- # Ensure 's' is a dict before accessing
166
- active = [s for s in inventory if isinstance(s, dict) and "appendix" not in get_title(s).lower()]
167
- print(f"DEBUG: Pass 2 Prompt contains {len(active)} active slides (excluding appendices).")
168
-
169
- # Script
170
- script = []
171
- for s in active:
172
- visuals = s.get("visual_elements", {})
173
- if not isinstance(visuals, dict): visuals = {} # Safety
174
-
175
- busy = "BUSY" if visuals.get("is_busy") else "OK"
176
- title = s.get('title', 'No Title')
177
- num = s.get('slide_number', '?')
178
- takeaway = s.get('key_takeaway', '')
179
- desc = s.get('visual_description', '')
180
-
181
- entry = f"Slide {num}: {title}\n- Content: {takeaway}\n- Visuals: {desc} [{busy}]"
182
- script.append(entry)
183
-
184
- full_text = "\n".join(script)
185
-
186
- # THE MENTOR PROMPT (Synced with run_pass_2_gemini.py)
187
- prompt = f"""
188
- You are Dr. Jones, an expert Data Science Mentor.
189
- Your goal is to guide a student to professional excellence.
190
-
191
- SLIDE INVENTORY:
192
- {full_text}
193
 
194
- TASK:
195
- Coach this student based on the 8-Step Story Arc.
 
 
 
196
 
197
- REQUIRED STORY ARC:
198
- 1. Executive Summary
199
- 2. Data Structure
200
- 3. Targets & Metrics
201
- 4. Candidate Models
202
- 5. HPO Strategy
203
- 6. Best Model Selection
204
- 7. Validation
205
- 8. Business Impact
206
 
207
- INSTRUCTIONS:
208
- 1. **Fill the Roadmap**: For each of the 8 steps above, determine status (βœ…, ⚠️, ❓, β­•).
209
- 2. **Check for Specifics**: If the student provides specific numbers (e.g. "$5,065 savings", "98% accuracy"), YOU MUST QUOTE THEM in the notes. Do not give generic advice if the specific data is present.
210
- 3. **Slide Refs**: Cite specific slide numbers in the notes.
211
- 4. **Tone**: Encouraging but precise.
212
- 5. **Summary**: Write a robust 2-paragraph summary (approx 150 words) explaining the overall impression and main areas for improvement.
213
 
214
- OUTPUT JSON SCHEMA:
215
- {{
216
- "overall_summary": "Encouraging feedback (2 paragraphs).",
217
- "structure_roadmap": [
218
- {{
219
- "step_name": "String (e.g. '1. Exec Summary')",
220
- "status_icon": "String (βœ…, ⚠️, ❓, β­•)",
221
- "coach_notes": "String"
222
- }}
223
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  }}
225
- """
226
-
227
- response = client.models.generate_content(
228
- model=MODEL_NAME,
229
- contents=prompt,
230
- config=types.GenerateContentConfig(
231
- response_mime_type="application/json",
232
- temperature=temperature
233
- )
234
  )
235
- print("DEBUG: Received Pass 2 Response from Gemini.")
236
 
237
- critique = json.loads(response.text)
 
 
 
 
 
 
 
 
238
 
239
- # Robustness: Handle list return from Coach
240
  if isinstance(critique, list):
241
  if len(critique) > 0 and isinstance(critique[0], dict):
242
  critique = critique[0]
243
  else:
244
- # If it's a list but not a list of dicts, or empty, fail gracefully
245
  raise ValueError(f"Coach returned a list, expected a dictionary. Output: {critique}")
246
 
247
  return critique
248
 
249
  except Exception as e:
250
- print(f"CRITICAL ERROR in Pass 2: {e}")
251
- # Return a fallback critique so the UI doesn't hang
252
  return {
253
  "overall_summary": f"Error generating critique: {e}",
254
  "structure_roadmap": [],
255
- "slide_tips": []
256
  }
257
 
258
  # -----------------------------------------------------------------------------
259
  # GRADIO INTERFACE
260
  # -----------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  def process_presentation(file_obj, password, temperature):
262
  print("--- NEW JOB STARTED ---")
263
  if file_obj is None:
264
- return "❌ Error: No file uploaded", "", "", None, None, ""
265
 
266
  if password != ACCESS_PASSWORD:
267
- yield "❌ Incorrect Password", "", "", None, None, ""
268
- return "❌ Incorrect Password", "", "", None, None, ""
269
 
270
  if not API_KEY:
271
- return "❌ Server Error: API Key missing", "", "", None, None, ""
 
 
272
 
273
- client = genai.Client(api_key=API_KEY)
 
274
 
275
  try:
276
  # 1. Convert
277
  print("Step 1: Converting PDF...")
278
- yield "Converting PDF...", "", "", None, None, "Converting PDF..."
279
  images = convert_to_images(file_obj.name)
280
  print(f" Converted {len(images)} slides.")
281
 
282
- # 2. Scan
283
- yield f"Scanning {len(images)} slides... (Adaptive Speed)", \
284
- "", "", None, None, "Starting Scan..."
285
  print("Step 2: Scanning Slides (Pass 1)...")
286
 
287
- # Iterate through the generator from scan_slides
288
- scanner = scan_slides(client, images)
289
  inventory = []
290
 
291
  for msg, result in scanner:
292
  if result is None:
293
- # Still scanning, update status
294
- yield msg, "", "", None, None, msg
295
  else:
296
- # Scan complete, result is the inventory
297
  inventory = result
298
 
299
  print(" Scan Complete.")
300
 
301
- # Save Inventory JSON (Artifact 1)
302
  original_stem = Path(file_obj.name).stem
303
-
304
- # Save to 'slides_images/[Stem]' to match project structure
305
  target_dir = Path("slides_images") / original_stem
306
  target_dir.mkdir(parents=True, exist_ok=True)
307
 
308
  inventory_filename = target_dir / f"{original_stem}_Inventory.json"
309
-
310
  with open(inventory_filename, "w") as f:
311
  json.dump(inventory, f, indent=4)
312
  print(f" Saved Inventory to {inventory_filename}")
313
 
314
- # 3. Coach
315
- debug_inventory(inventory) # Print debug info to console
 
 
 
316
 
317
- yield "Reviewing Presentation (Pass 2)...", "", "", \
318
- None, None, "Analyzing Structure ..."
319
- print(f"Step 3: Calling Coach (Pass 2) [Temp: {temperature}]...")
320
- critique = generate_critique(client, inventory, temperature)
321
- print(" Critique Received.")
 
 
 
 
322
 
323
  # 4. Format Output
324
- summary = critique.get("overall_summary", "")
325
-
326
- # Build Table Markdown
327
- table_md = "| STEP| FLAG | COACH NOTES |\n|---|:---:|---|\n"
328
- for item in critique.get("structure_roadmap", []): # Updated key to match schema
329
- icon = item.get('status_icon', '❓')
330
- step = item.get('step_name', 'Step')
331
- note = item.get('coach_notes', '')
332
- table_md += f"| **{step}** | <span style='font-size: 1.5em'>{icon}</span> | {note} |\n"
333
 
334
- # Create Report File
335
- # original_stem is already defined above
336
  report_filename = f"{original_stem}_Review.md"
337
-
338
  with open(report_filename, "w") as f:
339
  f.write(f"# Dr. Jones Feedback for {original_stem}\n\n")
340
- f.write("## Coach Summary\n")
341
- f.write(summary + "\n\n")
342
- f.write("## Story Roadmap\n")
343
- f.write(table_md)
344
-
345
- # Return: Status, Summary, Table, Preview Image, Report File, Small Status
346
- yield "βœ… Done!", summary, table_md, images[0], report_filename, ""
 
 
 
 
347
 
348
  except Exception as e:
349
  print(f"CRITICAL ERROR: {e}")
350
- yield f"❌ Error: {str(e)}", "", "", None, None, "Error"
351
 
352
  # Define a custom maroon color palette
353
  maroon = gr.themes.Color(
@@ -367,13 +434,13 @@ maroon = gr.themes.Color(
367
  with gr.Blocks(title="Dr. Jones AI Coach",
368
  theme=gr.themes.Default(primary_hue=maroon, text_size="lg")) as demo:
369
  gr.Markdown("# πŸŽ“ Capstone Slide Review")
370
- gr.Markdown("Upload your slides (PDF) for feedback on your Capstone Story Arc.")
371
 
372
  with gr.Row():
373
  with gr.Column(scale=3):
374
  file_input = gr.File(label="Upload PDF Slides",
375
- file_types=[".pdf"], type="filepath",
376
- height=150)
377
  with gr.Column(scale=1):
378
  pass_input = gr.Textbox(label="Password", type="password")
379
  temp_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Coach Temperature")
@@ -382,28 +449,35 @@ with gr.Blocks(title="Dr. Jones AI Coach",
382
 
383
  status = gr.Markdown("**Status**: Ready")
384
 
385
- # Results Area
386
  with gr.Row():
387
  with gr.Column(scale=1):
388
  preview_img = gr.Image(label="Title Slide", interactive=False)
389
- with gr.Row():
390
- download_btn = gr.File(label="Click Filesize to Download Report", scale=5)
391
- with gr.Column(scale=1):
392
- progress_status = gr.Markdown(value="") # Temporary status next to download
393
-
394
- with gr.Column(scale=2):
395
- gr.Markdown("### πŸ‘¨β€πŸ« Coach Summary")
396
- summary_display = gr.Textbox(label="", show_label=False, lines=7, interactive=False)
397
 
398
- with gr.Group():
399
- gr.Markdown("### πŸ“‹ The Story Roadmap")
400
- roadmap_display = gr.Markdown()
 
 
 
 
 
 
 
 
 
 
 
 
 
401
 
402
  btn.click(
403
  fn=process_presentation,
404
  inputs=[file_input, pass_input, temp_input],
405
- outputs=[status, summary_display, roadmap_display, preview_img,
406
- download_btn, progress_status]
 
407
  )
408
 
409
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import os
3
  import json
4
+ import hashlib
5
  import shutil
6
  import time
7
+ import re
8
+ import anthropic
9
  from pathlib import Path
10
  from dotenv import load_dotenv
11
  from google import genai
 
20
  # On HF Spaces, set this in "Settings" -> "Secrets"
21
  load_dotenv()
22
  API_KEY = os.getenv("GOOGLE_API_KEY")
23
+ CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY")
24
  ACCESS_PASSWORD = os.getenv("APP_PASSWORD")
25
 
26
+ SCANNER_MODEL = "gemini-2.0-flash"
27
+ COACH_MODEL = "claude-sonnet-4-6"
28
+ CACHE_DIR = Path("cache/slides")
29
+ CACHE_DIR.mkdir(parents=True, exist_ok=True)
30
+
31
+ COACH_PERSONAS = {
32
+ "business": {
33
+ "name": "Business Strategy Coach",
34
+ "icon": "πŸ’Ό",
35
+ "role": "You are a Senior Business Strategist and executive communication expert.",
36
+ "focus": (
37
+ "Evaluate through a BUSINESS LENS:\n"
38
+ "- Is the business problem clearly articulated? Would a VP understand it?\n"
39
+ "- Does the executive summary lead with the answer, not the methodology?\n"
40
+ "- Is the value proposition compelling with specific ROI numbers?\n"
41
+ "- Is the business impact quantified and positioned persuasively?\n"
42
+ "- Would this presentation convince decision-makers to act?"
43
+ )
44
+ },
45
+ "analytics": {
46
+ "name": "Analytics & Methodology Coach",
47
+ "icon": "πŸ“Š",
48
+ "role": "You are a Senior Data Scientist and ML methodology expert.",
49
+ "focus": (
50
+ "Evaluate through a TECHNICAL/ANALYTICAL LENS:\n"
51
+ "- Is the data structure and preparation approach well-documented?\n"
52
+ "- Are the target variables and evaluation metrics appropriate and justified?\n"
53
+ "- Is model selection rigorous? Were enough candidates explored?\n"
54
+ "- Is the HPO strategy systematic and well-explained?\n"
55
+ "- Is validation thorough (holdout tests, cross-validation, confidence intervals)?\n"
56
+ "- Are results reproducible from what is shown?"
57
+ )
58
+ }
59
+ }
60
 
61
  # -----------------------------------------------------------------------------
62
  # LOGIC: CONVERSION (PDF -> IMAGES)
 
72
 
73
  if ext == ".pdf":
74
  print("Converting PDF to images...")
75
+ images = convert_from_path(file_path, dpi=150)
76
  image_paths = []
77
  for i, img in enumerate(images):
78
+ path = output_dir / f"slide-{i+1:02d}.jpg"
79
+ img.save(path, "JPEG", quality=85, optimize=True)
80
  image_paths.append(path)
81
  return image_paths
82
  else:
 
91
  inventory = []
92
  total = len(image_paths)
93
 
94
+ cache_hits = 0
95
+
96
  for i, img_path in enumerate(image_paths):
97
  slide_num = i + 1
98
  yield f"Reading Slide {slide_num}/{total}...", None
99
 
100
+ with open(img_path, "rb") as f:
101
+ img_bytes = f.read()
102
+
103
+ # Check slide cache by image hash
104
+ img_hash = hashlib.sha256(img_bytes).hexdigest()
105
+ cache_path = CACHE_DIR / f"{img_hash}.json"
106
+
107
+ if cache_path.exists():
108
+ data = json.loads(cache_path.read_text())
109
+ data["slide_number"] = slide_num
110
+ inventory.append(data)
111
+ cache_hits += 1
112
+ print(f" Slide {slide_num}: CACHE HIT")
113
+ continue
114
+
115
  print(f"Scanning Slide {slide_num}...")
116
 
117
  # Rate Limiting: Sleep to respect API limits (avoid 429 errors)
118
+ file_size_mb = len(img_bytes) / (1024 * 1024)
 
119
 
120
  if file_size_mb > 1.0:
121
  print(f" Large file ({file_size_mb:.1f}MB). Pausing 10s to refill quota...")
122
+ time.sleep(10)
123
  else:
124
+ time.sleep(2)
 
 
 
125
 
126
  prompt = f"""
127
  Analyze this slide (Slide {slide_num}).
 
147
  for attempt in range(max_retries):
148
  try:
149
  response = client.models.generate_content(
150
+ model=SCANNER_MODEL,
151
  contents=[
152
+ types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg"),
153
  prompt
154
  ],
155
  config=types.GenerateContentConfig(
 
159
  )
160
  data = json.loads(response.text)
161
 
 
162
  if isinstance(data, list):
163
  if len(data) > 0 and isinstance(data[0], dict):
164
  data = data[0]
 
168
 
169
  if isinstance(data, dict):
170
  inventory.append(data)
171
+ cache_path.write_text(json.dumps(data, indent=2))
172
  else:
173
  print(f"Warning: Slide {slide_num} did not return a valid JSON dictionary. Data: {data}")
174
 
 
175
  break
176
 
177
  except Exception as e:
178
  error_str = str(e)
179
  if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str:
180
+ wait_time = (attempt + 1) * 5
181
  print(f" ⚠️ Rate Limit (429) on Slide {slide_num}. Retrying in {wait_time}s...")
182
  yield f"⚠️ High Traffic. Retrying Slide {slide_num} in {wait_time}s...", None
183
  time.sleep(wait_time)
184
  else:
185
  print(f"Error scanning slide {slide_num}: {e}")
186
+ break
187
+
188
+ print(f" Cache: {cache_hits}/{total} slides cached, {total - cache_hits} scanned via API")
189
 
190
  yield "Scan Complete", inventory
191
 
 
204
  # -----------------------------------------------------------------------------
205
  # LOGIC: PASS 2 (COACH CRITIQUE)
206
  # -----------------------------------------------------------------------------
207
+ def build_inventory_script(inventory):
208
+ """Shared logic: filter appendices and build the text script from inventory."""
209
+ def get_title(slide):
210
+ if not isinstance(slide, dict): return ""
211
+ t = slide.get("title")
212
+ return t if t else ""
213
+
214
+ active = [s for s in inventory if isinstance(s, dict) and "appendix" not in get_title(s).lower()]
215
+ print(f"DEBUG: Pass 2 using {len(active)} active slides (excluding appendices).")
216
+
217
+ script = []
218
+ for s in active:
219
+ visuals = s.get("visual_elements", {})
220
+ if not isinstance(visuals, dict): visuals = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
+ busy = "BUSY" if visuals.get("is_busy") else "OK"
223
+ title = s.get('title', 'No Title')
224
+ num = s.get('slide_number', '?')
225
+ takeaway = s.get('key_takeaway', '')
226
+ desc = s.get('visual_description', '')
227
 
228
+ entry = f"Slide {num}: {title}\n- Content: {takeaway}\n- Visuals: {desc} [{busy}]"
229
+ script.append(entry)
 
 
 
 
 
 
 
230
 
231
+ return "\n".join(script)
232
+
233
+ def generate_critique(coach_client, inventory, persona, temperature=0.2):
234
+ try:
235
+ full_text = build_inventory_script(inventory)
 
236
 
237
+ prompt = f"""{persona['role']}
238
+ Your goal is to guide a Data Science student to professional excellence.
239
+
240
+ {persona['focus']}
241
+
242
+ SLIDE INVENTORY:
243
+ {full_text}
244
+
245
+ TASK:
246
+ Coach this student based on the 8-Step Story Arc.
247
+
248
+ REQUIRED STORY ARC:
249
+ 1. Executive Summary
250
+ 2. Data Structure
251
+ 3. Targets & Metrics
252
+ 4. Candidate Models
253
+ 5. HPO Strategy
254
+ 6. Best Model Selection
255
+ 7. Validation
256
+ 8. Business Impact
257
+
258
+ INSTRUCTIONS:
259
+ 1. **Fill the Roadmap**: For each of the 8 steps above, determine status (βœ…, ⚠️, ❓, β­•).
260
+ 2. **Check for Specifics**: If the student provides specific numbers (e.g. "$5,065 savings", "98% accuracy"), YOU MUST QUOTE THEM in the notes. Do not give generic advice if the specific data is present.
261
+ 3. **Slide Refs**: Cite specific slide numbers in the notes.
262
+ 4. **Tone**: Encouraging but precise.
263
+ 5. **Summary**: Write a robust 2-paragraph summary (approx 150 words) from your perspective as {persona['name']}.
264
+
265
+ OUTPUT STRICT JSON (no markdown fences, no extra text):
266
+ {{
267
+ "overall_summary": "Encouraging feedback (2 paragraphs).",
268
+ "structure_roadmap": [
269
+ {{
270
+ "step_name": "String (e.g. '1. Exec Summary')",
271
+ "status_icon": "String (βœ…, ⚠️, ❓, β­•)",
272
+ "coach_notes": "String"
273
  }}
274
+ ]
275
+ }}"""
276
+
277
+ response = coach_client.messages.create(
278
+ model=COACH_MODEL,
279
+ max_tokens=4096,
280
+ temperature=temperature,
281
+ messages=[{"role": "user", "content": prompt}]
 
282
  )
 
283
 
284
+ raw_text = response.content[0].text
285
+ print(f"DEBUG: {persona['name']} response received from {COACH_MODEL}.")
286
+
287
+ cleaned = raw_text.strip()
288
+ fence_match = re.search(r"```(?:json)?\s*\n?(.*?)```", cleaned, re.DOTALL)
289
+ if fence_match:
290
+ cleaned = fence_match.group(1).strip()
291
+
292
+ critique = json.loads(cleaned)
293
 
 
294
  if isinstance(critique, list):
295
  if len(critique) > 0 and isinstance(critique[0], dict):
296
  critique = critique[0]
297
  else:
 
298
  raise ValueError(f"Coach returned a list, expected a dictionary. Output: {critique}")
299
 
300
  return critique
301
 
302
  except Exception as e:
303
+ print(f"CRITICAL ERROR in Pass 2 ({persona['name']}): {e}")
 
304
  return {
305
  "overall_summary": f"Error generating critique: {e}",
306
  "structure_roadmap": [],
 
307
  }
308
 
309
  # -----------------------------------------------------------------------------
310
  # GRADIO INTERFACE
311
  # -----------------------------------------------------------------------------
312
+ def format_roadmap_table(critique):
313
+ """Build a markdown table from a critique's structure_roadmap."""
314
+ table_md = (
315
+ "| <span style='display:inline-block; min-width:180px'>STEP</span> "
316
+ "| <span style='display:inline-block; min-width:60px'>FLAG</span> "
317
+ "| COACH NOTES |\n|---|:---:|---|\n"
318
+ )
319
+ for item in critique.get("structure_roadmap", []):
320
+ icon = item.get('status_icon', '❓')
321
+ step = item.get('step_name', 'Step')
322
+ note = item.get('coach_notes', '')
323
+ table_md += f"| **{step}** | <span style='font-size: 1.5em'>{icon}</span> | {note} |\n"
324
+ return table_md
325
+
326
+ EMPTY_OUTPUTS = ("", "", "", "", None, None, "")
327
+
328
  def process_presentation(file_obj, password, temperature):
329
  print("--- NEW JOB STARTED ---")
330
  if file_obj is None:
331
+ return ("❌ Error: No file uploaded",) + EMPTY_OUTPUTS
332
 
333
  if password != ACCESS_PASSWORD:
334
+ yield ("❌ Incorrect Password",) + EMPTY_OUTPUTS
335
+ return ("❌ Incorrect Password",) + EMPTY_OUTPUTS
336
 
337
  if not API_KEY:
338
+ return ("❌ Server Error: Google API Key missing",) + EMPTY_OUTPUTS
339
+ if not CLAUDE_API_KEY:
340
+ return ("❌ Server Error: Claude API Key missing",) + EMPTY_OUTPUTS
341
 
342
+ scanner_client = genai.Client(api_key=API_KEY)
343
+ coach_client = anthropic.Anthropic(api_key=CLAUDE_API_KEY)
344
 
345
  try:
346
  # 1. Convert
347
  print("Step 1: Converting PDF...")
348
+ yield ("Converting PDF...",) + EMPTY_OUTPUTS
349
  images = convert_to_images(file_obj.name)
350
  print(f" Converted {len(images)} slides.")
351
 
352
+ # 2. Scan (Pass 1 - Gemini Flash)
353
+ yield (f"Scanning {len(images)} slides...",) + EMPTY_OUTPUTS
 
354
  print("Step 2: Scanning Slides (Pass 1)...")
355
 
356
+ scanner = scan_slides(scanner_client, images)
 
357
  inventory = []
358
 
359
  for msg, result in scanner:
360
  if result is None:
361
+ yield (msg,) + EMPTY_OUTPUTS
 
362
  else:
 
363
  inventory = result
364
 
365
  print(" Scan Complete.")
366
 
367
+ # Save Inventory
368
  original_stem = Path(file_obj.name).stem
 
 
369
  target_dir = Path("slides_images") / original_stem
370
  target_dir.mkdir(parents=True, exist_ok=True)
371
 
372
  inventory_filename = target_dir / f"{original_stem}_Inventory.json"
 
373
  with open(inventory_filename, "w") as f:
374
  json.dump(inventory, f, indent=4)
375
  print(f" Saved Inventory to {inventory_filename}")
376
 
377
+ # 3. Coach (Pass 2 - Sonnet 4.6, two personas)
378
+ debug_inventory(inventory)
379
+
380
+ biz_persona = COACH_PERSONAS["business"]
381
+ ana_persona = COACH_PERSONAS["analytics"]
382
 
383
+ yield (f"πŸ’Ό {biz_persona['name']} reviewing...",) + EMPTY_OUTPUTS
384
+ print(f"Step 3a: {biz_persona['name']} [Temp: {temperature}]...")
385
+ biz_critique = generate_critique(coach_client, inventory, biz_persona, temperature)
386
+ print(f" {biz_persona['name']} done.")
387
+
388
+ yield (f"πŸ“Š {ana_persona['name']} reviewing...",) + EMPTY_OUTPUTS
389
+ print(f"Step 3b: {ana_persona['name']} [Temp: {temperature}]...")
390
+ ana_critique = generate_critique(coach_client, inventory, ana_persona, temperature)
391
+ print(f" {ana_persona['name']} done.")
392
 
393
  # 4. Format Output
394
+ biz_summary = biz_critique.get("overall_summary", "")
395
+ biz_table = format_roadmap_table(biz_critique)
396
+ ana_summary = ana_critique.get("overall_summary", "")
397
+ ana_table = format_roadmap_table(ana_critique)
 
 
 
 
 
398
 
399
+ # Create Combined Report File
 
400
  report_filename = f"{original_stem}_Review.md"
 
401
  with open(report_filename, "w") as f:
402
  f.write(f"# Dr. Jones Feedback for {original_stem}\n\n")
403
+ f.write(f"## {biz_persona['icon']} {biz_persona['name']}\n\n")
404
+ f.write(biz_summary + "\n\n")
405
+ f.write("### Business Roadmap\n")
406
+ f.write(biz_table + "\n\n")
407
+ f.write(f"## {ana_persona['icon']} {ana_persona['name']}\n\n")
408
+ f.write(ana_summary + "\n\n")
409
+ f.write("### Analytics Roadmap\n")
410
+ f.write(ana_table)
411
+
412
+ yield "βœ… Done!", biz_summary, biz_table, ana_summary, ana_table, \
413
+ images[0], report_filename, ""
414
 
415
  except Exception as e:
416
  print(f"CRITICAL ERROR: {e}")
417
+ yield (f"❌ Error: {str(e)}",) + EMPTY_OUTPUTS
418
 
419
  # Define a custom maroon color palette
420
  maroon = gr.themes.Color(
 
434
  with gr.Blocks(title="Dr. Jones AI Coach",
435
  theme=gr.themes.Default(primary_hue=maroon, text_size="lg")) as demo:
436
  gr.Markdown("# πŸŽ“ Capstone Slide Review")
437
+ gr.Markdown("Upload your slides (PDF) for feedback from your AI coaching committee.")
438
 
439
  with gr.Row():
440
  with gr.Column(scale=3):
441
  file_input = gr.File(label="Upload PDF Slides",
442
+ file_types=[".pdf", "application/pdf"],
443
+ type="filepath", height=150)
444
  with gr.Column(scale=1):
445
  pass_input = gr.Textbox(label="Password", type="password")
446
  temp_input = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Coach Temperature")
 
449
 
450
  status = gr.Markdown("**Status**: Ready")
451
 
 
452
  with gr.Row():
453
  with gr.Column(scale=1):
454
  preview_img = gr.Image(label="Title Slide", interactive=False)
455
+ download_btn = gr.File(label="Download Full Report")
456
+ progress_status = gr.Markdown(value="")
 
 
 
 
 
 
457
 
458
+ with gr.Column(scale=2):
459
+ with gr.Tabs():
460
+ with gr.TabItem("πŸ’Ό Business Strategy Coach"):
461
+ biz_summary_display = gr.Textbox(label="Business Summary",
462
+ show_label=False, lines=6, interactive=False)
463
+
464
+ with gr.TabItem("πŸ“Š Analytics & Methodology Coach"):
465
+ ana_summary_display = gr.Textbox(label="Analytics Summary",
466
+ show_label=False, lines=6, interactive=False)
467
+
468
+ with gr.Tabs():
469
+ with gr.TabItem("πŸ’Ό Business Roadmap"):
470
+ biz_roadmap_display = gr.Markdown()
471
+
472
+ with gr.TabItem("πŸ“Š Analytics Roadmap"):
473
+ ana_roadmap_display = gr.Markdown()
474
 
475
  btn.click(
476
  fn=process_presentation,
477
  inputs=[file_input, pass_input, temp_input],
478
+ outputs=[status, biz_summary_display, biz_roadmap_display,
479
+ ana_summary_display, ana_roadmap_display,
480
+ preview_img, download_btn, progress_status]
481
  )
482
 
483
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  gradio
2
  google-genai
 
3
  python-dotenv
4
  pdf2image
5
  pillow
 
1
  gradio
2
  google-genai
3
+ anthropic
4
  python-dotenv
5
  pdf2image
6
  pillow