sparshmehta committed on
Commit
04fd86b
·
verified ·
1 Parent(s): 81fe53e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -76
app.py CHANGED
@@ -315,20 +315,6 @@ class ContentAnalyzer:
315
  if progress_callback:
316
  progress_callback(0.2, "Preparing content analysis...")
317
 
318
- # Extract existing timestamps or generate them
319
- timestamps = re.findall(r'\[(\d{2}:\d{2})\]', transcript)
320
- if not timestamps:
321
- # Generate timestamps based on word position
322
- words = transcript.split()
323
- words_per_minute = 150 # average speaking rate
324
- marked_transcript = ""
325
- for i, word in enumerate(words):
326
- if i % 150 == 0: # Add marker every ~1 minute of speech
327
- minutes = i // 150
328
- marked_transcript += f"\n[{minutes:02d}:00] "
329
- marked_transcript += word + " "
330
- transcript = marked_transcript
331
-
332
  prompt = self._create_analysis_prompt(transcript)
333
 
334
  if progress_callback:
@@ -336,7 +322,7 @@ class ContentAnalyzer:
336
 
337
  try:
338
  response = self.client.chat.completions.create(
339
- model="gpt-4o-mini", # Changed from gpt-4o-mini to gpt-4 for better analysis
340
  messages=[
341
  {"role": "system", "content": """You are a strict teaching evaluator focusing on core teaching competencies.
342
  For each assessment point, you MUST include specific timestamps [MM:SS] from the transcript.
@@ -351,7 +337,6 @@ class ContentAnalyzer:
351
  Maintain high standards and require clear evidence of quality teaching."""},
352
  {"role": "user", "content": prompt}
353
  ],
354
- response_format={"type": "json_object"},
355
  temperature=0.3
356
  )
357
 
@@ -364,77 +349,102 @@ class ContentAnalyzer:
364
  logger.info(f"Raw API response: {result_text[:500]}...")
365
 
366
  try:
367
- result = json.loads(result_text)
368
- logger.info("Successfully parsed JSON response")
369
-
370
- # Validate the response structure
371
- required_categories = {
372
- "Concept Assessment": [
373
- "Subject Matter Accuracy",
374
- "First Principles Approach",
375
- "Examples and Business Context",
376
- "Cohesive Storytelling",
377
- "Engagement and Interaction",
378
- "Professional Tone"
379
- ],
380
- "Code Assessment": [
381
- "Depth of Explanation",
382
- "Output Interpretation",
383
- "Breaking down Complexity"
384
- ]
385
  }
386
 
387
- # Check if response has required structure
388
- for category, subcategories in required_categories.items():
 
 
 
 
 
 
 
389
  if category not in result:
390
- logger.error(f"Missing category: {category}")
391
- raise ValueError(f"Response missing required category: {category}")
392
-
393
- for subcategory in subcategories:
394
- if subcategory not in result[category]:
395
- logger.error(f"Missing subcategory: {subcategory} in {category}")
396
- raise ValueError(f"Response missing required subcategory: {subcategory}")
397
-
398
- subcat_data = result[category][subcategory]
399
- if not isinstance(subcat_data, dict):
400
- logger.error(f"Invalid format for {category}.{subcategory}")
401
- raise ValueError(f"Invalid format for {category}.{subcategory}")
402
-
403
- if "Score" not in subcat_data or "Citations" not in subcat_data:
404
- logger.error(f"Missing Score or Citations in {category}.{subcategory}")
405
- raise ValueError(f"Missing Score or Citations in {category}.{subcategory}")
406
 
407
  return result
408
 
409
- except json.JSONDecodeError as json_error:
410
- logger.error(f"JSON parsing error: {str(json_error)}")
411
- logger.error(f"Invalid JSON response: {result_text}")
412
- raise
413
- except ValueError as val_error:
414
- logger.error(f"Validation error: {str(val_error)}")
415
- raise
416
-
417
  except Exception as e:
418
  logger.error(f"Content analysis attempt {attempt + 1} failed: {str(e)}")
419
  if attempt == self.retry_count - 1:
420
- logger.error("All attempts failed, returning default structure")
421
- return {
422
- "Concept Assessment": {
423
- "Subject Matter Accuracy": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
424
- "First Principles Approach": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
425
- "Examples and Business Context": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
426
- "Cohesive Storytelling": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
427
- "Engagement and Interaction": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
428
- "Professional Tone": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]}
429
- },
430
- "Code Assessment": {
431
- "Depth of Explanation": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
432
- "Output Interpretation": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
433
- "Breaking down Complexity": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]}
434
- }
435
- }
436
  time.sleep(self.retry_delay * (2 ** attempt))
437
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  def _create_analysis_prompt(self, transcript: str) -> str:
439
  """Create the analysis prompt with stricter evaluation criteria"""
440
  # First try to extract existing timestamps
 
315
  if progress_callback:
316
  progress_callback(0.2, "Preparing content analysis...")
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  prompt = self._create_analysis_prompt(transcript)
319
 
320
  if progress_callback:
 
322
 
323
  try:
324
  response = self.client.chat.completions.create(
325
+ model="gpt-4", # Changed from gpt-4o-mini to gpt-4 for better analysis
326
  messages=[
327
  {"role": "system", "content": """You are a strict teaching evaluator focusing on core teaching competencies.
328
  For each assessment point, you MUST include specific timestamps [MM:SS] from the transcript.
 
337
  Maintain high standards and require clear evidence of quality teaching."""},
338
  {"role": "user", "content": prompt}
339
  ],
 
340
  temperature=0.3
341
  )
342
 
 
349
  logger.info(f"Raw API response: {result_text[:500]}...")
350
 
351
  try:
352
+ # Ensure proper JSON structure even if API returns non-JSON
353
+ default_structure = {
354
+ "Concept Assessment": {
355
+ "Subject Matter Accuracy": {"Score": 0, "Citations": ["No valid assessment available"]},
356
+ "First Principles Approach": {"Score": 0, "Citations": ["No valid assessment available"]},
357
+ "Examples and Business Context": {"Score": 0, "Citations": ["No valid assessment available"]},
358
+ "Cohesive Storytelling": {"Score": 0, "Citations": ["No valid assessment available"]},
359
+ "Engagement and Interaction": {"Score": 0, "Citations": ["No valid assessment available"]},
360
+ "Professional Tone": {"Score": 0, "Citations": ["No valid assessment available"]}
361
+ },
362
+ "Code Assessment": {
363
+ "Depth of Explanation": {"Score": 0, "Citations": ["No valid assessment available"]},
364
+ "Output Interpretation": {"Score": 0, "Citations": ["No valid assessment available"]},
365
+ "Breaking down Complexity": {"Score": 0, "Citations": ["No valid assessment available"]}
366
+ }
 
 
 
367
  }
368
 
369
+ # Try to parse the API response
370
+ try:
371
+ result = json.loads(result_text)
372
+ except json.JSONDecodeError:
373
+ # If JSON parsing fails, try to extract structured data from text
374
+ result = self._extract_structured_data(result_text)
375
+
376
+ # Merge with default structure to ensure all required fields
377
+ for category in default_structure:
378
  if category not in result:
379
+ result[category] = default_structure[category]
380
+ else:
381
+ for subcategory in default_structure[category]:
382
+ if subcategory not in result[category]:
383
+ result[category][subcategory] = default_structure[category][subcategory]
384
+ else:
385
+ # Ensure Score and Citations exist
386
+ if "Score" not in result[category][subcategory]:
387
+ result[category][subcategory]["Score"] = 0
388
+ if "Citations" not in result[category][subcategory]:
389
+ result[category][subcategory]["Citations"] = ["No citations provided"]
 
 
 
 
 
390
 
391
  return result
392
 
393
+ except Exception as parse_error:
394
+ logger.error(f"Error parsing response: {parse_error}")
395
+ return default_structure
396
+
 
 
 
 
397
  except Exception as e:
398
  logger.error(f"Content analysis attempt {attempt + 1} failed: {str(e)}")
399
  if attempt == self.retry_count - 1:
400
+ return default_structure
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  time.sleep(self.retry_delay * (2 ** attempt))
402
 
403
+ def _extract_structured_data(self, text: str) -> Dict[str, Any]:
404
+ """Extract structured data from text response when JSON parsing fails"""
405
+ default_structure = {
406
+ "Concept Assessment": {},
407
+ "Code Assessment": {}
408
+ }
409
+
410
+ try:
411
+ # Simple pattern matching to extract scores and citations
412
+ sections = text.split('\n\n')
413
+ current_category = None
414
+ current_subcategory = None
415
+
416
+ for section in sections:
417
+ if "Concept Assessment" in section:
418
+ current_category = "Concept Assessment"
419
+ elif "Code Assessment" in section:
420
+ current_category = "Code Assessment"
421
+ elif current_category and ':' in section:
422
+ title, content = section.split(':', 1)
423
+ current_subcategory = title.strip()
424
+
425
+ # Extract score (assuming 0 or 1 is mentioned)
426
+ score = 1 if "pass" in content.lower() or "score: 1" in content.lower() else 0
427
+
428
+ # Extract citations (assuming they're in [MM:SS] format)
429
+ citations = re.findall(r'\[\d{2}:\d{2}\].*?(?=\[|$)', content)
430
+ citations = [c.strip() for c in citations if c.strip()]
431
+
432
+ if not citations:
433
+ citations = ["No specific citations found"]
434
+
435
+ if current_category and current_subcategory:
436
+ if current_category not in default_structure:
437
+ default_structure[current_category] = {}
438
+ default_structure[current_category][current_subcategory] = {
439
+ "Score": score,
440
+ "Citations": citations
441
+ }
442
+
443
+ return default_structure
444
+ except Exception as e:
445
+ logger.error(f"Error extracting structured data: {e}")
446
+ return default_structure
447
+
448
  def _create_analysis_prompt(self, transcript: str) -> str:
449
  """Create the analysis prompt with stricter evaluation criteria"""
450
  # First try to extract existing timestamps