sparshmehta commited on
Commit
15a2a98
·
verified ·
1 Parent(s): f54e6c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +806 -329
app.py CHANGED
@@ -56,9 +56,9 @@ class ProgressTracker:
56
  self.status = status_container
57
  self.progress = progress_bar
58
  self.current_step = 0
59
- self.total_steps = 6 # Update total steps to include speech metrics
60
- self.substep_container = st.empty()
61
- self.metrics_container = st.container()
62
 
63
  def update(self, progress: float, message: str, substep: str = "", metrics: Dict[str, Any] = None):
64
  """Update progress bar and status message with enhanced UI feedback
@@ -662,7 +662,7 @@ Important:
662
 
663
  def _evaluate_speech_metrics(self, transcript: str, audio_features: Dict[str, float],
664
  progress_callback=None) -> Dict[str, Any]:
665
- """Evaluate speech metrics with improved accuracy"""
666
  try:
667
  if progress_callback:
668
  progress_callback(0.2, "Calculating speech metrics...")
@@ -670,21 +670,87 @@ Important:
670
  # Calculate words and duration
671
  words = len(transcript.split())
672
  duration_minutes = float(audio_features.get('duration', 0)) / 60
673
- words_per_minute = float(words / duration_minutes if duration_minutes > 0 else 0)
674
 
675
- # Calculate fluency metrics
676
- filler_words = ['um', 'uh', 'like', 'you know', 'sort of', 'kind of']
677
- filler_count = sum(transcript.lower().count(filler) for filler in filler_words)
678
- fillers_per_minute = float(filler_count / duration_minutes if duration_minutes > 0 else 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
679
 
680
- # Detect speech errors (repetitions, incomplete sentences)
681
- words_list = transcript.split()
682
- repetitions = sum(1 for i in range(len(words_list)-1) if words_list[i] == words_list[i+1])
683
- incomplete_sentences = len(re.findall(r'[.!?]\s*[a-z]|[^.!?]$', transcript))
684
- total_errors = repetitions + incomplete_sentences
685
- errors_per_minute = float(total_errors / duration_minutes if duration_minutes > 0 else 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
686
 
687
- # Basic speech metrics calculation
688
  return {
689
  "speed": {
690
  "score": 1 if 120 <= words_per_minute <= 180 else 0,
@@ -693,28 +759,27 @@ Important:
693
  "duration_minutes": duration_minutes
694
  },
695
  "fluency": {
696
- "score": 1 if fillers_per_minute <= 3 and errors_per_minute <= 1 else 0,
697
  "errorsPerMin": errors_per_minute,
698
- "fillersPerMin": fillers_per_minute,
699
- "maxErrorsThreshold": 1.0,
700
- "maxFillersThreshold": 3.0,
701
- "details": {
702
- "filler_count": filler_count,
703
- "repetitions": repetitions,
704
- "incomplete_sentences": incomplete_sentences
705
- }
706
  },
707
  "flow": {
708
  "score": 1 if audio_features.get("pauses_per_minute", 0) <= 12 else 0,
709
  "pausesPerMin": audio_features.get("pauses_per_minute", 0)
710
  },
711
  "intonation": {
712
- "pitch": audio_features.get("pitch_mean", 0),
713
- "pitchScore": 1 if 20 <= (audio_features.get("pitch_std", 0) / audio_features.get("pitch_mean", 0) * 100 if audio_features.get("pitch_mean", 0) > 0 else 0) <= 40 else 0,
714
- "pitchVariation": audio_features.get("pitch_std", 0),
715
- "patternScore": 1 if audio_features.get("variations_per_minute", 0) >= 120 else 0,
716
- "risingPatterns": audio_features.get("rising_patterns", 0),
717
- "fallingPatterns": audio_features.get("falling_patterns", 0),
718
  "variationsPerMin": audio_features.get("variations_per_minute", 0)
719
  },
720
  "energy": {
@@ -724,7 +789,7 @@ Important:
724
  "variationScore": 1 if 0.05 <= audio_features.get("amplitude_deviation", 0) <= 0.15 else 0
725
  }
726
  }
727
-
728
  except Exception as e:
729
  logger.error(f"Error in speech metrics evaluation: {e}")
730
  raise
@@ -747,12 +812,621 @@ Important:
747
  Format as a JSON array with a single string."""}
748
  ],
749
  response_format={"type": "json_object"},
750
- temperature=0.3
751
  )
752
 
753
- return json.loads(response.choices[0].message.content)
 
 
754
  except Exception as e:
755
  logger.error(f"Error generating suggestions: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
756
  def validate_video_file(file_path: str):
757
  """Validate video file before processing"""
758
  MAX_SIZE = 1024 * 1024 * 1024 # 500MB limit
@@ -1026,89 +1700,121 @@ def display_evaluation(evaluation: Dict[str, Any]):
1026
  if "summary" in recommendations:
1027
  st.markdown("""
1028
  <div class="summary-card">
1029
- <h4>📊 Overall Assessment</h4>
1030
  <div class="summary-content">
1031
- {}
1032
- </div>
1033
- </div>
1034
- """.format(recommendations["summary"]), unsafe_allow_html=True)
1035
 
1036
- # Display improvements in categorized columns
1037
  st.markdown("<h4>💡 Areas for Improvement</h4>", unsafe_allow_html=True)
1038
  improvements = recommendations.get("improvements", [])
1039
 
1040
- # Initialize category buckets
1041
- categorized_improvements = {
1042
- "Communication": [],
1043
- "Teaching": [],
1044
- "Technical": []
1045
- }
1046
-
1047
- # Sort improvements into categories
1048
- for improvement in improvements:
1049
- if isinstance(improvement, dict):
1050
- category = improvement.get("category", "").upper()
1051
- message = improvement.get("message", "")
1052
-
1053
- if "COMMUNICATION" in category:
1054
- categorized_improvements["Communication"].append(message)
1055
- elif "TEACHING" in category:
1056
- categorized_improvements["Teaching"].append(message)
1057
- elif "TECHNICAL" in category:
1058
- categorized_improvements["Technical"].append(message)
1059
- else:
1060
- # Handle string improvements (legacy format)
1061
- categorized_improvements["Technical"].append(str(improvement))
1062
-
1063
- # Create columns for each category
1064
- cols = st.columns(3)
1065
-
1066
- # Display improvements in columns with icons
1067
- for col, (category, items) in zip(cols, categorized_improvements.items()):
1068
- with col:
1069
- icon = "🗣️" if category == "Communication" else "📚" if category == "Teaching" else "💻"
1070
- st.markdown(f"""
1071
- <div class="improvement-card">
1072
- <h5>{icon} {category}</h5>
1073
- <div class="improvement-list">
1074
  """, unsafe_allow_html=True)
1075
-
1076
- if items:
1077
  for item in items:
1078
  st.markdown(f"""
1079
  <div class="improvement-item">
1080
  • {item}
1081
  </div>
1082
  """, unsafe_allow_html=True)
1083
- else:
1084
- st.markdown("""
1085
- <div class="improvement-item no-improvements">
1086
- No specific improvements needed in this category.
1087
- </div>
1088
- """, unsafe_allow_html=True)
1089
-
1090
- st.markdown("</div></div>", unsafe_allow_html=True)
1091
 
1092
- # Add additional CSS for recommendations styling
1093
  st.markdown("""
1094
  <style>
1095
- .summary-card {
1096
- background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%);
1097
  border-radius: 8px;
1098
  padding: 20px;
1099
- margin: 15px 0;
1100
- border-left: 4px solid #1f77b4;
1101
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
1102
  }
1103
 
1104
- .summary-card h4 {
1105
- color: #1f77b4;
 
 
1106
  margin-bottom: 15px;
1107
  }
1108
 
1109
- .summary-content {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1110
  color: #495057;
1111
- line-height: 1.6;
 
 
 
 
 
 
 
 
1112
  }
1113
 
1114
  .improvement-card {
@@ -1118,14 +1824,13 @@ def display_evaluation(evaluation: Dict[str, Any]):
1118
  margin: 10px 0;
1119
  height: 100%;
1120
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
1121
- border-left: 4px solid #28a745;
1122
  }
1123
 
1124
  .improvement-card h5 {
1125
  color: #1f77b4;
1126
- margin-bottom: 15px;
1127
- padding-bottom: 10px;
1128
  border-bottom: 2px solid #f0f0f0;
 
1129
  }
1130
 
1131
  .improvement-list {
@@ -1133,22 +1838,12 @@ def display_evaluation(evaluation: Dict[str, Any]):
1133
  }
1134
 
1135
  .improvement-item {
1136
- padding: 8px;
1137
- margin: 5px 0;
1138
- background: #f8f9fa;
1139
- border-radius: 4px;
1140
- color: #495057;
1141
- transition: transform 0.2s ease;
1142
  }
1143
 
1144
- .improvement-item:hover {
1145
- transform: translateX(5px);
1146
- background: #f0f0f0;
1147
- }
1148
-
1149
- .no-improvements {
1150
- color: #6c757d;
1151
- font-style: italic;
1152
  }
1153
  </style>
1154
  """, unsafe_allow_html=True)
@@ -1934,15 +2629,9 @@ def main():
1934
  """, unsafe_allow_html=True)
1935
 
1936
  evaluator = MentorEvaluator()
1937
-
1938
- # Read transcript content if provided
1939
- transcript_content = None
1940
- if uploaded_transcript:
1941
- transcript_content = uploaded_transcript.getvalue().decode('utf-8')
1942
-
1943
  st.session_state.evaluation_results = evaluator.evaluate_video(
1944
  video_path,
1945
- transcript_content # Pass the transcript content instead of the file object
1946
  )
1947
  st.session_state.processing_complete = True
1948
 
@@ -1987,217 +2676,5 @@ def main():
1987
  except Exception as e:
1988
  st.error(f"Application error: {str(e)}")
1989
 
1990
- class MentorEvaluator:
1991
- """Coordinates the evaluation process for mentor demos"""
1992
- def __init__(self):
1993
- self.audio_extractor = AudioFeatureExtractor()
1994
- self.content_analyzer = ContentAnalyzer(st.secrets["OPENAI_API_KEY"])
1995
-
1996
- def evaluate_video(self, video_path: str, transcript_content: Optional[str] = None) -> Dict[str, Any]:
1997
- """
1998
- Evaluate a teaching video and generate comprehensive analysis
1999
- """
2000
- try:
2001
- # Create progress tracking
2002
- status_container = st.empty()
2003
- progress_bar = st.progress(0)
2004
- progress = ProgressTracker(status_container, progress_bar)
2005
-
2006
- # Create a temporary directory that will persist throughout the function
2007
- with tempfile.TemporaryDirectory() as temp_dir:
2008
- # Step 1: Extract audio from video
2009
- progress.update(0.0, "Extracting audio from video...")
2010
- audio_path = os.path.join(temp_dir, 'audio.wav')
2011
-
2012
- try:
2013
- subprocess.run([
2014
- 'ffmpeg', '-i', video_path,
2015
- '-vn', '-acodec', 'pcm_s16le',
2016
- '-ar', '16000', '-ac', '1',
2017
- audio_path
2018
- ], check=True, capture_output=True)
2019
- except subprocess.SubprocessError as e:
2020
- logger.error(f"FFmpeg error: {e}")
2021
- raise AudioProcessingError(f"Failed to process video audio: {str(e)}")
2022
-
2023
- progress.next_step()
2024
-
2025
- # Step 2: Generate transcript if not provided
2026
- progress.update(0.0, "Processing audio...")
2027
- if transcript_content:
2028
- transcript = transcript_content
2029
- else:
2030
- # Initialize Whisper model
2031
- model = WhisperModel("base", device="cpu", compute_type="int8")
2032
- segments, _ = model.transcribe(audio_path, beam_size=5)
2033
- transcript = " ".join([segment.text for segment in segments])
2034
- progress.next_step()
2035
-
2036
- # Step 3: Extract audio features
2037
- progress.update(0.0, "Analyzing audio features...")
2038
- # Verify file exists before processing
2039
- if not os.path.exists(audio_path):
2040
- raise AudioProcessingError(f"Audio file not found at {audio_path}")
2041
-
2042
- audio_features = self.audio_extractor.extract_features(
2043
- audio_path,
2044
- progress_callback=lambda p, m: progress.update(p, "Analyzing audio features...", m)
2045
- )
2046
- progress.next_step()
2047
-
2048
- # Step 4: Calculate speech metrics (Add this step)
2049
- progress.update(0.0, "Analyzing speech patterns...")
2050
- speech_metrics = self._evaluate_speech_metrics(
2051
- transcript,
2052
- audio_features,
2053
- progress_callback=lambda p, m: progress.update(p, "Analyzing speech patterns...", m)
2054
- )
2055
- progress.next_step()
2056
-
2057
- # Step 5: Analyze teaching content
2058
- progress.update(0.0, "Analyzing teaching content...")
2059
- teaching_analysis = self.content_analyzer.analyze_content(
2060
- transcript,
2061
- progress_callback=lambda p, m: progress.update(p, "Analyzing teaching content...", m)
2062
- )
2063
- progress.next_step()
2064
-
2065
- # Step 6: Generate final evaluation
2066
- progress.update(0.0, "Generating final evaluation...")
2067
- evaluation = {
2068
- "audio_features": audio_features,
2069
- "speech_metrics": speech_metrics, # Include speech metrics in the evaluation
2070
- "transcript": transcript,
2071
- "teaching": teaching_analysis,
2072
- "recommendations": self._generate_recommendations(audio_features, teaching_analysis)
2073
- }
2074
- progress.next_step()
2075
-
2076
- return evaluation
2077
-
2078
- except Exception as e:
2079
- logger.error(f"Evaluation error: {str(e)}")
2080
- raise
2081
-
2082
- def _generate_recommendations(self, audio_features: Dict[str, float],
2083
- teaching_analysis: Dict[str, Any]) -> Dict[str, Any]:
2084
- """Generate recommendations based on analysis results"""
2085
- recommendations = {
2086
- "summary": "",
2087
- "improvements": []
2088
- }
2089
-
2090
- try:
2091
- # Generate summary and improvements using GPT-4
2092
- analysis_prompt = f"""
2093
- Based on the following teaching analysis and audio metrics, provide:
2094
- 1. A brief summary of the teaching performance
2095
- 2. Specific areas for improvement with actionable suggestions
2096
-
2097
- Audio Metrics:
2098
- {json.dumps(audio_features, indent=2)}
2099
-
2100
- Teaching Analysis:
2101
- {json.dumps(teaching_analysis, indent=2)}
2102
-
2103
- Format response as JSON:
2104
- {{
2105
- "summary": "brief overall assessment",
2106
- "improvements": [
2107
- {{"category": "COMMUNICATION/TEACHING/TECHNICAL", "message": "specific suggestion"}}
2108
- ]
2109
- }}
2110
- """
2111
-
2112
- response = self.content_analyzer.client.chat.completions.create(
2113
- model="gpt-4o-mini",
2114
- messages=[
2115
- {"role": "system", "content": "You are a teaching evaluation expert providing constructive feedback."},
2116
- {"role": "user", "content": analysis_prompt}
2117
- ],
2118
- response_format={"type": "json_object"},
2119
- temperature=0.3
2120
- )
2121
-
2122
- recommendations = json.loads(response.choices[0].message.content)
2123
-
2124
- except Exception as e:
2125
- logger.error(f"Error generating recommendations: {e}")
2126
- recommendations["summary"] = "Error generating detailed recommendations."
2127
- recommendations["improvements"] = [
2128
- {"category": "TECHNICAL", "message": "Unable to generate specific recommendations."}
2129
- ]
2130
-
2131
- return recommendations
2132
-
2133
- def _evaluate_speech_metrics(self, transcript: str, audio_features: Dict[str, float],
2134
- progress_callback=None) -> Dict[str, Any]:
2135
- """Evaluate speech metrics with improved accuracy"""
2136
- try:
2137
- if progress_callback:
2138
- progress_callback(0.2, "Calculating speech metrics...")
2139
-
2140
- # Calculate words and duration
2141
- words = len(transcript.split())
2142
- duration_minutes = float(audio_features.get('duration', 0)) / 60
2143
- words_per_minute = float(words / duration_minutes if duration_minutes > 0 else 0)
2144
-
2145
- # Calculate fluency metrics
2146
- filler_words = ['um', 'uh', 'like', 'you know', 'sort of', 'kind of']
2147
- filler_count = sum(transcript.lower().count(filler) for filler in filler_words)
2148
- fillers_per_minute = float(filler_count / duration_minutes if duration_minutes > 0 else 0)
2149
-
2150
- # Detect speech errors (repetitions, incomplete sentences)
2151
- words_list = transcript.split()
2152
- repetitions = sum(1 for i in range(len(words_list)-1) if words_list[i] == words_list[i+1])
2153
- incomplete_sentences = len(re.findall(r'[.!?]\s*[a-z]|[^.!?]$', transcript))
2154
- total_errors = repetitions + incomplete_sentences
2155
- errors_per_minute = float(total_errors / duration_minutes if duration_minutes > 0 else 0)
2156
-
2157
- # Basic speech metrics calculation
2158
- return {
2159
- "speed": {
2160
- "score": 1 if 120 <= words_per_minute <= 180 else 0,
2161
- "wpm": words_per_minute,
2162
- "total_words": words,
2163
- "duration_minutes": duration_minutes
2164
- },
2165
- "fluency": {
2166
- "score": 1 if fillers_per_minute <= 3 and errors_per_minute <= 1 else 0,
2167
- "errorsPerMin": errors_per_minute,
2168
- "fillersPerMin": fillers_per_minute,
2169
- "maxErrorsThreshold": 1.0,
2170
- "maxFillersThreshold": 3.0,
2171
- "details": {
2172
- "filler_count": filler_count,
2173
- "repetitions": repetitions,
2174
- "incomplete_sentences": incomplete_sentences
2175
- }
2176
- },
2177
- "flow": {
2178
- "score": 1 if audio_features.get("pauses_per_minute", 0) <= 12 else 0,
2179
- "pausesPerMin": audio_features.get("pauses_per_minute", 0)
2180
- },
2181
- "intonation": {
2182
- "pitch": audio_features.get("pitch_mean", 0),
2183
- "pitchScore": 1 if 20 <= (audio_features.get("pitch_std", 0) / audio_features.get("pitch_mean", 0) * 100 if audio_features.get("pitch_mean", 0) > 0 else 0) <= 40 else 0,
2184
- "pitchVariation": audio_features.get("pitch_std", 0),
2185
- "patternScore": 1 if audio_features.get("variations_per_minute", 0) >= 120 else 0,
2186
- "risingPatterns": audio_features.get("rising_patterns", 0),
2187
- "fallingPatterns": audio_features.get("falling_patterns", 0),
2188
- "variationsPerMin": audio_features.get("variations_per_minute", 0)
2189
- },
2190
- "energy": {
2191
- "score": 1 if 60 <= audio_features.get("mean_amplitude", 0) <= 75 else 0,
2192
- "meanAmplitude": audio_features.get("mean_amplitude", 0),
2193
- "amplitudeDeviation": audio_features.get("amplitude_deviation", 0),
2194
- "variationScore": 1 if 0.05 <= audio_features.get("amplitude_deviation", 0) <= 0.15 else 0
2195
- }
2196
- }
2197
-
2198
- except Exception as e:
2199
- logger.error(f"Error in speech metrics evaluation: {e}")
2200
- raise
2201
-
2202
  if __name__ == "__main__":
2203
  main()
 
56
  self.status = status_container
57
  self.progress = progress_bar
58
  self.current_step = 0
59
+ self.total_steps = 5 # Total number of main processing steps
60
+ self.substep_container = st.empty() # Add container for substep details
61
+ self.metrics_container = st.container() # Add container for metrics
62
 
63
  def update(self, progress: float, message: str, substep: str = "", metrics: Dict[str, Any] = None):
64
  """Update progress bar and status message with enhanced UI feedback
 
662
 
663
  def _evaluate_speech_metrics(self, transcript: str, audio_features: Dict[str, float],
664
  progress_callback=None) -> Dict[str, Any]:
665
+ """Evaluate speech metrics with improved accuracy and stricter checks"""
666
  try:
667
  if progress_callback:
668
  progress_callback(0.2, "Calculating speech metrics...")
 
670
  # Calculate words and duration
671
  words = len(transcript.split())
672
  duration_minutes = float(audio_features.get('duration', 0)) / 60
 
673
 
674
+ # Enhanced grammatical error detection with stricter patterns
675
+ grammatical_errors = []
676
+
677
+ # Subject-verb agreement errors
678
+ sv_errors = re.findall(r'\b(they is|he are|she are|it are|there are \w+s|there is \w+s)\b', transcript.lower())
679
+ grammatical_errors.extend([("Subject-Verb Agreement", err) for err in sv_errors])
680
+
681
+ # Article misuse
682
+ article_errors = re.findall(r'\b(a [aeiou]\w+|an [^aeiou\s]\w+)\b', transcript.lower())
683
+ grammatical_errors.extend([("Article Misuse", err) for err in article_errors])
684
+
685
+ # Double negatives
686
+ double_neg = re.findall(r'\b(don\'t.*no|doesn\'t.*no|didn\'t.*no|never.*no)\b', transcript.lower())
687
+ grammatical_errors.extend([("Double Negative", err) for err in double_neg])
688
+
689
+ # Preposition errors
690
+ prep_errors = re.findall(r'\b(depend of|different than|identical than)\b', transcript.lower())
691
+ grammatical_errors.extend([("Preposition Error", err) for err in prep_errors])
692
+
693
+ # Incomplete sentences (stricter detection)
694
+ incomplete = re.findall(r'[a-zA-Z]+\s*[.!?]\s*(?![A-Z])|[a-zA-Z]+\s*-\s+|[a-zA-Z]+\s*\.\.\.', transcript)
695
+ grammatical_errors.extend([("Incomplete Sentence", err) for err in incomplete])
696
 
697
+ # Calculate errors per minute with stricter threshold
698
+ errors_count = len(grammatical_errors)
699
+ errors_per_minute = float(errors_count / duration_minutes if duration_minutes > 0 else 0)
700
+
701
+ # Stricter threshold for errors (max 1 error per minute)
702
+ max_errors = 1.0
703
+
704
+ # Calculate monotone score with stricter thresholds
705
+ pitch_mean = float(audio_features.get("pitch_mean", 0))
706
+ pitch_std = float(audio_features.get("pitch_std", 0))
707
+ pitch_variation_coeff = (pitch_std / pitch_mean * 100) if pitch_mean > 0 else 0
708
+ direction_changes = float(audio_features.get("direction_changes_per_min", 0))
709
+ pitch_range = float(audio_features.get("pitch_range", 0))
710
+
711
+ # Recalibrated scoring factors with stricter ranges
712
+ # Variation factor: needs wider variation (20-40% is good)
713
+ variation_factor = min(1.0, max(0.0,
714
+ 1.0 if 20 <= pitch_variation_coeff <= 40
715
+ else 0.5 if 15 <= pitch_variation_coeff <= 45
716
+ else 0.0
717
+ ))
718
+
719
+ # Range factor: needs wider range (200-300% is good)
720
+ range_ratio = (pitch_range / pitch_mean * 100) if pitch_mean > 0 else 0
721
+ range_factor = min(1.0, max(0.0,
722
+ 1.0 if 200 <= range_ratio <= 300
723
+ else 0.5 if 150 <= range_ratio <= 350
724
+ else 0.0
725
+ ))
726
+
727
+ # Changes factor: needs more frequent changes (450-650 changes/min is good)
728
+ changes_factor = min(1.0, max(0.0,
729
+ 1.0 if 450 <= direction_changes <= 650
730
+ else 0.5 if 350 <= direction_changes <= 750
731
+ else 0.0
732
+ ))
733
+
734
+ # Calculate final monotone score (0-1, higher means more monotonous)
735
+ # Using weighted average to emphasize variation importance
736
+ weights = [0.4, 0.3, 0.3] # More weight on pitch variation
737
+ monotone_score = 1.0 - (
738
+ (variation_factor * weights[0] +
739
+ range_factor * weights[1] +
740
+ changes_factor * weights[2])
741
+ )
742
+
743
+ # Add debug logging
744
+ logger.info(f"""Monotone score calculation:
745
+ Pitch variation coeff: {pitch_variation_coeff:.2f}
746
+ Pitch range ratio: {range_ratio:.2f}%
747
+ Changes per minute: {direction_changes:.2f}
748
+ Variation factor: {variation_factor:.2f}
749
+ Range factor: {range_factor:.2f}
750
+ Changes factor: {changes_factor:.2f}
751
+ Final score: {monotone_score:.2f}
752
+ """)
753
 
 
754
  return {
755
  "speed": {
756
  "score": 1 if 120 <= words_per_minute <= 180 else 0,
 
759
  "duration_minutes": duration_minutes
760
  },
761
  "fluency": {
762
+ "score": 1 if errors_per_minute <= max_errors else 0,
763
  "errorsPerMin": errors_per_minute,
764
+ "maxErrorsThreshold": max_errors,
765
+ "detectedErrors": [
766
+ {
767
+ "type": error_type,
768
+ "context": error_text
769
+ } for error_type, error_text in grammatical_errors
770
+ ]
 
771
  },
772
  "flow": {
773
  "score": 1 if audio_features.get("pauses_per_minute", 0) <= 12 else 0,
774
  "pausesPerMin": audio_features.get("pauses_per_minute", 0)
775
  },
776
  "intonation": {
777
+ "pitch": pitch_mean,
778
+ "pitchScore": 1 if not any(monotone_indicators.values()) else 0,
779
+ "pitchVariation": pitch_variation_coeff,
780
+ "monotoneScore": monotone_score,
781
+ "monotoneIndicators": monotone_indicators,
782
+ "directionChanges": direction_changes,
783
  "variationsPerMin": audio_features.get("variations_per_minute", 0)
784
  },
785
  "energy": {
 
789
  "variationScore": 1 if 0.05 <= audio_features.get("amplitude_deviation", 0) <= 0.15 else 0
790
  }
791
  }
792
+
793
  except Exception as e:
794
  logger.error(f"Error in speech metrics evaluation: {e}")
795
  raise
 
812
  Format as a JSON array with a single string."""}
813
  ],
814
  response_format={"type": "json_object"},
815
+ temperature=0.7
816
  )
817
 
818
+ result = json.loads(response.choices[0].message.content)
819
+ return result.get("suggestions", [])
820
+
821
  except Exception as e:
822
  logger.error(f"Error generating suggestions: {e}")
823
+ return [f"Unable to generate specific suggestions: {str(e)}"]
824
+
825
+ class RecommendationGenerator:
826
+ """Generates teaching recommendations using OpenAI API"""
827
+ def __init__(self, api_key: str):
828
+ self.client = OpenAI(api_key=api_key)
829
+ self.retry_count = 3
830
+ self.retry_delay = 1
831
+
832
+ def generate_recommendations(self,
833
+ metrics: Dict[str, Any],
834
+ content_analysis: Dict[str, Any],
835
+ progress_callback=None) -> Dict[str, Any]:
836
+ """Generate recommendations with robust JSON handling"""
837
+ for attempt in range(self.retry_count):
838
+ try:
839
+ if progress_callback:
840
+ progress_callback(0.2, "Preparing recommendation analysis...")
841
+
842
+ prompt = self._create_recommendation_prompt(metrics, content_analysis)
843
+
844
+ if progress_callback:
845
+ progress_callback(0.5, "Generating recommendations...")
846
+
847
+ response = self.client.chat.completions.create(
848
+ model="gpt-4o-mini",
849
+ messages=[
850
+ {"role": "system", "content": """You are a teaching expert providing actionable recommendations.
851
+ Each improvement must be categorized as one of:
852
+ - COMMUNICATION: Related to speaking, pace, tone, clarity, delivery
853
+ - TEACHING: Related to explanation, examples, engagement, structure
854
+ - TECHNICAL: Related to code, implementation, technical concepts
855
+
856
+ Always respond with a valid JSON object containing categorized improvements."""},
857
+ {"role": "user", "content": prompt}
858
+ ],
859
+ response_format={"type": "json_object"}
860
+ )
861
+
862
+ if progress_callback:
863
+ progress_callback(0.8, "Formatting recommendations...")
864
+
865
+ result_text = response.choices[0].message.content.strip()
866
+
867
+ try:
868
+ result = json.loads(result_text)
869
+ # Ensure improvements are properly formatted
870
+ if "improvements" in result:
871
+ formatted_improvements = []
872
+ for imp in result["improvements"]:
873
+ if isinstance(imp, str):
874
+ # Default categorization for legacy format
875
+ formatted_improvements.append({
876
+ "category": "TECHNICAL",
877
+ "message": imp
878
+ })
879
+ elif isinstance(imp, dict):
880
+ # Ensure proper structure for dict format
881
+ formatted_improvements.append({
882
+ "category": imp.get("category", "TECHNICAL"),
883
+ "message": imp.get("message", str(imp))
884
+ })
885
+ result["improvements"] = formatted_improvements
886
+ except json.JSONDecodeError:
887
+ result = {
888
+ "geographyFit": "Unknown",
889
+ "improvements": [
890
+ {
891
+ "category": "TECHNICAL",
892
+ "message": "Unable to generate specific recommendations"
893
+ }
894
+ ],
895
+ "rigor": "Undetermined",
896
+ "profileMatches": []
897
+ }
898
+
899
+ if progress_callback:
900
+ progress_callback(1.0, "Recommendations complete!")
901
+
902
+ return result
903
+
904
+ except Exception as e:
905
+ logger.error(f"Recommendation generation attempt {attempt + 1} failed: {e}")
906
+ if attempt == self.retry_count - 1:
907
+ return {
908
+ "geographyFit": "Unknown",
909
+ "improvements": [
910
+ {
911
+ "category": "TECHNICAL",
912
+ "message": f"Unable to generate specific recommendations: {str(e)}"
913
+ }
914
+ ],
915
+ "rigor": "Undetermined",
916
+ "profileMatches": []
917
+ }
918
+ time.sleep(self.retry_delay * (2 ** attempt))
919
+
920
+ def _create_recommendation_prompt(self, metrics: Dict[str, Any], content_analysis: Dict[str, Any]) -> str:
921
+ """Create the recommendation prompt"""
922
+ return f"""Based on the following metrics and analysis, provide recommendations:
923
+ Metrics: {json.dumps(metrics)}
924
+ Content Analysis: {json.dumps(content_analysis)}
925
+
926
+ Analyze the teaching style and provide:
927
+ 1. A concise performance summary (2-3 paragraphs highlighting key strengths and areas for improvement)
928
+ 2. Geography fit assessment
929
+ 3. Specific improvements needed (each must be categorized as COMMUNICATION, TEACHING, or TECHNICAL)
930
+ 4. Profile matching for different learner types (choose ONLY ONE best match)
931
+ 5. Overall teaching rigor assessment
932
+
933
+ Required JSON structure:
934
+ {{
935
+ "summary": "Comprehensive summary of teaching performance, strengths, and areas for improvement",
936
+ "geographyFit": "String describing geographical market fit",
937
+ "improvements": [
938
+ {{
939
+ "category": "COMMUNICATION",
940
+ "message": "Specific improvement recommendation"
941
+ }},
942
+ {{
943
+ "category": "TEACHING",
944
+ "message": "Specific improvement recommendation"
945
+ }},
946
+ {{
947
+ "category": "TECHNICAL",
948
+ "message": "Specific improvement recommendation"
949
+ }}
950
+ ],
951
+ "rigor": "Assessment of teaching rigor",
952
+ "profileMatches": [
953
+ {{
954
+ "profile": "junior_technical",
955
+ "match": false,
956
+ "reason": "Detailed explanation why this profile is not the best match"
957
+ }},
958
+ {{
959
+ "profile": "senior_non_technical",
960
+ "match": false,
961
+ "reason": "Detailed explanation why this profile is not the best match"
962
+ }},
963
+ {{
964
+ "profile": "junior_expert",
965
+ "match": false,
966
+ "reason": "Detailed explanation why this profile is not the best match"
967
+ }},
968
+ {{
969
+ "profile": "senior_expert",
970
+ "match": false,
971
+ "reason": "Detailed explanation why this profile is not the best match"
972
+ }}
973
+ ]
974
+ }}
975
+
976
+ Consider:
977
+ - Teaching pace and complexity level
978
+ - Balance of technical vs business context
979
+ - Depth of code explanations
980
+ - Use of examples and analogies
981
+ - Engagement style
982
+ - Communication metrics
983
+ - Teaching assessment scores"""
984
+
985
class CostCalculator:
    """Tracks estimated API and processing costs for one evaluation run.

    Costs are accumulated per operation ('transcription', 'content_analysis',
    'recommendations') and summed into a running 'total'.
    """

    def __init__(self):
        # Pricing constants (USD).
        self.GPT4_INPUT_COST = 0.15 / 1_000_000   # $0.15 per 1M input tokens
        self.GPT4_OUTPUT_COST = 0.60 / 1_000_000  # $0.60 per 1M output tokens
        self.WHISPER_COST = 0.006 / 60            # $0.006 per minute => per-SECOND rate
        self.costs = {
            'transcription': 0.0,
            'content_analysis': 0.0,
            'recommendations': 0.0,
            'total': 0.0
        }

    def estimate_tokens(self, text: str) -> int:
        """Roughly estimate the token count of *text* (~1.3 tokens per word).

        Fix: truncate to int to match the annotated return type (the
        original returned a float).
        """
        return int(len(text.split()) * 1.3)

    def add_transcription_cost(self, duration_seconds: float):
        """Record the Whisper transcription cost for *duration_seconds* of audio.

        Fix: WHISPER_COST is a per-second rate (0.006/60), but the original
        multiplied it by *minutes* (duration_seconds/60), dividing by 60
        twice and under-reporting the cost 60x. Multiply by seconds instead.
        """
        cost = duration_seconds * self.WHISPER_COST
        self.costs['transcription'] = cost
        self.costs['total'] += cost
        print(f"\nTranscription Cost: ${cost:.4f}")

    def add_gpt4_cost(self, input_text: str, output_text: str, operation: str):
        """Record the GPT-4 cost of one *operation* given its input/output text.

        Args:
            input_text: Prompt text sent to the model.
            output_text: Completion text received from the model.
            operation: Cost-bucket key (e.g. 'content_analysis').
        """
        input_tokens = self.estimate_tokens(input_text)
        output_tokens = self.estimate_tokens(output_text)

        input_cost = input_tokens * self.GPT4_INPUT_COST
        output_cost = output_tokens * self.GPT4_OUTPUT_COST
        total_cost = input_cost + output_cost

        self.costs[operation] = total_cost
        self.costs['total'] += total_cost

        print(f"\n{operation.replace('_', ' ').title()} Cost:")
        print(f"Input tokens: {input_tokens:.0f} (${input_cost:.4f})")
        print(f"Output tokens: {output_tokens:.0f} (${output_cost:.4f})")
        print(f"Operation total: ${total_cost:.4f}")

    def print_total_cost(self):
        """Print a per-operation cost breakdown followed by the grand total."""
        print("\n=== Cost Breakdown ===")
        for key, cost in self.costs.items():
            if key != 'total':
                print(f"{key.replace('_', ' ').title()}: ${cost:.4f}")
        print(f"\nTotal Cost: ${self.costs['total']:.4f}")
1033
+
1034
class MentorEvaluator:
    """Main class for video evaluation"""

    def __init__(self, model_cache_dir: Optional[str] = None):
        """Set up API access, the model cache directory, and analysis components.

        Args:
            model_cache_dir: Optional override for the Whisper model cache
                directory; defaults to ~/.cache/whisper.

        Raises:
            ValueError: if the OpenAI API key is missing from Streamlit secrets.
            RuntimeError: if the cache directory or a component cannot be created.
        """
        # secrets.get() avoids a KeyError when the key is absent.
        self.api_key = st.secrets.get("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OpenAI API key not found in secrets")

        # Fix: _get_cached_result/_set_cached_result read self._cache and
        # self.cache_ttl, but neither attribute was ever initialized, so any
        # cache access raised AttributeError. Initialize them here.
        self._cache = {}
        self.cache_ttl = 3600  # seconds a cached entry stays valid

        try:
            if model_cache_dir:
                self.model_cache_dir = Path(model_cache_dir)
            else:
                self.model_cache_dir = Path.home() / ".cache" / "whisper"
            self.model_cache_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            raise RuntimeError(f"Failed to create model cache directory: {e}")

        try:
            self.feature_extractor = AudioFeatureExtractor()
            self.content_analyzer = ContentAnalyzer(self.api_key)
            self.recommendation_generator = RecommendationGenerator(self.api_key)
            self.cost_calculator = CostCalculator()
        except Exception as e:
            raise RuntimeError(f"Failed to initialize components: {e}")
1060
+
1061
+ def _get_cached_result(self, key: str) -> Optional[Any]:
1062
+ """Get cached result if available and not expired"""
1063
+ if key in self._cache:
1064
+ timestamp, value = self._cache[key]
1065
+ if time.time() - timestamp < self.cache_ttl:
1066
+ return value
1067
+ return None
1068
+
1069
+ def _set_cached_result(self, key: str, value: Any):
1070
+ """Cache result with timestamp"""
1071
+ self._cache[key] = (time.time(), value)
1072
+
1073
+ def _extract_audio(self, video_path: str, output_path: str, progress_callback=None) -> str:
1074
+ """Extract audio from video with optimized settings"""
1075
+ try:
1076
+ if progress_callback:
1077
+ progress_callback(0.1, "Checking dependencies...")
1078
+
1079
+ # Add optimized ffmpeg settings
1080
+ ffmpeg_cmd = [
1081
+ 'ffmpeg',
1082
+ '-i', video_path,
1083
+ '-ar', '16000', # Set sample rate to 16kHz
1084
+ '-ac', '1', # Convert to mono
1085
+ '-f', 'wav', # Output format
1086
+ '-v', 'warning', # Reduce verbosity
1087
+ '-y', # Overwrite output file
1088
+ # Add these optimizations:
1089
+ '-c:a', 'pcm_s16le', # Use simple audio codec
1090
+ '-movflags', 'faststart', # Optimize for streaming
1091
+ '-threads', str(max(1, multiprocessing.cpu_count() - 1)), # Use multiple threads
1092
+ output_path
1093
+ ]
1094
+
1095
+ # Use subprocess with optimized buffer size
1096
+ result = subprocess.run(
1097
+ ffmpeg_cmd,
1098
+ capture_output=True,
1099
+ text=True,
1100
+ bufsize=10*1024*1024 # 10MB buffer
1101
+ )
1102
+
1103
+ if result.returncode != 0:
1104
+ raise AudioProcessingError(f"FFmpeg Error: {result.stderr}")
1105
+
1106
+ if not os.path.exists(output_path):
1107
+ raise AudioProcessingError("Audio extraction failed: output file not created")
1108
+
1109
+ if progress_callback:
1110
+ progress_callback(1.0, "Audio extraction complete!")
1111
+
1112
+ return output_path
1113
+
1114
+ except Exception as e:
1115
+ logger.error(f"Error in audio extraction: {e}")
1116
+ raise AudioProcessingError(f"Audio extraction failed: {str(e)}")
1117
+
1118
+ def _preprocess_audio(self, input_path: str, output_path: Optional[str] = None) -> str:
1119
+ """Preprocess audio for analysis"""
1120
+ try:
1121
+ if not os.path.exists(input_path):
1122
+ raise FileNotFoundError(f"Input audio file not found: {input_path}")
1123
+
1124
+ # If no output path specified, use the input path
1125
+ if output_path is None:
1126
+ output_path = input_path
1127
+
1128
+ # Load audio
1129
+ audio, sr = librosa.load(input_path, sr=16000)
1130
+
1131
+ # Apply preprocessing steps
1132
+ # 1. Normalize audio
1133
+ audio = librosa.util.normalize(audio)
1134
+
1135
+ # 2. Remove silence
1136
+ non_silent = librosa.effects.trim(audio, top_db=20)[0]
1137
+
1138
+ # 3. Save processed audio
1139
+ sf.write(output_path, non_silent, sr)
1140
+
1141
+ return output_path
1142
+
1143
+ except Exception as e:
1144
+ logger.error(f"Error in audio preprocessing: {e}")
1145
+ raise AudioProcessingError(f"Audio preprocessing failed: {str(e)}")
1146
+
1147
    def evaluate_video(self, video_path: str, transcript_file: Optional[str] = None) -> Dict[str, Any]:
        """Run the full evaluation pipeline on a video file.

        Pipeline: extract audio -> preprocess -> extract features ->
        obtain transcript (upload or Whisper) -> content analysis ->
        recommendations -> speech metrics.

        Args:
            video_path: Path to an MP4/AVI/MOV video file.
            transcript_file: Optional pre-made transcript. NOTE(review): the
                annotation says str, but the code calls .getvalue().decode(),
                so this is presumably a Streamlit UploadedFile — confirm and
                fix the annotation at the call sites.

        Returns:
            Dict with keys 'audio_features', 'transcript', 'teaching',
            'recommendations' and 'speech_metrics'.

        Raises:
            RuntimeError: wrapping any failure in the pipeline.
        """
        try:
            # Add input validation
            if not os.path.exists(video_path):
                raise FileNotFoundError(f"Video file not found: {video_path}")

            # Validate video file format
            valid_extensions = {'.mp4', '.avi', '.mov'}
            if not any(video_path.lower().endswith(ext) for ext in valid_extensions):
                raise ValueError("Unsupported video format. Use MP4, AVI, or MOV")

            # Create progress tracking containers with error handling
            try:
                status = st.empty()
                progress = st.progress(0)
                tracker = ProgressTracker(status, progress)
            except Exception as e:
                logger.error(f"Failed to create progress trackers: {e}")
                raise

            # Track temp paths so the finally-block can remove anything the
            # temporary_file context managers did not already clean up.
            temp_files = []
            try:
                with temporary_file(suffix=".wav") as temp_audio, \
                     temporary_file(suffix=".wav") as processed_audio:
                    temp_files.extend([temp_audio, processed_audio])

                    # Step 1: Extract audio from video
                    tracker.update(0.1, "Extracting audio from video")
                    self._extract_audio(video_path, temp_audio)
                    tracker.next_step()

                    # Step 2: Preprocess audio (normalize + trim silence)
                    tracker.update(0.2, "Preprocessing audio")
                    self._preprocess_audio(temp_audio, processed_audio)
                    tracker.next_step()

                    # Step 3: Extract acoustic features
                    tracker.update(0.4, "Extracting audio features")
                    audio_features = self.feature_extractor.extract_features(processed_audio)
                    tracker.next_step()

                    # Step 4: Get transcript — either the uploaded file or a
                    # fresh Whisper transcription of the processed audio.
                    tracker.update(0.6, "Processing transcript")
                    if transcript_file:
                        transcript = transcript_file.getvalue().decode('utf-8')
                    else:
                        tracker.update(0.6, "Transcribing audio")
                        # Callback adapts the 3-argument progress signature
                        # and maps sub-progress into the 0.6-0.8 range.
                        transcript = self._transcribe_audio(
                            processed_audio,
                            lambda p, m, extra=None: tracker.update(0.6 + p * 0.2, m)
                        )
                    tracker.next_step()

                    # Step 5: Analyze teaching content via the LLM
                    tracker.update(0.8, "Analyzing teaching content")
                    content_analysis = self.content_analyzer.analyze_content(transcript)

                    # Step 6: Generate recommendations from features + analysis
                    tracker.update(0.9, "Generating recommendations")
                    recommendations = self.recommendation_generator.generate_recommendations(
                        audio_features,
                        content_analysis
                    )
                    tracker.next_step()

                    # Step 7: Rule-based speech metrics (speed/fluency/etc.)
                    speech_metrics = self._evaluate_speech_metrics(transcript, audio_features)

                    # Clear progress indicators before returning
                    status.empty()
                    progress.empty()

                    return {
                        "audio_features": audio_features,
                        "transcript": transcript,
                        "teaching": content_analysis,
                        "recommendations": recommendations,
                        "speech_metrics": speech_metrics
                    }

            finally:
                # Best-effort removal of any remaining temporary files;
                # failures are only logged, never raised.
                for temp_file in temp_files:
                    try:
                        if os.path.exists(temp_file):
                            os.remove(temp_file)
                    except Exception as e:
                        logger.warning(f"Failed to remove temporary file {temp_file}: {e}")

        except Exception as e:
            logger.error(f"Error in video evaluation: {e}")
            # Clean up UI elements on error (they may not exist yet if
            # validation failed before they were created).
            if 'status' in locals():
                status.empty()
            if 'progress' in locals():
                progress.empty()
            raise RuntimeError(f"Analysis failed: {str(e)}")
1246
+
1247
+ def _transcribe_audio(self, audio_path: str, progress_callback=None) -> str:
1248
+ """Transcribe audio using Whisper with direct approach and timing"""
1249
+ try:
1250
+ if progress_callback:
1251
+ progress_callback(0.1, "Loading transcription model...")
1252
+
1253
+ # Generate cache key based on file content
1254
+ cache_key = f"transcript_{hashlib.md5(open(audio_path, 'rb').read()).hexdigest()}"
1255
+
1256
+ # Check cache first
1257
+ if cache_key in st.session_state:
1258
+ logger.info("Using cached transcription")
1259
+ if progress_callback:
1260
+ progress_callback(1.0, "Retrieved from cache")
1261
+ return st.session_state[cache_key]
1262
+
1263
+ # Add validation for audio file
1264
+ if not os.path.exists(audio_path):
1265
+ raise FileNotFoundError(f"Audio file not found: {audio_path}")
1266
+
1267
+ if progress_callback:
1268
+ progress_callback(0.2, "Initializing model...")
1269
+
1270
+ # Start timing
1271
+ start_time = time.time()
1272
+
1273
+ try:
1274
+ # Load and transcribe with Whisper
1275
+ model = whisper.load_model("medium")
1276
+ result = model.transcribe(audio_path)
1277
+ transcript = result["text"]
1278
+
1279
+ # Calculate elapsed time
1280
+ end_time = time.time()
1281
+ elapsed_time = end_time - start_time
1282
+ logger.info(f"Transcription completed in {elapsed_time:.2f} seconds")
1283
+
1284
+ if progress_callback:
1285
+ progress_callback(0.9, f"Transcription completed in {elapsed_time:.2f} seconds")
1286
+
1287
+ # Validate transcript
1288
+ if not transcript.strip():
1289
+ raise ValueError("Transcription produced empty result")
1290
+
1291
+ # Cache the result
1292
+ st.session_state[cache_key] = transcript
1293
+
1294
+ if progress_callback:
1295
+ progress_callback(1.0, "Transcription complete!")
1296
+
1297
+ return transcript
1298
+
1299
+ except Exception as e:
1300
+ logger.error(f"Error during transcription: {e}")
1301
+ raise RuntimeError(f"Transcription failed: {str(e)}")
1302
+
1303
+ except Exception as e:
1304
+ logger.error(f"Error in transcription: {e}")
1305
+ if progress_callback:
1306
+ progress_callback(1.0, "Error in transcription", str(e))
1307
+ raise
1308
+
1309
+ def _merge_transcripts(self, transcripts: List[str]) -> str:
1310
+ """Merge transcripts with overlap deduplication"""
1311
+ if not transcripts:
1312
+ return ""
1313
+
1314
+ def clean_text(text):
1315
+ # Remove extra spaces and normalize punctuation
1316
+ return ' '.join(text.split())
1317
+
1318
+ def find_overlap(text1, text2):
1319
+ # Find overlapping text between consecutive chunks
1320
+ words1 = text1.split()
1321
+ words2 = text2.split()
1322
+
1323
+ for i in range(min(len(words1), 20), 0, -1): # Check up to 20 words
1324
+ if ' '.join(words1[-i:]) == ' '.join(words2[:i]):
1325
+ return i
1326
+ return 0
1327
+
1328
+ merged = clean_text(transcripts[0])
1329
+
1330
+ for i in range(1, len(transcripts)):
1331
+ current = clean_text(transcripts[i])
1332
+ overlap_size = find_overlap(merged, current)
1333
+ merged += ' ' + current.split(' ', overlap_size)[-1]
1334
+
1335
+ return merged
1336
+
1337
+ def calculate_speech_metrics(self, transcript: str, audio_duration: float) -> Dict[str, float]:
1338
+ """Calculate words per minute and other speech metrics."""
1339
+ words = len(transcript.split())
1340
+ minutes = audio_duration / 60
1341
+ return {
1342
+ 'words_per_minute': words / minutes if minutes > 0 else 0,
1343
+ 'total_words': words,
1344
+ 'duration_minutes': minutes
1345
+ }
1346
+
1347
    def _evaluate_speech_metrics(self, transcript: str, audio_features: Dict[str, float],
                               progress_callback=None) -> Dict[str, Any]:
        """Score speech delivery (speed, fluency, flow, intonation, energy).

        Args:
            transcript: Full transcript text.
            audio_features: Acoustic features; keys read here include
                'duration' (seconds), 'pauses_per_minute', 'pitch_mean',
                'pitch_std', 'variations_per_minute', 'rising_patterns',
                'falling_patterns', 'mean_amplitude', 'amplitude_deviation'.
                Missing keys default to 0.
            progress_callback: Optional callable(progress, message).

        Returns:
            Nested dict of pass/fail (1/0) scores and raw metric values.

        Raises:
            Re-raises any exception after logging it.
        """
        try:
            if progress_callback:
                progress_callback(0.2, "Calculating speech metrics...")

            # Calculate words and duration
            words = len(transcript.split())
            duration_minutes = float(audio_features.get('duration', 0)) / 60

            # NOTE(review): the comment below claims 130-160 WPM is ideal,
            # but the speed score further down passes 120-180 — confirm the
            # intended range and align them.
            # Calculate words per minute with updated range (130-160 WPM is ideal for teaching)
            words_per_minute = float(words / duration_minutes if duration_minutes > 0 else 0)

            # Improved filler word detection (2-3 per minute is acceptable)
            filler_words = re.findall(r'\b(um|uh|like|you\s+know|basically|actually|literally)\b',
                                    transcript.lower())
            fillers_count = len(filler_words)
            fillers_per_minute = float(fillers_count / duration_minutes if duration_minutes > 0 else 0)

            # Improved error detection (1-2 per minute is acceptable):
            # back-to-back repeated words plus trailing-off/broken phrases.
            repeated_words = len(re.findall(r'\b(\w+)\s+\1\b', transcript.lower()))
            incomplete_sentences = len(re.findall(r'[a-zA-Z]+\s*\.\.\.|\b[a-zA-Z]+\s*-\s+', transcript))
            errors_count = repeated_words + incomplete_sentences
            errors_per_minute = float(errors_count / duration_minutes if duration_minutes > 0 else 0)

            # Fixed thresholds; grammatical_errors stays empty here, so
            # 'detectedErrors' below is always [] in this implementation.
            max_errors = 1.0
            max_fillers = 3.0
            threshold_explanation = "Using standard thresholds"
            grammatical_errors = []

            # Fluency passes only when BOTH error and filler rates are
            # within their thresholds.
            fluency_score = 1 if (errors_per_minute <= max_errors and fillers_per_minute <= max_fillers) else 0

            return {
                "speed": {
                    # Pass when pace is within 120-180 WPM.
                    "score": 1 if 120 <= words_per_minute <= 180 else 0,
                    "wpm": words_per_minute,
                    "total_words": words,
                    "duration_minutes": duration_minutes
                },
                "fluency": {
                    "score": fluency_score,
                    "errorsPerMin": errors_per_minute,
                    "fillersPerMin": fillers_per_minute,
                    "maxErrorsThreshold": max_errors,
                    "maxFillersThreshold": max_fillers,
                    "thresholdExplanation": threshold_explanation,
                    "detectedErrors": [
                        {
                            "type": "Grammar",
                            "context": error,
                        } for error in grammatical_errors
                    ],
                    "detectedFillers": filler_words
                },
                "flow": {
                    # Pass when pauses stay at or below 12 per minute.
                    "score": 1 if audio_features.get("pauses_per_minute", 0) <= 12 else 0,
                    "pausesPerMin": audio_features.get("pauses_per_minute", 0)
                },
                "intonation": {
                    "pitch": audio_features.get("pitch_mean", 0),
                    # Pass when the coefficient of pitch variation
                    # (std/mean, as a percentage) falls in 20-40%; guarded
                    # against division by zero when pitch_mean is 0.
                    "pitchScore": 1 if 20 <= (audio_features.get("pitch_std", 0) / audio_features.get("pitch_mean", 0) * 100 if audio_features.get("pitch_mean", 0) > 0 else 0) <= 40 else 0,
                    "pitchVariation": audio_features.get("pitch_std", 0),
                    # Pass when pitch variations reach 120 per minute.
                    "patternScore": 1 if audio_features.get("variations_per_minute", 0) >= 120 else 0,
                    "risingPatterns": audio_features.get("rising_patterns", 0),
                    "fallingPatterns": audio_features.get("falling_patterns", 0),
                    "variationsPerMin": audio_features.get("variations_per_minute", 0),
                    # NOTE(review): "mu" duplicates pitch_mean (same value
                    # as "pitch" above) — presumably kept for a downstream
                    # consumer; confirm before removing.
                    "mu": audio_features.get("pitch_mean", 0)
                },
                "energy": {
                    # Pass when mean amplitude is within 60-75 (units as
                    # produced by the feature extractor — not verified here).
                    "score": 1 if 60 <= audio_features.get("mean_amplitude", 0) <= 75 else 0,
                    "meanAmplitude": audio_features.get("mean_amplitude", 0),
                    "amplitudeDeviation": audio_features.get("amplitude_deviation", 0),
                    "variationScore": 1 if 0.05 <= audio_features.get("amplitude_deviation", 0) <= 0.15 else 0
                }
            }

        except Exception as e:
            logger.error(f"Error in speech metrics evaluation: {e}")
            raise
1429
+
1430
  def validate_video_file(file_path: str):
1431
  """Validate video file before processing"""
1432
  MAX_SIZE = 1024 * 1024 * 1024 # 500MB limit
 
1700
  if "summary" in recommendations:
1701
  st.markdown("""
1702
  <div class="summary-card">
1703
+ <h4>📊 Overall Summary</h4>
1704
  <div class="summary-content">
1705
+ """, unsafe_allow_html=True)
1706
+ st.markdown(recommendations["summary"])
1707
+ st.markdown("</div></div>", unsafe_allow_html=True)
 
1708
 
1709
+ # Display improvements using categories from content analysis
1710
  st.markdown("<h4>💡 Areas for Improvement</h4>", unsafe_allow_html=True)
1711
  improvements = recommendations.get("improvements", [])
1712
 
1713
+ if isinstance(improvements, list):
1714
+ # Use predefined categories
1715
+ categories = {
1716
+ "🗣️ Communication": [],
1717
+ "📚 Teaching": [],
1718
+ "💻 Technical": []
1719
+ }
1720
+
1721
+ # Each improvement should now come with a category from the content analysis
1722
+ for improvement in improvements:
1723
+ if isinstance(improvement, dict):
1724
+ category = improvement.get("category", "💻 Technical") # Default to Technical if no category
1725
+ message = improvement.get("message", str(improvement))
1726
+ if "COMMUNICATION" in category.upper():
1727
+ categories["🗣️ Communication"].append(message)
1728
+ elif "TEACHING" in category.upper():
1729
+ categories["📚 Teaching"].append(message)
1730
+ elif "TECHNICAL" in category.upper():
1731
+ categories["💻 Technical"].append(message)
1732
+ else:
1733
+ # Handle legacy format or plain strings
1734
+ categories["💻 Technical"].append(improvement)
1735
+
1736
+ # Display categorized improvements in columns
1737
+ cols = st.columns(len(categories))
1738
+ for col, (category, items) in zip(cols, categories.items()):
1739
+ with col:
1740
+ st.markdown(f"""
1741
+ <div class="improvement-card">
1742
+ <h5>{category}</h5>
1743
+ <div class="improvement-list">
 
 
 
1744
  """, unsafe_allow_html=True)
1745
+
 
1746
  for item in items:
1747
  st.markdown(f"""
1748
  <div class="improvement-item">
1749
  • {item}
1750
  </div>
1751
  """, unsafe_allow_html=True)
1752
+
1753
+ st.markdown("</div></div>", unsafe_allow_html=True)
 
 
 
 
 
 
1754
 
1755
+ # Add additional CSS for new components
1756
  st.markdown("""
1757
  <style>
1758
+ .teaching-card {
1759
+ background: white;
1760
  border-radius: 8px;
1761
  padding: 20px;
1762
+ margin: 10px 0;
 
1763
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
1764
  }
1765
 
1766
+ .teaching-header {
1767
+ display: flex;
1768
+ justify-content: space-between;
1769
+ align-items: center;
1770
  margin-bottom: 15px;
1771
  }
1772
 
1773
+ .category-name {
1774
+ font-size: 1.2em;
1775
+ font-weight: bold;
1776
+ color: #1f77b4;
1777
+ }
1778
+
1779
+ .score-badge {
1780
+ padding: 5px 15px;
1781
+ border-radius: 15px;
1782
+ font-weight: bold;
1783
+ }
1784
+
1785
+ .score-pass {
1786
+ background-color: #28a745;
1787
+ color: white;
1788
+ }
1789
+
1790
+ .score-fail {
1791
+ background-color: #dc3545;
1792
+ color: white;
1793
+ }
1794
+
1795
+ .citations-container {
1796
+ margin-top: 10px;
1797
+ }
1798
+
1799
+ .citation-box {
1800
+ background: #f8f9fa;
1801
+ border-left: 3px solid #6c757d;
1802
+ padding: 10px;
1803
+ margin: 5px 0;
1804
+ border-radius: 0 4px 4px 0;
1805
+ }
1806
+
1807
+ .citation-text {
1808
  color: #495057;
1809
+ }
1810
+
1811
+ .summary-card {
1812
+ background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%);
1813
+ border-radius: 8px;
1814
+ padding: 20px;
1815
+ margin: 15px 0;
1816
+ border-left: 4px solid #1f77b4;
1817
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
1818
  }
1819
 
1820
  .improvement-card {
 
1824
  margin: 10px 0;
1825
  height: 100%;
1826
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 
1827
  }
1828
 
1829
  .improvement-card h5 {
1830
  color: #1f77b4;
1831
+ margin-bottom: 10px;
 
1832
  border-bottom: 2px solid #f0f0f0;
1833
+ padding-bottom: 5px;
1834
  }
1835
 
1836
  .improvement-list {
 
1838
  }
1839
 
1840
  .improvement-item {
1841
+ padding: 5px 0;
1842
+ border-bottom: 1px solid #f0f0f0;
 
 
 
 
1843
  }
1844
 
1845
+ .improvement-item:last-child {
1846
+ border-bottom: none;
 
 
 
 
 
 
1847
  }
1848
  </style>
1849
  """, unsafe_allow_html=True)
 
2629
  """, unsafe_allow_html=True)
2630
 
2631
  evaluator = MentorEvaluator()
 
 
 
 
 
 
2632
  st.session_state.evaluation_results = evaluator.evaluate_video(
2633
  video_path,
2634
+ uploaded_transcript if input_type == "Video + Manual Transcript" else None
2635
  )
2636
  st.session_state.processing_complete = True
2637
 
 
2676
  except Exception as e:
2677
  st.error(f"Application error: {str(e)}")
2678
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2679
# Script entry point: launch the Streamlit evaluation app.
if __name__ == "__main__":
    main()