Spaces:

sparshmehta
/

main_app

Sleeping

App Files Files Community

sparshmehta committed on Feb 20, 2025

Commit

04fd86b

verified ·

1 Parent(s): 81fe53e

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -76

app.py CHANGED Viewed

@@ -315,20 +315,6 @@ class ContentAnalyzer:
                 if progress_callback:
                     progress_callback(0.2, "Preparing content analysis...")
-                # Extract existing timestamps or generate them
-                timestamps = re.findall(r'\[(\d{2}:\d{2})\]', transcript)
-                if not timestamps:
-                    # Generate timestamps based on word position
-                    words = transcript.split()
-                    words_per_minute = 150  # average speaking rate
-                    marked_transcript = ""
-                    for i, word in enumerate(words):
-                        if i % 150 == 0:  # Add marker every ~1 minute of speech
-                            minutes = i // 150
-                            marked_transcript += f"\n[{minutes:02d}:00] "
-                        marked_transcript += word + " "
-                    transcript = marked_transcript
                 prompt = self._create_analysis_prompt(transcript)
                 if progress_callback:
@@ -336,7 +322,7 @@ class ContentAnalyzer:
                 try:
                     response = self.client.chat.completions.create(
-                        model="gpt-4o-mini",  # Changed from gpt-4o-mini to gpt-4 for better analysis
                         messages=[
                             {"role": "system", "content": """You are a strict teaching evaluator focusing on core teaching competencies.
                              For each assessment point, you MUST include specific timestamps [MM:SS] from the transcript.
@@ -351,7 +337,6 @@ class ContentAnalyzer:
                              Maintain high standards and require clear evidence of quality teaching."""},
                             {"role": "user", "content": prompt}
                         ],
-                        response_format={"type": "json_object"},
                         temperature=0.3
                     )
@@ -364,77 +349,102 @@ class ContentAnalyzer:
                 logger.info(f"Raw API response: {result_text[:500]}...")
                 try:
-                    result = json.loads(result_text)
-                    logger.info("Successfully parsed JSON response")
-                    # Validate the response structure
-                    required_categories = {
-                        "Concept Assessment": [
-                            "Subject Matter Accuracy",
-                            "First Principles Approach",
-                            "Examples and Business Context",
-                            "Cohesive Storytelling",
-                            "Engagement and Interaction",
-                            "Professional Tone"
-                        ],
-                        "Code Assessment": [
-                            "Depth of Explanation",
-                            "Output Interpretation",
-                            "Breaking down Complexity"
-                        ]
                     }
-                    # Check if response has required structure
-                    for category, subcategories in required_categories.items():
                         if category not in result:
-                            logger.error(f"Missing category: {category}")
-                            raise ValueError(f"Response missing required category: {category}")
-                        for subcategory in subcategories:
-                            if subcategory not in result[category]:
-                                logger.error(f"Missing subcategory: {subcategory} in {category}")
-                                raise ValueError(f"Response missing required subcategory: {subcategory}")
-                            subcat_data = result[category][subcategory]
-                            if not isinstance(subcat_data, dict):
-                                logger.error(f"Invalid format for {category}.{subcategory}")
-                                raise ValueError(f"Invalid format for {category}.{subcategory}")
-                            if "Score" not in subcat_data or "Citations" not in subcat_data:
-                                logger.error(f"Missing Score or Citations in {category}.{subcategory}")
-                                raise ValueError(f"Missing Score or Citations in {category}.{subcategory}")
                     return result
-                except json.JSONDecodeError as json_error:
-                    logger.error(f"JSON parsing error: {str(json_error)}")
-                    logger.error(f"Invalid JSON response: {result_text}")
-                    raise
-                except ValueError as val_error:
-                    logger.error(f"Validation error: {str(val_error)}")
-                    raise
             except Exception as e:
                 logger.error(f"Content analysis attempt {attempt + 1} failed: {str(e)}")
                 if attempt == self.retry_count - 1:
-                    logger.error("All attempts failed, returning default structure")
-                    return {
-                        "Concept Assessment": {
-                            "Subject Matter Accuracy": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
-                            "First Principles Approach": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
-                            "Examples and Business Context": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
-                            "Cohesive Storytelling": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
-                            "Engagement and Interaction": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
-                            "Professional Tone": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]}
-                        },
-                        "Code Assessment": {
-                            "Depth of Explanation": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
-                            "Output Interpretation": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]},
-                            "Breaking down Complexity": {"Score": 0, "Citations": [f"Analysis failed: {str(e)}"]}
-                        }
-                    }
                 time.sleep(self.retry_delay * (2 ** attempt))
     def _create_analysis_prompt(self, transcript: str) -> str:
         """Create the analysis prompt with stricter evaluation criteria"""
         # First try to extract existing timestamps

                 if progress_callback:
                     progress_callback(0.2, "Preparing content analysis...")
                 prompt = self._create_analysis_prompt(transcript)
                 if progress_callback:
                 try:
                     response = self.client.chat.completions.create(
+                        model="gpt-4",  # Changed from gpt-4o-mini to gpt-4 for better analysis
                         messages=[
                             {"role": "system", "content": """You are a strict teaching evaluator focusing on core teaching competencies.
                              For each assessment point, you MUST include specific timestamps [MM:SS] from the transcript.
                              Maintain high standards and require clear evidence of quality teaching."""},
                             {"role": "user", "content": prompt}
                         ],
                         temperature=0.3
                     )
                 logger.info(f"Raw API response: {result_text[:500]}...")
                 try:
+                    # Ensure proper JSON structure even if API returns non-JSON
+                    default_structure = {
+                        "Concept Assessment": {
+                            "Subject Matter Accuracy": {"Score": 0, "Citations": ["No valid assessment available"]},
+                            "First Principles Approach": {"Score": 0, "Citations": ["No valid assessment available"]},
+                            "Examples and Business Context": {"Score": 0, "Citations": ["No valid assessment available"]},
+                            "Cohesive Storytelling": {"Score": 0, "Citations": ["No valid assessment available"]},
+                            "Engagement and Interaction": {"Score": 0, "Citations": ["No valid assessment available"]},
+                            "Professional Tone": {"Score": 0, "Citations": ["No valid assessment available"]}
+                        },
+                        "Code Assessment": {
+                            "Depth of Explanation": {"Score": 0, "Citations": ["No valid assessment available"]},
+                            "Output Interpretation": {"Score": 0, "Citations": ["No valid assessment available"]},
+                            "Breaking down Complexity": {"Score": 0, "Citations": ["No valid assessment available"]}
+                        }
                     }
+                    # Try to parse the API response
+                    try:
+                        result = json.loads(result_text)
+                    except json.JSONDecodeError:
+                        # If JSON parsing fails, try to extract structured data from text
+                        result = self._extract_structured_data(result_text)
+                    # Merge with default structure to ensure all required fields
+                    for category in default_structure:
                         if category not in result:
+                            result[category] = default_structure[category]
+                        else:
+                            for subcategory in default_structure[category]:
+                                if subcategory not in result[category]:
+                                    result[category][subcategory] = default_structure[category][subcategory]
+                                else:
+                                    # Ensure Score and Citations exist
+                                    if "Score" not in result[category][subcategory]:
+                                        result[category][subcategory]["Score"] = 0
+                                    if "Citations" not in result[category][subcategory]:
+                                        result[category][subcategory]["Citations"] = ["No citations provided"]
                     return result
+                except Exception as parse_error:
+                    logger.error(f"Error parsing response: {parse_error}")
+                    return default_structure
             except Exception as e:
                 logger.error(f"Content analysis attempt {attempt + 1} failed: {str(e)}")
                 if attempt == self.retry_count - 1:
+                    return default_structure
                 time.sleep(self.retry_delay * (2 ** attempt))
+    def _extract_structured_data(self, text: str) -> Dict[str, Any]:
+        """Extract structured data from text response when JSON parsing fails"""
+        default_structure = {
+            "Concept Assessment": {},
+            "Code Assessment": {}
+        }
+        try:
+            # Simple pattern matching to extract scores and citations
+            sections = text.split('\n\n')
+            current_category = None
+            current_subcategory = None
+            for section in sections:
+                if "Concept Assessment" in section:
+                    current_category = "Concept Assessment"
+                elif "Code Assessment" in section:
+                    current_category = "Code Assessment"
+                elif current_category and ':' in section:
+                    title, content = section.split(':', 1)
+                    current_subcategory = title.strip()
+                    # Extract score (assuming 0 or 1 is mentioned)
+                    score = 1 if "pass" in content.lower() or "score: 1" in content.lower() else 0
+                    # Extract citations (assuming they're in [MM:SS] format)
+                    citations = re.findall(r'\[\d{2}:\d{2}\].*?(?=\[|$)', content)
+                    citations = [c.strip() for c in citations if c.strip()]
+                    if not citations:
+                        citations = ["No specific citations found"]
+                    if current_category and current_subcategory:
+                        if current_category not in default_structure:
+                            default_structure[current_category] = {}
+                        default_structure[current_category][current_subcategory] = {
+                            "Score": score,
+                            "Citations": citations
+                        }
+            return default_structure
+        except Exception as e:
+            logger.error(f"Error extracting structured data: {e}")
+            return default_structure
     def _create_analysis_prompt(self, transcript: str) -> str:
         """Create the analysis prompt with stricter evaluation criteria"""
         # First try to extract existing timestamps