jmisak committed on
Commit
8056e83
·
verified ·
1 Parent(s): db39ccf

Upload 5 files

Browse files
conversation_analytics.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Conversation Analytics - Multi-session analysis and insights
3
+ """
4
+ import json
5
+ import sys
6
+ import os
7
+ from typing import List, Dict, Optional
8
+ from collections import Counter
9
+ from datetime import datetime
10
+
11
+ # Add parent directory to path for imports
12
+ sys.path.insert(0, os.path.dirname(__file__))
13
+
14
+ from conversation_session import ConversationSession
15
+ from llm_backend import LLMBackend
16
+
17
+
18
class ConversationAnalytics:
    """
    Analyze multiple conversation sessions to identify patterns,
    themes, and insights across interviews.

    An LLM backend is optional: statistical summaries and keyword
    extraction always work; AI-generated cross-session insights are
    produced only when a backend is supplied.
    """

    def __init__(self, llm_backend: "Optional[LLMBackend]" = None):
        # Forward-reference annotations so the class can be defined even if
        # the backend type is imported lazily elsewhere.
        self.llm = llm_backend
        self.sessions: "List[ConversationSession]" = []

    def load_sessions(self, session_data_list: List[Dict]) -> int:
        """
        Load multiple sessions from dictionaries.

        Sessions that fail to deserialize are skipped (an error is
        printed) rather than aborting the whole load.

        Args:
            session_data_list: List of session dictionaries

        Returns:
            Number of sessions successfully loaded
        """
        self.sessions = []
        for session_data in session_data_list:
            try:
                session = ConversationSession.from_dict(session_data)
                self.sessions.append(session)
            except Exception as e:
                # Best-effort load: a single corrupt session must not
                # prevent analysis of the rest.
                print(f"Error loading session: {e}")
                continue

        return len(self.sessions)

    def get_aggregate_stats(self) -> Dict:
        """Get aggregate statistics across all sessions.

        Returns:
            Dictionary of totals/averages, or an empty dict when no
            sessions are loaded.
        """
        if not self.sessions:
            return {}

        total_turns = sum(s.get_turn_count() for s in self.sessions)
        total_user_turns = sum(
            sum(1 for t in s.conversation_history if t.role == "user")
            for s in self.sessions
        )
        total_ai_turns = sum(
            sum(1 for t in s.conversation_history if t.role == "ai")
            for s in self.sessions
        )

        # Response lengths (in characters) for user turns only.
        all_user_responses = [
            len(t.content)
            for session in self.sessions
            for t in session.conversation_history
            if t.role == "user"
        ]
        avg_response_length = (
            sum(all_user_responses) / len(all_user_responses)
            if all_user_responses else 0
        )

        # Session durations in minutes.
        all_durations = [s._calculate_duration_minutes() for s in self.sessions]
        avg_duration = sum(all_durations) / len(all_durations) if all_durations else 0

        # Breakdown by session lifecycle status.
        status_counts = Counter(s.status for s in self.sessions)

        return {
            "total_sessions": len(self.sessions),
            "total_turns": total_turns,
            "total_user_turns": total_user_turns,
            "total_ai_turns": total_ai_turns,
            "avg_turns_per_session": total_turns / len(self.sessions),
            "avg_response_length": avg_response_length,
            "avg_duration_minutes": avg_duration,
            "total_duration_minutes": sum(all_durations),
            "status_breakdown": dict(status_counts),
            "completed_sessions": status_counts.get("completed", 0),
            "active_sessions": status_counts.get("active", 0),
            "abandoned_sessions": status_counts.get("abandoned", 0)
        }

    def extract_all_responses(self) -> List[str]:
        """Extract all user responses from all sessions, in session order."""
        return [
            turn.content
            for session in self.sessions
            for turn in session.conversation_history
            if turn.role == "user"
        ]

    def identify_common_keywords(self, top_n: int = 20) -> List[tuple]:
        """
        Identify most common keywords across all user responses.

        Tokens are lowercased, stripped of surrounding punctuation, and
        filtered against a small stop-word list; only tokens longer than
        three characters are counted.

        Args:
            top_n: Number of top keywords to return

        Returns:
            List of (keyword, count) tuples, most frequent first
        """
        responses = self.extract_all_responses()

        # Simple keyword extraction (filter common words).
        stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
                      'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were', 'been',
                      'be', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
                      'should', 'may', 'might', 'can', 'it', 'this', 'that', 'these', 'those',
                      'i', 'you', 'he', 'she', 'we', 'they', 'my', 'your', 'his', 'her', 'our',
                      'their', 'me', 'him', 'us', 'them'}

        all_words = []
        for response in responses:
            for word in response.lower().split():
                # Strip punctuation BEFORE filtering: otherwise a token
                # like "the." slips past the stop-word check and is then
                # counted as the stop word "the".
                cleaned = word.strip('.,!?;:"()[]{}')
                if len(cleaned) > 3 and cleaned not in stop_words:
                    all_words.append(cleaned)

        word_counts = Counter(all_words)
        return word_counts.most_common(top_n)

    def generate_cross_session_insights(self) -> str:
        """
        Generate AI-powered insights across all sessions.

        Requires a configured LLM backend and at least 10 user responses;
        responses are randomly sampled (max 50) to stay within token limits.

        Returns:
            Markdown formatted insights report, or an "❌ ..." error string
        """
        if not self.llm:
            return "❌ LLM backend required for cross-session insights"

        if not self.sessions:
            return "❌ No sessions to analyze"

        # Collect all user responses.
        all_responses = self.extract_all_responses()

        if len(all_responses) < 10:
            return "❌ Need at least 10 responses across sessions for meaningful analysis"

        # Sample responses (to avoid token limits).
        sample_size = min(50, len(all_responses))
        import random
        sampled_responses = (
            random.sample(all_responses, sample_size)
            if len(all_responses) > sample_size else all_responses
        )

        responses_text = "\n\n".join(
            f"Response {i+1}: {r}" for i, r in enumerate(sampled_responses)
        )

        system_prompt = """You are analyzing multiple qualitative research interview sessions.

Identify patterns, themes, and insights across all the responses provided. Focus on:
1. **Common Themes**: What topics come up repeatedly?
2. **Sentiment Patterns**: Overall sentiment and emotional tone
3. **Key Insights**: Important discoveries or patterns
4. **Notable Quotes**: Particularly insightful or representative responses
5. **Recommendations**: What actions should researchers take based on these findings?

Provide a comprehensive analysis in a professional report format."""

        user_prompt = f"""Analyze these {len(sampled_responses)} interview responses from {len(self.sessions)} different sessions:

{responses_text}

Generate a comprehensive cross-session analysis report."""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        try:
            insights = self.llm.generate(messages, max_tokens=1000, temperature=0.5)
            return insights.strip()
        except Exception as e:
            # Surface backend failures as a readable report line rather
            # than crashing report generation.
            return f"❌ Error generating insights: {str(e)}"

    def generate_comprehensive_report(self) -> str:
        """
        Generate a comprehensive markdown report of multi-session analysis.

        Returns:
            Markdown formatted report
        """
        if not self.sessions:
            return "# Multi-Session Analysis Report\n\n❌ No sessions loaded for analysis."

        stats = self.get_aggregate_stats()
        keywords = self.identify_common_keywords(15)

        report = f"""# Multi-Session Conversation Analysis Report

**Generated:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

---

## 📊 Aggregate Statistics

**Session Overview:**
- Total Sessions Analyzed: **{stats['total_sessions']}**
- Completed Sessions: **{stats['completed_sessions']}**
- Active Sessions: **{stats['active_sessions']}**
- Abandoned Sessions: **{stats['abandoned_sessions']}**

**Conversation Metrics:**
- Total Conversation Turns: **{stats['total_turns']}**
- User Responses: **{stats['total_user_turns']}**
- AI Questions: **{stats['total_ai_turns']}**
- Average Turns per Session: **{stats['avg_turns_per_session']:.1f}**

**Quality Indicators:**
- Average Response Length: **{stats['avg_response_length']:.0f} characters**
- Average Session Duration: **{stats['avg_duration_minutes']:.1f} minutes**
- Total Interview Time: **{stats['total_duration_minutes']:.1f} minutes** ({stats['total_duration_minutes']/60:.1f} hours)

---

## 🔑 Common Keywords & Topics

Top keywords mentioned across all sessions:

"""

        for i, (keyword, count) in enumerate(keywords, 1):
            report += f"{i}. **{keyword}** - mentioned {count} times\n"

        report += "\n---\n\n## 💡 Cross-Session Insights\n\n"

        if self.llm:
            report += "*Generating AI-powered insights...*\n\n"
            insights = self.generate_cross_session_insights()
            report += insights
        else:
            report += "*AI insights unavailable (LLM backend not configured)*\n\n"
            report += "**Manual Analysis Recommended:**\n"
            report += "- Review individual session transcripts\n"
            report += "- Look for patterns in the common keywords above\n"
            report += "- Compare responses across different respondent demographics\n"

        report += "\n\n---\n\n## 📋 Session Details\n\n"

        for i, session in enumerate(self.sessions, 1):
            # NOTE: use a distinct name here — the original code rebound
            # `stats` inside this loop, which made the recommendations
            # section below raise KeyError on the per-session dict.
            session_stats = session.get_summary_stats()
            report += f"""### Session {i}: {session.flow_name}
- **Session ID:** `{session.id}`
- **Status:** {session.status}
- **Duration:** {session_stats['duration_minutes']:.1f} minutes
- **Turns:** {session_stats['total_turns']} ({session_stats['user_turns']} user, {session_stats['ai_turns']} AI)
- **Avg Response Length:** {session_stats['avg_user_response_length']:.0f} characters

"""

        report += "\n---\n\n## 🎯 Research Recommendations\n\n"
        report += f"""Based on analysis of {stats['total_sessions']} sessions:

1. **Data Quality:** {"✅ Good" if stats['completed_sessions'] / stats['total_sessions'] > 0.8 else "⚠️ Review incomplete sessions"}
2. **Sample Size:** {"✅ Sufficient" if stats['total_sessions'] >= 10 else "⚠️ Consider conducting more interviews"}
3. **Engagement:** {"✅ High" if stats['avg_response_length'] > 100 else "⚠️ Consider probing strategies"}
4. **Duration:** {"✅ Appropriate" if 10 <= stats['avg_duration_minutes'] <= 30 else "⚠️ Review interview length"}

**Next Steps:**
- Export this report for team review
- Identify 2-3 key themes for deep-dive analysis
- Plan follow-up questions based on insights
- Consider additional interviews to explore emerging themes

---

*This report was generated by Project Echo Multi-Session Analytics*
"""

        return report

    def export_aggregated_data(self) -> Dict:
        """
        Export aggregated data in JSON format for further analysis.

        Returns:
            Dictionary with all aggregated data
        """
        return {
            "generated_at": datetime.now().isoformat(),
            "statistics": self.get_aggregate_stats(),
            "keywords": self.identify_common_keywords(30),
            "sessions": [
                {
                    "id": s.id,
                    "flow_name": s.flow_name,
                    "status": s.status,
                    "started_at": s.started_at,
                    "ended_at": s.ended_at,
                    "turn_count": s.get_turn_count(),
                    "summary_stats": s.get_summary_stats()
                }
                for s in self.sessions
            ]
        }
conversation_flow.py CHANGED
@@ -5,6 +5,11 @@ import json
5
  import uuid
6
  from typing import Dict, List, Optional
7
  from datetime import datetime
 
 
 
 
 
8
 
9
 
10
  class ConversationNode:
 
5
  import uuid
6
  from typing import Dict, List, Optional
7
  from datetime import datetime
8
+ import sys
9
+ import os
10
+
11
+ # Add parent directory to path for imports
12
+ sys.path.insert(0, os.path.dirname(__file__))
13
 
14
 
15
  class ConversationNode:
conversation_moderator.py CHANGED
@@ -1,7 +1,13 @@
1
  """
2
  Conversation Moderator - AI-powered interview moderator
3
  """
 
 
4
  from typing import Dict, List, Optional, Tuple
 
 
 
 
5
  from llm_backend import LLMBackend
6
  from conversation_flow import ConversationFlow, ConversationNode
7
  from conversation_session import ConversationSession
 
1
  """
2
  Conversation Moderator - AI-powered interview moderator
3
  """
4
+ import sys
5
+ import os
6
  from typing import Dict, List, Optional, Tuple
7
+
8
+ # Add parent directory to path for imports
9
+ sys.path.insert(0, os.path.dirname(__file__))
10
+
11
  from llm_backend import LLMBackend
12
  from conversation_flow import ConversationFlow, ConversationNode
13
  from conversation_session import ConversationSession
data_analyzer.py CHANGED
@@ -2,8 +2,14 @@
2
  Data Analysis Module - AI-assisted analysis of survey responses
3
  """
4
  import json
 
 
5
  from typing import Dict, List, Optional
6
  from collections import Counter
 
 
 
 
7
  from llm_backend import LLMBackend
8
 
9
 
 
2
  Data Analysis Module - AI-assisted analysis of survey responses
3
  """
4
  import json
5
+ import sys
6
+ import os
7
  from typing import Dict, List, Optional
8
  from collections import Counter
9
+
10
+ # Add parent directory to path for imports
11
+ sys.path.insert(0, os.path.dirname(__file__))
12
+
13
  from llm_backend import LLMBackend
14
 
15
 
survey_generator.py CHANGED
@@ -2,7 +2,13 @@
2
  Survey Generation Module - Generate AI-powered surveys from outlines
3
  """
4
  import json
 
 
5
  from typing import List, Dict, Optional
 
 
 
 
6
  from llm_backend import LLMBackend
7
 
8
 
 
2
  Survey Generation Module - Generate AI-powered surveys from outlines
3
  """
4
  import json
5
+ import sys
6
+ import os
7
  from typing import List, Dict, Optional
8
+
9
+ # Add parent directory to path for imports
10
+ sys.path.insert(0, os.path.dirname(__file__))
11
+
12
  from llm_backend import LLMBackend
13
 
14