""" Project Echo - AI-Powered Qualitative Research Assistant Production-grade survey generation, translation, and analysis platform """ import gradio as gr import json import os import traceback from typing import Dict, List, Optional from llm_backend import LLMBackend, LLMProvider from survey_generator import SurveyGenerator from survey_translator import SurveyTranslator from data_analyzer import DataAnalyzer from export_utils import (save_json_file, survey_to_csv, analysis_to_markdown_file, conversation_to_transcript, conversation_to_json, conversation_to_csv, flow_to_markdown) from conversation_flow import ConversationFlow, ConversationNode, create_example_flow from conversation_session import ConversationSession, SessionManager from conversation_moderator import ConversationModerator from conversation_analytics import ConversationAnalytics # Global state for current survey current_survey = None current_responses = [] # Global state for conversational research current_flow = None session_manager = SessionManager() current_session = None saved_flows = {} def initialize_backend(): """Initialize LLM backend based on environment""" try: # Debug: Print all environment variables related to LLM print("=== LLM Backend Initialization ===") print(f"HF_TOKEN: {'SET' if os.getenv('HF_TOKEN') else 'NOT SET'}") print(f"HUGGINGFACE_API_KEY: {'SET' if os.getenv('HUGGINGFACE_API_KEY') else 'NOT SET'}") print(f"OPENAI_API_KEY: {'SET' if os.getenv('OPENAI_API_KEY') else 'NOT SET'}") print(f"ANTHROPIC_API_KEY: {'SET' if os.getenv('ANTHROPIC_API_KEY') else 'NOT SET'}") print(f"LLM_PROVIDER: {os.getenv('LLM_PROVIDER', 'NOT SET')}") # Check for explicit provider setting provider_env = os.getenv("LLM_PROVIDER", "").lower() # Priority 1: Explicitly set provider if provider_env == "openai" and os.getenv("OPENAI_API_KEY"): print("Using OpenAI (explicit)") return LLMBackend(provider=LLMProvider.OPENAI) elif provider_env == "anthropic" and os.getenv("ANTHROPIC_API_KEY"): print("Using Anthropic (explicit)") return LLMBackend(provider=LLMProvider.ANTHROPIC) elif provider_env == "huggingface" and (os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")): api_key = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN") print("Using HuggingFace (explicit)") return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=api_key) elif provider_env == "lm_studio": print("Using LM Studio (explicit)") return LLMBackend(provider=LLMProvider.LM_STUDIO) # Priority 2: Auto-detect based on available credentials # HF_TOKEN is automatically available in HF Spaces, so check it first hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY") if hf_token: print(f"Auto-detected HuggingFace credentials, using HF Inference API") print(f"Token preview: {hf_token[:10]}...") return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=hf_token) elif os.getenv("OPENAI_API_KEY"): print(f"Auto-detected OpenAI credentials") return LLMBackend(provider=LLMProvider.OPENAI) elif os.getenv("ANTHROPIC_API_KEY"): print(f"Auto-detected Anthropic credentials") return LLMBackend(provider=LLMProvider.ANTHROPIC) else: # No credentials found - return None to show error in UI print("="*60) print("WARNING: No LLM provider credentials found!") print("="*60) print("For HuggingFace Spaces:") print(" - HF_TOKEN should be automatically available") print(" - Make sure your Space is PUBLIC") print(" - Or add HUGGINGFACE_API_KEY in Settings") print("") print("For other providers, set one of:") print(" - OPENAI_API_KEY") print(" - ANTHROPIC_API_KEY") 
print(" - HUGGINGFACE_API_KEY") print("="*60) return None except Exception as e: print(f"Error during backend initialization: {e}") import traceback traceback.print_exc() return None # Initialize components llm_backend = initialize_backend() # Only initialize if backend is available if llm_backend: survey_gen = SurveyGenerator(llm_backend) survey_trans = SurveyTranslator(llm_backend) data_analyzer = DataAnalyzer(llm_backend) print(f"✓ Project Echo initialized with {llm_backend.provider.value} provider") else: survey_gen = None survey_trans = None data_analyzer = None print("✗ Project Echo initialization incomplete - no LLM credentials found") # =========================== # Survey Generation Functions # =========================== def generate_survey_from_outline(outline: str, survey_type: str, num_questions: int, audience: str): """Generate survey from user outline""" global current_survey # Check if backend is initialized if not survey_gen: return ( "❌ LLM backend not configured. Please set up API credentials:\n" "- For HuggingFace Spaces: HF_TOKEN is auto-available\n" "- For OpenAI: Set OPENAI_API_KEY\n" "- For Anthropic: Set ANTHROPIC_API_KEY\n" "- For HuggingFace: Set HUGGINGFACE_API_KEY", "", None ) if not outline or not outline.strip(): return "❌ Please provide an outline or topic description.", "", None # Validate inputs if num_questions < 1 or num_questions > 50: return "❌ Number of questions must be between 1 and 50.", "", None try: # Generate survey survey_data = survey_gen.generate_survey( outline=outline, survey_type=survey_type.lower(), num_questions=num_questions, target_audience=audience ) current_survey = survey_data # Format for display display_text = format_survey_display(survey_data) # Save to file for download filepath = save_json_file(survey_data, "survey") return ( f"✅ Survey generated successfully! Contains {len(survey_data.get('questions', []))} questions.", display_text, filepath ) except Exception as e: error_msg = f"❌ Error generating survey: {str(e)}" print(f"Survey generation error: {traceback.format_exc()}") return error_msg, "", None def format_survey_display(survey_data: Dict) -> str: """Format survey data for readable display""" output = f"# {survey_data.get('title', 'Survey')}\n\n" output += f"## Introduction\n{survey_data.get('introduction', '')}\n\n" output += "## Questions\n\n" for i, q in enumerate(survey_data.get('questions', []), 1): output += f"**{i}. {q.get('question_text', '')}**\n" output += f" - Type: {q.get('question_type', 'N/A')}\n" if q.get('options'): output += " - Options:\n" for opt in q['options']: output += f" - {opt}\n" if q.get('help_text'): output += f" - Help: {q['help_text']}\n" output += f" - Required: {'Yes' if q.get('required', False) else 'No'}\n\n" output += f"## Closing\n{survey_data.get('closing', '')}\n" return output # =========================== # Translation Functions # =========================== def translate_current_survey(target_languages: List[str]): """Translate the current survey to selected languages""" global current_survey # Check if backend is initialized if not survey_trans: return ( "❌ LLM backend not configured. 

# ===========================
# Translation Functions
# ===========================

def translate_current_survey(target_languages: List[str]):
    """Translate the current survey to selected languages"""
    global current_survey

    # Check if backend is initialized
    if not survey_trans:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None
        )

    if not current_survey:
        return "❌ Please generate or upload a survey first.", "", None

    if not target_languages:
        return "❌ Please select at least one target language.", "", None

    try:
        # Translate to all selected languages
        translations = {}
        status_messages = []
        success_count = 0

        for lang_code in target_languages:
            try:
                translated = survey_trans.translate_survey(current_survey, lang_code)
                translations[lang_code] = translated
                lang_name = survey_trans._resolve_language(lang_code)
                status_messages.append(f"✅ Translated to {lang_name}")
                success_count += 1
            except Exception as e:
                lang_name = survey_trans._resolve_language(lang_code)
                status_messages.append(f"❌ Failed to translate to {lang_name}: {str(e)}")
                print(f"Translation error for {lang_code}: {traceback.format_exc()}")

        if success_count == 0:
            return "❌ All translations failed. Please check your LLM configuration.", "", None

        # Format translations for display
        display_text = ""
        for lang_code, trans_survey in translations.items():
            if "error" not in trans_survey:
                lang_name = survey_trans._resolve_language(lang_code)
                display_text += f"\n{'=' * 50}\n"
                display_text += f"TRANSLATION: {lang_name.upper()}\n"
                display_text += f"{'=' * 50}\n\n"
                display_text += format_survey_display(trans_survey)

        # Save to file for download
        filepath = save_json_file(translations, "translations")

        status = "\n".join(status_messages)
        return status, display_text, filepath
    except Exception as e:
        error_msg = f"❌ Error during translation: {str(e)}"
        print(f"Translation error: {traceback.format_exc()}")
        return error_msg, "", None


def get_language_choices():
    """Get language choices for dropdown"""
    # Languages come from the static list on the SurveyTranslator class
    langs = SurveyTranslator.SUPPORTED_LANGUAGES
    return [f"{code} - {name}" for code, name in langs.items()]
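
# Usage sketch (assumes a survey is already loaded, and that translate_survey
# accepts the same "code - name" identifiers the dropdown emits, which is what
# the _resolve_language calls above suggest):
#
#   status, text, path = translate_current_survey(["es - Spanish", "de - German"])
#   print(status)  # one ✅/❌ line per requested language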

# ===========================
# Data Analysis Functions
# ===========================

def analyze_survey_data(responses_json: str, questions_json: str = None):
    """Analyze survey responses"""
    # Check if backend is initialized
    if not data_analyzer:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None
        )

    if not responses_json or not responses_json.strip():
        return "❌ Please provide survey responses in JSON format.", "", None

    try:
        # Parse responses
        responses = json.loads(responses_json)
        questions = json.loads(questions_json) if questions_json and questions_json.strip() else None

        if not isinstance(responses, list):
            return "❌ Responses must be a JSON array.", "", None

        if len(responses) == 0:
            return "❌ No responses to analyze.", "", None

        # Validate questions if provided
        if questions and not isinstance(questions, list):
            return "❌ Questions must be a JSON array.", "", None

        # Run analysis
        analysis_results = data_analyzer.analyze_responses(responses, questions)

        if "error" in analysis_results:
            return f"❌ Analysis error: {analysis_results['error']}", "", None

        # Generate report
        report_md = data_analyzer.generate_report(analysis_results, format="markdown")

        # Save both JSON and Markdown
        json_filepath = save_json_file(analysis_results, "analysis_results")
        md_filepath = analysis_to_markdown_file(report_md, "analysis_report")

        status_msg = f"✅ Analysis complete! Analyzed {len(responses)} responses."
        if questions:
            status_msg += f" Considered {len(questions)} questions."

        return status_msg, report_md, json_filepath
    except json.JSONDecodeError as e:
        return f"❌ Invalid JSON format: {str(e)}", "", None
    except Exception as e:
        error_msg = f"❌ Error during analysis: {str(e)}"
        print(f"Analysis error: {traceback.format_exc()}")
        return error_msg, "", None


def load_example_responses():
    """Load example responses for demonstration"""
    example = [
        {
            "q1": "The medication helped reduce my symptoms significantly within the first week.",
            "q2": "I experienced some mild side effects like drowsiness in the beginning.",
            "q3": "Overall, I'm satisfied with the treatment and would recommend it to others."
        },
        {
            "q1": "I didn't notice much improvement in my condition after taking the medication.",
            "q2": "The side effects were quite severe and made it difficult to continue.",
            "q3": "I had to stop taking it after two weeks due to adverse reactions."
        },
        {
            "q1": "The medication worked well but took about 3-4 weeks to show results.",
            "q2": "No major side effects, just some occasional nausea.",
            "q3": "It's been effective for managing my symptoms on a daily basis."
        }
    ]
    return json.dumps(example, indent=2)
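
# Usage sketch: the example payload above can be fed straight into the analyzer;
# with no questions JSON, the optional second argument is simply omitted:
#
#   status, report, path = analyze_survey_data(load_example_responses())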

# ===========================
# Conversational Research Handlers
# ===========================

def create_new_flow(flow_name: str, flow_description: str):
    """Create a new conversation flow with AI-generated initial structure"""
    global current_flow, saved_flows, llm_backend

    if not flow_name or not flow_name.strip():
        return "❌ Please provide a flow name.", "", None

    if not flow_description or not flow_description.strip():
        return "❌ Please provide a description of what you want to discuss in this flow.", "", None

    if not llm_backend:
        return "❌ LLM backend not configured. Cannot generate flow.", "", None

    try:
        # Create empty flow
        flow = ConversationFlow(name=flow_name, description=flow_description)

        # Generate initial conversation structure using AI
        success, message = flow.generate_flow_with_ai(llm_backend, num_questions=5)

        if not success:
            return f"⚠️ Flow created but generation failed: {message}", display_flow(flow), None

        current_flow = flow
        saved_flows[flow.id] = flow

        status_msg = f"✅ Flow '{flow_name}' created with {len(flow.nodes)} conversation steps!"

        return (
            status_msg,
            display_flow(flow),
            flow.id
        )
    except Exception as e:
        error_msg = f"❌ Error creating flow: {str(e)}"
        print(f"Flow creation error: {traceback.format_exc()}")
        return error_msg, "", None


def regenerate_flow_content(flow_id: str):
    """Regenerate the conversation flow nodes using AI"""
    global saved_flows, current_flow, llm_backend

    if not flow_id:
        return "❌ No flow selected.", ""

    flow = saved_flows.get(flow_id)
    if not flow:
        return "❌ Flow not found.", ""

    if not llm_backend:
        return "❌ LLM backend not configured.", ""

    try:
        # Clear existing nodes
        flow.nodes = []

        # Regenerate with AI
        success, message = flow.generate_flow_with_ai(llm_backend, num_questions=5)

        if not success:
            return f"⚠️ Regeneration failed: {message}", ""

        current_flow = flow
        return (
            f"✅ Flow regenerated with {len(flow.nodes)} new steps!",
            display_flow(flow)
        )
    except Exception as e:
        return f"❌ Error regenerating flow: {str(e)}", ""


def load_example_flow():
    """Load an example conversation flow"""
    global current_flow, saved_flows

    flow = create_example_flow()
    current_flow = flow
    saved_flows[flow.id] = flow

    return (
        f"✅ Example flow loaded: {flow.name}",
        display_flow(flow),
        flow.id
    )


def add_flow_node(flow_id: str, node_content: str, node_type: str):
    """Add a node to the current flow"""
    global current_flow, saved_flows

    if not flow_id:
        return "❌ No flow selected.", ""

    flow = saved_flows.get(flow_id)
    if not flow:
        return "❌ Flow not found.", ""

    if not node_content or not node_content.strip():
        return "❌ Please provide content for the node.", ""

    try:
        node = ConversationNode(content=node_content, node_type=node_type.lower())

        # Link to previous node if exists
        if flow.nodes:
            last_node = flow.nodes[-1]
            last_node.next = node.id

        flow.add_node(node)
        current_flow = flow

        return (
            f"✅ Node added successfully! Total nodes: {len(flow.nodes)}",
            display_flow(flow)
        )
    except Exception as e:
        return f"❌ Error adding node: {str(e)}", ""


def display_flow(flow: ConversationFlow) -> str:
    """Display flow as markdown"""
    if not flow or not flow.nodes:
        return "No flow to display"

    output = f"# {flow.name}\n\n"
    output += f"**Description:** {flow.description}\n\n"
    output += f"**Total Steps:** {len(flow.nodes)}\n\n"
    output += "---\n\n"

    for i, node in enumerate(flow.nodes, 1):
        output += f"### Step {i}: {node.type.capitalize()}\n\n"
        output += f"{node.content}\n\n"

    return output


def save_current_flow(flow_id: str):
    """Save the current flow to file"""
    if not flow_id:
        return "❌ No flow selected.", None

    flow = saved_flows.get(flow_id)
    if not flow:
        return "❌ Flow not found.", None

    try:
        filepath = save_json_file(flow.to_dict(), "conversation_flow")
        return f"✅ Flow saved to {filepath}", filepath
    except Exception as e:
        return f"❌ Error saving flow: {str(e)}", None
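
# Node-linking sketch: add_flow_node chains nodes into a simple linked list, so
# adding node "A" then node "B" to an empty flow yields A.next == B.id, with
# B.next left unset until a third node arrives (attribute names as used above;
# the ConversationNode defaults live in conversation_flow, so this is inferred).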

def start_conversation_session(flow_id: str):
    """Start a new conversation session"""
    global current_session, session_manager

    if not flow_id:
        return [], "❌ Please select a flow first."

    flow = saved_flows.get(flow_id)
    if not flow:
        return [], "❌ Flow not found."

    if not llm_backend:
        return [], "❌ LLM backend not initialized."

    try:
        # Create session
        session = session_manager.create_session(flow_id=flow.id, flow_name=flow.name)
        current_session = session

        # Create moderator
        moderator = ConversationModerator(llm_backend, flow)

        # Start conversation
        opening_message = moderator.start_conversation(session)

        # Return chat history in Gradio format
        return [[None, opening_message]], f"✅ Conversation started! Session ID: {session.id}"
    except Exception as e:
        return [], f"❌ Error starting conversation: {str(e)}"


def chat_with_moderator(user_message: str, history: List):
    """Handle chat messages with the AI moderator"""
    global current_session

    if not current_session:
        return history, "❌ No active session. Please start a conversation first."

    if not llm_backend:
        return history, "❌ LLM backend not initialized."

    if not user_message or not user_message.strip():
        return history, "❌ Please enter a message."

    try:
        # Get the flow
        flow = saved_flows.get(current_session.flow_id)
        if not flow:
            return history, "❌ Flow not found."

        # Create moderator
        moderator = ConversationModerator(llm_backend, flow)

        # Process user response
        ai_response = moderator.process_user_response(current_session, user_message)

        # Update history
        history.append([user_message, ai_response])

        status = f"Session: {current_session.id} | Turns: {current_session.get_turn_count()}"
        if current_session.status == "completed":
            status += " | ✅ Conversation completed"

        return history, status
    except Exception as e:
        return history, f"❌ Error: {str(e)}"


def export_conversation():
    """Export the current conversation"""
    global current_session

    if not current_session:
        return "❌ No active session to export.", None

    try:
        filepath = conversation_to_transcript(current_session)
        return f"✅ Conversation exported to {filepath}", filepath
    except Exception as e:
        return f"❌ Error exporting conversation: {str(e)}", None
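
# The chat history above uses Gradio's "tuples" chat format: a list of
# [user_message, assistant_message] pairs, with None standing in for a missing
# side (hence the [None, opening_message] opener in start_conversation_session).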

def generate_conversation_summary():
    """Generate AI summary of the current conversation"""
    global current_session

    if not current_session:
        return "❌ No active session. Start a conversation first.", ""

    if not llm_backend:
        return "❌ LLM backend not initialized.", ""

    if current_session.get_turn_count() < 3:
        return "❌ Not enough conversation data. Have at least 2-3 exchanges first.", ""

    try:
        # Get the flow
        flow = saved_flows.get(current_session.flow_id)
        if not flow:
            return "❌ Flow not found.", ""

        # Create moderator and generate summary
        moderator = ConversationModerator(llm_backend, flow)
        summary = moderator.generate_summary(current_session)

        # Format summary with stats
        stats = current_session.get_summary_stats()
        formatted_summary = f"""## Conversation Summary

**Session Details:**
- Session ID: {current_session.id}
- Flow: {current_session.flow_name}
- Total Turns: {stats['total_turns']} ({stats['user_turns']} user, {stats['ai_turns']} AI)
- Duration: {stats['duration_minutes']} minutes
- Status: {stats['status']}

---

{summary}

---

*Summary generated by AI. Review for accuracy.*
"""
        return "✅ Summary generated successfully!", formatted_summary
    except Exception as e:
        return f"❌ Error generating summary: {str(e)}", ""


def update_probing_threshold(threshold: int):
    """Update the probing threshold for follow-up questions"""
    # This will be used when creating new moderators
    return f"✅ Probing threshold set to every {threshold} responses"


def get_conversation_metrics():
    """Get real-time conversation metrics"""
    global current_session

    if not current_session:
        return """**No Active Session**

Start a conversation to see metrics."""

    stats = current_session.get_summary_stats()

    # Calculate follow-up count (AI turns that aren't linked to nodes)
    follow_ups = len([t for t in current_session.conversation_history
                      if t.role == "ai" and not t.node_id])
    scripted = stats['ai_turns'] - follow_ups

    metrics_md = f"""## 📊 Live Conversation Metrics

**Engagement:**
- Total Exchanges: {stats['total_turns']}
- User Responses: {stats['user_turns']}
- AI Questions: {stats['ai_turns']}

**Question Mix:**
- Scripted Questions: {scripted}
- Dynamic Follow-ups: {follow_ups}
- Follow-up Rate: {(follow_ups / max(stats['ai_turns'], 1) * 100):.1f}%

**Quality Indicators:**
- Avg Response Length: {stats['avg_user_response_length']:.0f} characters
- Duration: {stats['duration_minutes']} min
- Status: {stats['status'].upper()}

**Session Info:**
- Session ID: `{current_session.id[:8]}...`
- Flow: {current_session.flow_name}
"""
    return metrics_md


def analyze_multiple_sessions(uploaded_files):
    """Analyze multiple conversation sessions"""
    if not uploaded_files:
        return "❌ Please upload at least one conversation JSON file.", "", None

    if not llm_backend:
        return "❌ LLM backend not configured. Multi-session analysis requires AI insights.", "", None

    try:
        # Load session data from uploaded files
        session_data_list = []
        for file in uploaded_files:
            with open(file.name, 'r') as f:
                data = json.load(f)
                session_data_list.append(data)

        # Create analytics instance
        analytics = ConversationAnalytics(llm_backend)
        loaded_count = analytics.load_sessions(session_data_list)

        if loaded_count == 0:
            return "❌ No valid sessions found in uploaded files.", "", None

        # Generate comprehensive report
        report = analytics.generate_comprehensive_report()

        # Export aggregated data
        export_data = analytics.export_aggregated_data()
        export_file = save_json_file(export_data, "multi_session_analysis")

        status = f"✅ Successfully analyzed {loaded_count} sessions from {len(uploaded_files)} files"
        return status, report, export_file
    except Exception as e:
        return f"❌ Error analyzing sessions: {str(e)}", "", None
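
# Input sketch for analyze_multiple_sessions: each uploaded file is expected to
# hold one session's JSON export (for example, as written by the
# conversation_to_json helper imported from export_utils). The exact schema is
# defined in that module, so this is an assumption about its shape, not a spec.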

# ===========================
# Gradio Interface
# ===========================

def create_interface():
    """Create the main Gradio interface"""
    with gr.Blocks(
        title="Project Echo - Qualitative Research Assistant",
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate")
    ) as app:
        gr.Markdown("""
        # Project Echo - Your AI-Powered Qualitative Research Assistant

        Battle the blank page, reach global audiences, and uncover insights with AI assistance.
        """)

        # Show backend status
        if llm_backend:
            status_msg = f"✅ **Active LLM Provider:** {llm_backend.provider.value.upper()} | Model: {llm_backend.model}"
            bg_color = "rgba(0, 255, 0, 0.1)"
        else:
            status_msg = """⚠️ **LLM Provider Not Configured**

**To use this app, you need to configure an LLM provider:**

1. **Easiest (HuggingFace Spaces):** Make sure your Space is PUBLIC and HF_TOKEN will be auto-available
2. **Best Quality:** Add `OPENAI_API_KEY` in Space Settings → Variables
3. **Alternative:** Add `ANTHROPIC_API_KEY` or `HUGGINGFACE_API_KEY`

See the **About** tab for detailed instructions."""
            bg_color = "rgba(255, 165, 0, 0.2)"

        gr.Markdown(f'