# ProjectEcho / app.py
"""
ConversAI - AI-Powered Qualitative Research Assistant
Production-grade survey generation, translation, and analysis platform
"""
import gradio as gr
import json
import os
import traceback
from typing import Dict, List, Optional
from llm_backend import LLMBackend, LLMProvider
from survey_generator import SurveyGenerator
from survey_translator import SurveyTranslator
from data_analyzer import DataAnalyzer
from export_utils import save_json_file, survey_to_csv, analysis_to_markdown_file
# Global state for current survey
current_survey = None
current_responses = []
def initialize_backend():
    """Initialize the LLM backend from environment variables.

    Resolution order:
      1. An explicit ``LLM_PROVIDER`` setting, honored only when the
         matching credential variable is also present.
      2. Auto-detection from whichever API key exists. HuggingFace is
         checked first because HF Spaces injects ``HF_TOKEN`` automatically.

    Returns:
        LLMBackend configured for the detected provider, or ``None`` when
        no credentials are found or construction fails (the UI then shows
        setup guidance instead of crashing).
    """
    try:
        # Debug: report which credential variables are visible (never their values)
        print("=== LLM Backend Initialization ===")
        print(f"HF_TOKEN: {'SET' if os.getenv('HF_TOKEN') else 'NOT SET'}")
        print(f"HUGGINGFACE_API_KEY: {'SET' if os.getenv('HUGGINGFACE_API_KEY') else 'NOT SET'}")
        print(f"OPENAI_API_KEY: {'SET' if os.getenv('OPENAI_API_KEY') else 'NOT SET'}")
        print(f"ANTHROPIC_API_KEY: {'SET' if os.getenv('ANTHROPIC_API_KEY') else 'NOT SET'}")
        print(f"LLM_PROVIDER: {os.getenv('LLM_PROVIDER', 'NOT SET')}")
        # Check for explicit provider setting
        provider_env = os.getenv("LLM_PROVIDER", "").lower()
        # Priority 1: Explicitly set provider
        if provider_env == "openai" and os.getenv("OPENAI_API_KEY"):
            print("Using OpenAI (explicit)")
            return LLMBackend(provider=LLMProvider.OPENAI)
        elif provider_env == "anthropic" and os.getenv("ANTHROPIC_API_KEY"):
            print("Using Anthropic (explicit)")
            return LLMBackend(provider=LLMProvider.ANTHROPIC)
        elif provider_env == "huggingface" and (os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")):
            api_key = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
            print("Using HuggingFace (explicit)")
            return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=api_key)
        elif provider_env == "lm_studio":
            # LM Studio is a local server, so no credential check is needed.
            print("Using LM Studio (explicit)")
            return LLMBackend(provider=LLMProvider.LM_STUDIO)
        # Priority 2: Auto-detect based on available credentials
        # HF_TOKEN is automatically available in HF Spaces, so check it first
        hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
        if hf_token:
            print("Auto-detected HuggingFace credentials, using HF Inference API")
            print(f"Token preview: {hf_token[:10]}...")
            return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=hf_token)
        elif os.getenv("OPENAI_API_KEY"):
            print("Auto-detected OpenAI credentials")
            return LLMBackend(provider=LLMProvider.OPENAI)
        elif os.getenv("ANTHROPIC_API_KEY"):
            print("Auto-detected Anthropic credentials")
            return LLMBackend(provider=LLMProvider.ANTHROPIC)
        else:
            # No credentials found - return None to show error in UI
            print("="*60)
            print("WARNING: No LLM provider credentials found!")
            print("="*60)
            print("For HuggingFace Spaces:")
            print("  - HF_TOKEN should be automatically available")
            print("  - Make sure your Space is PUBLIC")
            print("  - Or add HUGGINGFACE_API_KEY in Settings")
            print("")
            print("For other providers, set one of:")
            print("  - OPENAI_API_KEY")
            print("  - ANTHROPIC_API_KEY")
            print("  - HUGGINGFACE_API_KEY")
            print("="*60)
            return None
    except Exception as e:
        # Boundary handler: log the failure and fall back to the "not
        # configured" UI state rather than crashing at import time.
        # (Uses the module-level traceback import; the previous local
        # re-import was redundant.)
        print(f"Error during backend initialization: {e}")
        traceback.print_exc()
        return None
# Initialize components
llm_backend = initialize_backend()
# Only initialize if backend is available
if llm_backend:
survey_gen = SurveyGenerator(llm_backend)
survey_trans = SurveyTranslator(llm_backend)
data_analyzer = DataAnalyzer(llm_backend)
print(f"βœ“ ConversAI initialized with {llm_backend.provider.value} provider")
else:
survey_gen = None
survey_trans = None
data_analyzer = None
print("βœ— ConversAI initialization incomplete - no LLM credentials found")
# ===========================
# Survey Generation Functions
# ===========================
def generate_survey_from_outline(outline: str, survey_type: str, num_questions: int, audience: str):
    """Generate a survey from a free-text outline.

    Stores the result in the module-level ``current_survey`` and returns a
    ``(status_text, rendered_markdown, download_filepath)`` tuple for the UI.
    """
    global current_survey

    # Guard clauses: backend availability first, then input validation.
    if not survey_gen:
        return (
            "❌ LLM backend not configured. Please set up API credentials:\n"
            "- For HuggingFace Spaces: HF_TOKEN is auto-available\n"
            "- For OpenAI: Set OPENAI_API_KEY\n"
            "- For Anthropic: Set ANTHROPIC_API_KEY\n"
            "- For HuggingFace: Set HUGGINGFACE_API_KEY",
            "",
            None
        )
    if not outline or not outline.strip():
        return "❌ Please provide an outline or topic description.", "", None
    if not (1 <= num_questions <= 50):
        return "❌ Number of questions must be between 1 and 50.", "", None

    try:
        # Run the generation, cache the result, and produce both a
        # human-readable rendering and a downloadable JSON file.
        survey_data = survey_gen.generate_survey(
            outline=outline,
            survey_type=survey_type.lower(),
            num_questions=num_questions,
            target_audience=audience
        )
        current_survey = survey_data
        rendered = format_survey_display(survey_data)
        download_path = save_json_file(survey_data, "survey")
        question_count = len(survey_data.get('questions', []))
        return (
            f"βœ… Survey generated successfully! Contains {question_count} questions.",
            rendered,
            download_path
        )
    except Exception as exc:
        print(f"Survey generation error: {traceback.format_exc()}")
        return f"❌ Error generating survey: {str(exc)}", "", None
def format_survey_display(survey_data: Dict) -> str:
    """Render a survey dict as human-readable Markdown.

    Missing keys fall back to sensible defaults ('Survey' title, empty
    introduction/closing, 'N/A' question type).
    """
    parts = [
        f"# {survey_data.get('title', 'Survey')}\n\n",
        f"## Introduction\n{survey_data.get('introduction', '')}\n\n",
        "## Questions\n\n",
    ]
    # One numbered entry per question, with optional options/help lines.
    for idx, question in enumerate(survey_data.get('questions', []), start=1):
        parts.append(f"**{idx}. {question.get('question_text', '')}**\n")
        parts.append(f" - Type: {question.get('question_type', 'N/A')}\n")
        if question.get('options'):
            parts.append(" - Options:\n")
            parts.extend(f" - {choice}\n" for choice in question['options'])
        if question.get('help_text'):
            parts.append(f" - Help: {question['help_text']}\n")
        required_label = 'Yes' if question.get('required', False) else 'No'
        parts.append(f" - Required: {required_label}\n\n")
    parts.append(f"## Closing\n{survey_data.get('closing', '')}\n")
    return "".join(parts)
# ===========================
# Translation Functions
# ===========================
def translate_current_survey(target_languages: List[str]):
    """Translate the in-memory survey into each selected language.

    Returns a ``(status_text, rendered_markdown, download_filepath)`` tuple;
    failures of individual languages are reported without aborting the rest.
    """
    global current_survey

    # Guard clauses: backend, survey presence, and at least one language.
    if not survey_trans:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None
        )
    if not current_survey:
        return "❌ Please generate or upload a survey first.", "", None
    if not target_languages:
        return "❌ Please select at least one target language.", "", None

    try:
        translations = {}
        status_lines = []
        completed = 0

        # Translate language-by-language so one failure doesn't stop the rest.
        for code in target_languages:
            try:
                translations[code] = survey_trans.translate_survey(current_survey, code)
                label = survey_trans._resolve_language(code)
                status_lines.append(f"βœ… Translated to {label}")
                completed += 1
            except Exception as exc:
                label = survey_trans._resolve_language(code)
                status_lines.append(f"❌ Failed to translate to {label}: {str(exc)}")
                print(f"Translation error for {code}: {traceback.format_exc()}")

        if completed == 0:
            return "❌ All translations failed. Please check your LLM configuration.", "", None

        # Render each successful translation, separated by banner lines.
        sections = []
        for code, translated in translations.items():
            if "error" not in translated:
                label = survey_trans._resolve_language(code)
                sections.append(f"\n{'='*50}\n")
                sections.append(f"TRANSLATION: {label.upper()}\n")
                sections.append(f"{'='*50}\n\n")
                sections.append(format_survey_display(translated))
        display_text = "".join(sections)

        # Bundle all translations into one downloadable JSON file.
        filepath = save_json_file(translations, "translations")
        return "\n".join(status_lines), display_text, filepath
    except Exception as exc:
        print(f"Translation error: {traceback.format_exc()}")
        return f"❌ Error during translation: {str(exc)}", "", None
def get_language_choices():
    """Return dropdown labels ("<code> - <name>") for all supported languages.

    Reads the static SUPPORTED_LANGUAGES mapping on the SurveyTranslator
    class (already imported at module level -- the previous function-local
    re-import was redundant), so no configured backend is required.
    """
    langs = SurveyTranslator.SUPPORTED_LANGUAGES
    return [f"{code} - {name}" for code, name in langs.items()]
# ===========================
# Data Analysis Functions
# ===========================
def analyze_survey_data(responses_json: str, questions_json: str = None):
    """Analyze survey responses supplied as JSON text.

    Args:
        responses_json: JSON array of response objects (one dict per respondent).
        questions_json: Optional JSON array of question definitions that gives
            the analyzer additional context.

    Returns:
        (status_text, markdown_report, json_filepath) tuple for the UI.
        On any validation or runtime error, status carries the message and
        the other two slots are ("", None).
    """
    # Guard clause: all handlers degrade gracefully without a backend.
    if not data_analyzer:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None
        )
    if not responses_json or not responses_json.strip():
        return "❌ Please provide survey responses in JSON format.", "", None
    try:
        # Parse and validate inputs before running the (slow) LLM analysis.
        responses = json.loads(responses_json)
        questions = json.loads(questions_json) if questions_json and questions_json.strip() else None
        if not isinstance(responses, list):
            return "❌ Responses must be a JSON array.", "", None
        if len(responses) == 0:
            return "❌ No responses to analyze.", "", None
        # Validate questions if provided
        if questions and not isinstance(questions, list):
            return "❌ Questions must be a JSON array.", "", None

        # Run analysis; the analyzer signals failure via an "error" key.
        analysis_results = data_analyzer.analyze_responses(responses, questions)
        if "error" in analysis_results:
            return f"❌ Analysis error: {analysis_results['error']}", "", None

        # Generate the Markdown report shown in the UI.
        report_md = data_analyzer.generate_report(analysis_results, format="markdown")

        # Persist both artifacts. Only the JSON path is returned for the
        # download widget; the Markdown file is written purely as a side
        # effect (its path was previously bound to an unused variable).
        json_filepath = save_json_file(analysis_results, "analysis_results")
        analysis_to_markdown_file(report_md, "analysis_report")

        status_msg = f"βœ… Analysis complete! Analyzed {len(responses)} responses."
        if questions:
            status_msg += f" Considered {len(questions)} questions."
        return status_msg, report_md, json_filepath
    except json.JSONDecodeError as e:
        return f"❌ Invalid JSON format: {str(e)}", "", None
    except Exception as e:
        print(f"Analysis error: {traceback.format_exc()}")
        return f"❌ Error during analysis: {str(e)}", "", None
def load_example_responses():
    """Return demo survey responses as a pretty-printed JSON string."""
    answer_sets = [
        (
            "The medication helped reduce my symptoms significantly within the first week.",
            "I experienced some mild side effects like drowsiness in the beginning.",
            "Overall, I'm satisfied with the treatment and would recommend it to others.",
        ),
        (
            "I didn't notice much improvement in my condition after taking the medication.",
            "The side effects were quite severe and made it difficult to continue.",
            "I had to stop taking it after two weeks due to adverse reactions.",
        ),
        (
            "The medication worked well but took about 3-4 weeks to show results.",
            "No major side effects, just some occasional nausea.",
            "It's been effective for managing my symptoms on a daily basis.",
        ),
    ]
    # Each respondent's answers are keyed q1..q3, matching the placeholder
    # format shown in the Analyze tab.
    example = [dict(zip(("q1", "q2", "q3"), answers)) for answers in answer_sets]
    return json.dumps(example, indent=2)
# ===========================
# Gradio Interface
# ===========================
def create_interface():
    """Create the main Gradio interface.

    Assembles a backend-status banner and four tabs (Generate, Translate,
    Analyze, About) and wires each button to the module-level handler
    functions. Returns the gr.Blocks app, ready to launch.
    """
    with gr.Blocks(
        title="ConversAI - Qualitative Research Assistant",
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate")
    ) as app:
        gr.Markdown("""
        # ConversAI - Your AI-Powered Qualitative Research Assistant
        Battle the blank page, reach global audiences, and uncover insights with AI assistance.
        """)
        # Show backend status
        if llm_backend:
            status_msg = f"βœ… **Active LLM Provider:** {llm_backend.provider.value.upper()} | Model: {llm_backend.model}"
            bg_color = "rgba(0, 255, 0, 0.1)"
        else:
            # Setup guidance shown when initialize_backend() returned None.
            status_msg = """⚠️ **LLM Provider Not Configured**
**To use this app, you need to configure an LLM provider:**
1. **Easiest (HuggingFace Spaces):** Make sure your Space is PUBLIC and HF_TOKEN will be auto-available
2. **Best Quality:** Add `OPENAI_API_KEY` in Space Settings β†’ Variables
3. **Alternative:** Add `ANTHROPIC_API_KEY` or `HUGGINGFACE_API_KEY`
See the **About** tab for detailed instructions."""
            bg_color = "rgba(255, 165, 0, 0.2)"
        gr.Markdown(f'<div style="background-color: {bg_color}; padding: 15px; border-radius: 5px; margin: 10px 0; border-left: 4px solid #FF6B6B;">{status_msg}</div>')
        with gr.Tabs() as tabs:
            # ========== SURVEY GENERATION TAB ==========
            with gr.Tab("πŸ“ Generate Survey"):
                gr.Markdown("""
                ## Battle the Blank Page
                Share an outline and get AI-powered surveys drafted in minutes,
                complete with industry best practices.
                """)
                with gr.Row():
                    with gr.Column(scale=1):
                        outline_input = gr.Textbox(
                            label="Your Survey Outline or Topic",
                            placeholder="Example: I want to understand patient experiences with a new diabetes medication, focusing on effectiveness, side effects, and quality of life impacts.",
                            lines=6
                        )
                        survey_type_input = gr.Radio(
                            label="Survey Type",
                            choices=["Qualitative", "Quantitative", "Mixed"],
                            value="Qualitative"
                        )
                        # NOTE(review): slider range is 5-25 while the handler
                        # validates 1-50 -- the UI is the stricter bound.
                        num_questions_input = gr.Slider(
                            label="Number of Questions",
                            minimum=5,
                            maximum=25,
                            value=10,
                            step=1
                        )
                        audience_input = gr.Textbox(
                            label="Target Audience",
                            placeholder="Example: Adults aged 30-65 with Type 2 diabetes",
                            value="General audience"
                        )
                        generate_btn = gr.Button("πŸš€ Generate Survey", variant="primary", size="lg")
                    with gr.Column(scale=1):
                        gen_status = gr.Textbox(label="Status", interactive=False)
                        gen_output = gr.Markdown(label="Generated Survey")
                        gen_download = gr.File(label="Download Survey JSON", visible=False)
                # Event handlers
                # The chained .then() step reveals the download widget only
                # when the handler produced a file path.
                generate_btn.click(
                    fn=generate_survey_from_outline,
                    inputs=[outline_input, survey_type_input, num_questions_input, audience_input],
                    outputs=[gen_status, gen_output, gen_download]
                ).then(
                    fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
                    inputs=[gen_download],
                    outputs=[gen_download]
                )
            # ========== TRANSLATION TAB ==========
            with gr.Tab("🌍 Translate Survey"):
                gr.Markdown("""
                ## Reach Global Audiences
                Translate your surveys automatically to streamline efforts and reach wider audiences.
                """)
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Select Target Languages")
                        # Create checkboxes for popular languages
                        lang_checkboxes = gr.CheckboxGroup(
                            label="Languages",
                            choices=get_language_choices(),
                            value=[]
                        )
                        translate_btn = gr.Button("🌐 Translate Survey", variant="primary", size="lg")
                        gr.Markdown("""
                        **Note:** Make sure you've generated a survey first, or upload one using the JSON format.
                        """)
                    with gr.Column(scale=1):
                        trans_status = gr.Textbox(label="Translation Status", interactive=False)
                        trans_output = gr.Markdown(label="Translations")
                        trans_download = gr.File(label="Download Translations JSON", visible=False)
                # Event handlers
                def extract_lang_codes(selected_items):
                    """Extract language codes from checkbox selections"""
                    # Checkbox labels are "<code> - <name>"; keep only the code.
                    return [item.split(" - ")[0] for item in selected_items]
                translate_btn.click(
                    fn=lambda x: translate_current_survey(extract_lang_codes(x)),
                    inputs=[lang_checkboxes],
                    outputs=[trans_status, trans_output, trans_download]
                ).then(
                    fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
                    inputs=[trans_download],
                    outputs=[trans_download]
                )
            # ========== ANALYSIS TAB ==========
            with gr.Tab("πŸ“Š Analyze Data"):
                gr.Markdown("""
                ## Uncover Key Insights
                Upload your survey responses and get AI-assisted summaries of key findings,
                themes, and trends.
                """)
                with gr.Row():
                    with gr.Column(scale=1):
                        responses_input = gr.Textbox(
                            label="Survey Responses (JSON)",
                            placeholder='[{"q1": "response 1", "q2": "response 2"}, ...]',
                            lines=10
                        )
                        questions_input = gr.Textbox(
                            label="Questions (JSON, Optional)",
                            placeholder='[{"question_text": "What is your experience?", ...}]',
                            lines=5
                        )
                        with gr.Row():
                            analyze_btn = gr.Button("πŸ” Analyze Data", variant="primary", size="lg")
                            example_btn = gr.Button("Load Example", variant="secondary")
                    with gr.Column(scale=1):
                        analysis_status = gr.Textbox(label="Status", interactive=False)
                        analysis_output = gr.Markdown(label="Analysis Report")
                        analysis_download = gr.File(label="Download Analysis JSON", visible=False)
                # Event handlers
                analyze_btn.click(
                    fn=analyze_survey_data,
                    inputs=[responses_input, questions_input],
                    outputs=[analysis_status, analysis_output, analysis_download]
                ).then(
                    fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
                    inputs=[analysis_download],
                    outputs=[analysis_download]
                )
                # "Load Example" fills the responses box with canned demo data.
                example_btn.click(
                    fn=load_example_responses,
                    outputs=[responses_input]
                )
            # ========== ABOUT TAB ==========
            with gr.Tab("ℹ️ About"):
                gr.Markdown("""
                ## About ConversAI
                ConversAI is a comprehensive qualitative research assistant that helps you:
                ### 🎯 Generate Surveys
                - Create professional surveys from simple outlines
                - Follow industry best practices automatically
                - Save hours of questionnaire design time
                ### 🌍 Translate Globally
                - Reach audiences in 18+ languages
                - Maintain cultural appropriateness
                - Expand your research scope effortlessly
                ### πŸ“Š Analyze Results
                - Extract key themes automatically
                - Identify patterns and trends
                - Generate actionable insights
                ### πŸ”§ Configuration Guide
                **For HuggingFace Spaces (Recommended):**
                No configuration needed! The app automatically uses the HF Inference API with the built-in `HF_TOKEN`.
                **Supported Models:**
                - Default: `mistralai/Mixtral-8x7B-Instruct-v0.1`
                - You can change by setting `LLM_MODEL` environment variable
                **For Other LLM Providers:**
                Add these environment variables in your Space Settings:
                1. **OpenAI** (Best quality, paid):
                   - `LLM_PROVIDER=openai`
                   - `OPENAI_API_KEY=sk-your-key`
                2. **Anthropic Claude** (Best reasoning, paid):
                   - `LLM_PROVIDER=anthropic`
                   - `ANTHROPIC_API_KEY=your-key`
                3. **Custom HuggingFace Model**:
                   - `LLM_PROVIDER=huggingface`
                   - `LLM_MODEL=your-model-name`
                **πŸ’‘ Pro Tip:** For production use, we recommend OpenAI or Anthropic for faster, more reliable results.
                **Supported LLM Providers:**
                - HuggingFace Inference API (Free tier available)
                - OpenAI (GPT-4, GPT-4o-mini, GPT-3.5)
                - Anthropic (Claude 3.5 Sonnet, Claude 3 Opus)
                - LM Studio (local development only)
                ### πŸ“„ Data Privacy
                - All processing is done through your configured LLM provider
                - No data is stored permanently by this application
                - Survey data and responses remain in your control
                ### πŸš€ Getting Started
                1. **Generate** a survey from your research outline
                2. **Translate** it to reach global audiences
                3. Collect responses from participants
                4. **Analyze** the data to uncover insights
                ---
                Built with ❀️ using Gradio and state-of-the-art LLMs
                """)
    return app
# ===========================
# Main Entry Point
# ===========================
if __name__ == "__main__":
    demo = create_interface()
    # Launch with appropriate settings
    demo.launch(
        server_name="0.0.0.0",  # Allow external access (required behind the Spaces proxy)
        server_port=7860,       # Standard HF Spaces port
        share=False,            # Don't create a public link (HF Spaces handles this)
        show_error=True         # Surface handler exceptions in the UI for debugging
    )