# ProjectEcho / app.py
"""
ConversAI - AI-Powered Qualitative Research Assistant
Production-grade survey generation, translation, and analysis platform
"""
import gradio as gr
import json
import os
import traceback
from typing import Dict, List, Optional
from llm_backend import LLMBackend, LLMProvider
from survey_generator import SurveyGenerator
from survey_translator import SurveyTranslator
from data_analyzer import DataAnalyzer
from export_utils import save_json_file, survey_to_csv, analysis_to_markdown_file
# Global state for current survey
current_survey = None
current_responses = []
def initialize_backend():
    """Initialize the LLM backend from environment variables.

    Resolution order:
      1. An explicit ``LLM_PROVIDER`` setting, honored only when the
         matching credential variable is also present.
      2. Auto-detection from whichever API key exists. HuggingFace is
         checked first because HF Spaces injects ``HF_TOKEN`` automatically.

    Returns:
        LLMBackend configured for the detected provider, or ``None`` when
        no credentials are found or construction fails (the UI then shows
        setup guidance instead of crashing).
    """
    try:
        # Debug: report which credential variables are visible (never their values)
        print("=== LLM Backend Initialization ===")
        print(f"HF_TOKEN: {'SET' if os.getenv('HF_TOKEN') else 'NOT SET'}")
        print(f"HUGGINGFACE_API_KEY: {'SET' if os.getenv('HUGGINGFACE_API_KEY') else 'NOT SET'}")
        print(f"OPENAI_API_KEY: {'SET' if os.getenv('OPENAI_API_KEY') else 'NOT SET'}")
        print(f"ANTHROPIC_API_KEY: {'SET' if os.getenv('ANTHROPIC_API_KEY') else 'NOT SET'}")
        print(f"LLM_PROVIDER: {os.getenv('LLM_PROVIDER', 'NOT SET')}")
        # Check for explicit provider setting
        provider_env = os.getenv("LLM_PROVIDER", "").lower()
        # Priority 1: Explicitly set provider
        if provider_env == "openai" and os.getenv("OPENAI_API_KEY"):
            print("Using OpenAI (explicit)")
            return LLMBackend(provider=LLMProvider.OPENAI)
        elif provider_env == "anthropic" and os.getenv("ANTHROPIC_API_KEY"):
            print("Using Anthropic (explicit)")
            return LLMBackend(provider=LLMProvider.ANTHROPIC)
        elif provider_env == "huggingface" and (os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")):
            api_key = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
            print("Using HuggingFace (explicit)")
            return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=api_key)
        elif provider_env == "lm_studio":
            # LM Studio is a local server, so no credential check is needed.
            print("Using LM Studio (explicit)")
            return LLMBackend(provider=LLMProvider.LM_STUDIO)
        # Priority 2: Auto-detect based on available credentials
        # HF_TOKEN is automatically available in HF Spaces, so check it first
        hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
        if hf_token:
            print("Auto-detected HuggingFace credentials, using HF Inference API")
            print(f"Token preview: {hf_token[:10]}...")
            return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=hf_token)
        elif os.getenv("OPENAI_API_KEY"):
            print("Auto-detected OpenAI credentials")
            return LLMBackend(provider=LLMProvider.OPENAI)
        elif os.getenv("ANTHROPIC_API_KEY"):
            print("Auto-detected Anthropic credentials")
            return LLMBackend(provider=LLMProvider.ANTHROPIC)
        else:
            # No credentials found - return None to show error in UI
            print("="*60)
            print("WARNING: No LLM provider credentials found!")
            print("="*60)
            print("For HuggingFace Spaces:")
            print("  - HF_TOKEN should be automatically available")
            print("  - Make sure your Space is PUBLIC")
            print("  - Or add HUGGINGFACE_API_KEY in Settings")
            print("")
            print("For other providers, set one of:")
            print("  - OPENAI_API_KEY")
            print("  - ANTHROPIC_API_KEY")
            print("  - HUGGINGFACE_API_KEY")
            print("="*60)
            return None
    except Exception as e:
        # Boundary handler: log the failure and fall back to the "not
        # configured" UI state rather than crashing at import time.
        # (Uses the module-level traceback import; the previous local
        # re-import was redundant.)
        print(f"Error during backend initialization: {e}")
        traceback.print_exc()
        return None
# Initialize components
llm_backend = initialize_backend()
# Only initialize if backend is available
if llm_backend:
survey_gen = SurveyGenerator(llm_backend)
survey_trans = SurveyTranslator(llm_backend)
data_analyzer = DataAnalyzer(llm_backend)
print(f"βœ“ ConversAI initialized with {llm_backend.provider.value} provider")
else:
survey_gen = None
survey_trans = None
data_analyzer = None
print("βœ— ConversAI initialization incomplete - no LLM credentials found")
# ===========================
# Survey Generation Functions
# ===========================
def generate_survey_from_outline(outline: str, survey_type: str, num_questions: int, audience: str):
    """Generate a survey from a free-text outline.

    Stores the result in the module-level ``current_survey`` and returns a
    ``(status_text, rendered_markdown, download_filepath)`` tuple for the UI.
    """
    global current_survey

    # Guard clauses: backend availability first, then input validation.
    if not survey_gen:
        return (
            "❌ LLM backend not configured. Please set up API credentials:\n"
            "- For HuggingFace Spaces: HF_TOKEN is auto-available\n"
            "- For OpenAI: Set OPENAI_API_KEY\n"
            "- For Anthropic: Set ANTHROPIC_API_KEY\n"
            "- For HuggingFace: Set HUGGINGFACE_API_KEY",
            "",
            None
        )
    if not outline or not outline.strip():
        return "❌ Please provide an outline or topic description.", "", None
    if not (1 <= num_questions <= 50):
        return "❌ Number of questions must be between 1 and 50.", "", None

    try:
        # Run the generation, cache the result, and produce both a
        # human-readable rendering and a downloadable JSON file.
        survey_data = survey_gen.generate_survey(
            outline=outline,
            survey_type=survey_type.lower(),
            num_questions=num_questions,
            target_audience=audience
        )
        current_survey = survey_data
        rendered = format_survey_display(survey_data)
        download_path = save_json_file(survey_data, "survey")
        question_count = len(survey_data.get('questions', []))
        return (
            f"βœ… Survey generated successfully! Contains {question_count} questions.",
            rendered,
            download_path
        )
    except Exception as exc:
        print(f"Survey generation error: {traceback.format_exc()}")
        return f"❌ Error generating survey: {str(exc)}", "", None
def format_survey_display(survey_data: Dict) -> str:
    """Render a survey dict as human-readable Markdown.

    Missing keys fall back to sensible defaults ('Survey' title, empty
    introduction/closing, 'N/A' question type).
    """
    parts = [
        f"# {survey_data.get('title', 'Survey')}\n\n",
        f"## Introduction\n{survey_data.get('introduction', '')}\n\n",
        "## Questions\n\n",
    ]
    # One numbered entry per question, with optional options/help lines.
    for idx, question in enumerate(survey_data.get('questions', []), start=1):
        parts.append(f"**{idx}. {question.get('question_text', '')}**\n")
        parts.append(f" - Type: {question.get('question_type', 'N/A')}\n")
        if question.get('options'):
            parts.append(" - Options:\n")
            parts.extend(f" - {choice}\n" for choice in question['options'])
        if question.get('help_text'):
            parts.append(f" - Help: {question['help_text']}\n")
        required_label = 'Yes' if question.get('required', False) else 'No'
        parts.append(f" - Required: {required_label}\n\n")
    parts.append(f"## Closing\n{survey_data.get('closing', '')}\n")
    return "".join(parts)
# ===========================
# Translation Functions
# ===========================
def translate_current_survey(target_languages: List[str]):
    """Translate the in-memory survey into each selected language.

    Returns a ``(status_text, rendered_markdown, download_filepath)`` tuple;
    failures of individual languages are reported without aborting the rest.
    """
    global current_survey

    # Guard clauses: backend, survey presence, and at least one language.
    if not survey_trans:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None
        )
    if not current_survey:
        return "❌ Please generate or upload a survey first.", "", None
    if not target_languages:
        return "❌ Please select at least one target language.", "", None

    try:
        translations = {}
        status_lines = []
        completed = 0

        # Translate language-by-language so one failure doesn't stop the rest.
        for code in target_languages:
            try:
                translations[code] = survey_trans.translate_survey(current_survey, code)
                label = survey_trans._resolve_language(code)
                status_lines.append(f"βœ… Translated to {label}")
                completed += 1
            except Exception as exc:
                label = survey_trans._resolve_language(code)
                status_lines.append(f"❌ Failed to translate to {label}: {str(exc)}")
                print(f"Translation error for {code}: {traceback.format_exc()}")

        if completed == 0:
            return "❌ All translations failed. Please check your LLM configuration.", "", None

        # Render each successful translation, separated by banner lines.
        sections = []
        for code, translated in translations.items():
            if "error" not in translated:
                label = survey_trans._resolve_language(code)
                sections.append(f"\n{'='*50}\n")
                sections.append(f"TRANSLATION: {label.upper()}\n")
                sections.append(f"{'='*50}\n\n")
                sections.append(format_survey_display(translated))
        display_text = "".join(sections)

        # Bundle all translations into one downloadable JSON file.
        filepath = save_json_file(translations, "translations")
        return "\n".join(status_lines), display_text, filepath
    except Exception as exc:
        print(f"Translation error: {traceback.format_exc()}")
        return f"❌ Error during translation: {str(exc)}", "", None
def get_language_choices():
    """Return dropdown labels ("<code> - <name>") for all supported languages.

    Reads the static SUPPORTED_LANGUAGES mapping on the SurveyTranslator
    class (already imported at module level -- the previous function-local
    re-import was redundant), so no configured backend is required.
    """
    langs = SurveyTranslator.SUPPORTED_LANGUAGES
    return [f"{code} - {name}" for code, name in langs.items()]
# ===========================
# Data Analysis Functions
# ===========================
def analyze_survey_data(responses_json: str, questions_json: str = None):
    """Analyze survey responses supplied as JSON text.

    Args:
        responses_json: JSON array of response objects (one dict per respondent).
        questions_json: Optional JSON array of question definitions that gives
            the analyzer additional context.

    Returns:
        (status_text, markdown_report, json_filepath) tuple for the UI.
        On any validation or runtime error, status carries the message and
        the other two slots are ("", None).
    """
    # Guard clause: all handlers degrade gracefully without a backend.
    if not data_analyzer:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None
        )
    if not responses_json or not responses_json.strip():
        return "❌ Please provide survey responses in JSON format.", "", None
    try:
        # Parse and validate inputs before running the (slow) LLM analysis.
        responses = json.loads(responses_json)
        questions = json.loads(questions_json) if questions_json and questions_json.strip() else None
        if not isinstance(responses, list):
            return "❌ Responses must be a JSON array.", "", None
        if len(responses) == 0:
            return "❌ No responses to analyze.", "", None
        # Validate questions if provided
        if questions and not isinstance(questions, list):
            return "❌ Questions must be a JSON array.", "", None

        # Run analysis; the analyzer signals failure via an "error" key.
        analysis_results = data_analyzer.analyze_responses(responses, questions)
        if "error" in analysis_results:
            return f"❌ Analysis error: {analysis_results['error']}", "", None

        # Generate the Markdown report shown in the UI.
        report_md = data_analyzer.generate_report(analysis_results, format="markdown")

        # Persist both artifacts. Only the JSON path is returned for the
        # download widget; the Markdown file is written purely as a side
        # effect (its path was previously bound to an unused variable).
        json_filepath = save_json_file(analysis_results, "analysis_results")
        analysis_to_markdown_file(report_md, "analysis_report")

        status_msg = f"βœ… Analysis complete! Analyzed {len(responses)} responses."
        if questions:
            status_msg += f" Considered {len(questions)} questions."
        return status_msg, report_md, json_filepath
    except json.JSONDecodeError as e:
        return f"❌ Invalid JSON format: {str(e)}", "", None
    except Exception as e:
        print(f"Analysis error: {traceback.format_exc()}")
        return f"❌ Error during analysis: {str(e)}", "", None
def load_example_responses():
    """Return demo survey responses as a pretty-printed JSON string."""
    answer_sets = [
        (
            "The medication helped reduce my symptoms significantly within the first week.",
            "I experienced some mild side effects like drowsiness in the beginning.",
            "Overall, I'm satisfied with the treatment and would recommend it to others.",
        ),
        (
            "I didn't notice much improvement in my condition after taking the medication.",
            "The side effects were quite severe and made it difficult to continue.",
            "I had to stop taking it after two weeks due to adverse reactions.",
        ),
        (
            "The medication worked well but took about 3-4 weeks to show results.",
            "No major side effects, just some occasional nausea.",
            "It's been effective for managing my symptoms on a daily basis.",
        ),
    ]
    # Each respondent's answers are keyed q1..q3, matching the placeholder
    # format shown in the Analyze tab.
    example = [dict(zip(("q1", "q2", "q3"), answers)) for answers in answer_sets]
    return json.dumps(example, indent=2)
# ===========================
# Gradio Interface
# ===========================
def create_interface():
    """Create the main Gradio interface.

    Assembles a backend-status banner and four tabs (Generate, Translate,
    Analyze, About) and wires each button to the module-level handler
    functions. Returns the gr.Blocks app, ready to launch.
    """
    with gr.Blocks(
        title="ConversAI - Qualitative Research Assistant",
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate")
    ) as app:
        gr.Markdown("""
        # ConversAI - Your AI-Powered Qualitative Research Assistant
        Battle the blank page, reach global audiences, and uncover insights with AI assistance.
        """)
        # Show backend status
        if llm_backend:
            status_msg = f"βœ… **Active LLM Provider:** {llm_backend.provider.value.upper()} | Model: {llm_backend.model}"
            bg_color = "rgba(0, 255, 0, 0.1)"
        else:
            # Setup guidance shown when initialize_backend() returned None.
            status_msg = """⚠️ **LLM Provider Not Configured**
**To use this app, you need to configure an LLM provider:**
1. **Easiest (HuggingFace Spaces):** Make sure your Space is PUBLIC and HF_TOKEN will be auto-available
2. **Best Quality:** Add `OPENAI_API_KEY` in Space Settings β†’ Variables
3. **Alternative:** Add `ANTHROPIC_API_KEY` or `HUGGINGFACE_API_KEY`
See the **About** tab for detailed instructions."""
            bg_color = "rgba(255, 165, 0, 0.2)"
        gr.Markdown(f'<div style="background-color: {bg_color}; padding: 15px; border-radius: 5px; margin: 10px 0; border-left: 4px solid #FF6B6B;">{status_msg}</div>')
        with gr.Tabs() as tabs:
            # ========== SURVEY GENERATION TAB ==========
            with gr.Tab("πŸ“ Generate Survey"):
                gr.Markdown("""
                ## Battle the Blank Page
                Share an outline and get AI-powered surveys drafted in minutes,
                complete with industry best practices.
                """)
                with gr.Row():
                    with gr.Column(scale=1):
                        outline_input = gr.Textbox(
                            label="Your Survey Outline or Topic",
                            placeholder="Example: I want to understand patient experiences with a new diabetes medication, focusing on effectiveness, side effects, and quality of life impacts.",
                            lines=6
                        )
                        survey_type_input = gr.Radio(
                            label="Survey Type",
                            choices=["Qualitative", "Quantitative", "Mixed"],
                            value="Qualitative"
                        )
                        # NOTE(review): slider range is 5-25 while the handler
                        # validates 1-50 -- the UI is the stricter bound.
                        num_questions_input = gr.Slider(
                            label="Number of Questions",
                            minimum=5,
                            maximum=25,
                            value=10,
                            step=1
                        )
                        audience_input = gr.Textbox(
                            label="Target Audience",
                            placeholder="Example: Adults aged 30-65 with Type 2 diabetes",
                            value="General audience"
                        )
                        generate_btn = gr.Button("πŸš€ Generate Survey", variant="primary", size="lg")
                    with gr.Column(scale=1):
                        gen_status = gr.Textbox(label="Status", interactive=False)
                        gen_output = gr.Markdown(label="Generated Survey")
                        gen_download = gr.File(label="Download Survey JSON", visible=False)
                # Event handlers
                # The chained .then() step reveals the download widget only
                # when the handler produced a file path.
                generate_btn.click(
                    fn=generate_survey_from_outline,
                    inputs=[outline_input, survey_type_input, num_questions_input, audience_input],
                    outputs=[gen_status, gen_output, gen_download]
                ).then(
                    fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
                    inputs=[gen_download],
                    outputs=[gen_download]
                )
            # ========== TRANSLATION TAB ==========
            with gr.Tab("🌍 Translate Survey"):
                gr.Markdown("""
                ## Reach Global Audiences
                Translate your surveys automatically to streamline efforts and reach wider audiences.
                """)
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("### Select Target Languages")
                        # Create checkboxes for popular languages
                        lang_checkboxes = gr.CheckboxGroup(
                            label="Languages",
                            choices=get_language_choices(),
                            value=[]
                        )
                        translate_btn = gr.Button("🌐 Translate Survey", variant="primary", size="lg")
                        gr.Markdown("""
                        **Note:** Make sure you've generated a survey first, or upload one using the JSON format.
                        """)
                    with gr.Column(scale=1):
                        trans_status = gr.Textbox(label="Translation Status", interactive=False)
                        trans_output = gr.Markdown(label="Translations")
                        trans_download = gr.File(label="Download Translations JSON", visible=False)
                # Event handlers
                def extract_lang_codes(selected_items):
                    """Extract language codes from checkbox selections"""
                    # Checkbox labels are "<code> - <name>"; keep only the code.
                    return [item.split(" - ")[0] for item in selected_items]
                translate_btn.click(
                    fn=lambda x: translate_current_survey(extract_lang_codes(x)),
                    inputs=[lang_checkboxes],
                    outputs=[trans_status, trans_output, trans_download]
                ).then(
                    fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
                    inputs=[trans_download],
                    outputs=[trans_download]
                )
            # ========== ANALYSIS TAB ==========
            with gr.Tab("πŸ“Š Analyze Data"):
                gr.Markdown("""
                ## Uncover Key Insights
                Upload your survey responses and get AI-assisted summaries of key findings,
                themes, and trends.
                """)
                with gr.Row():
                    with gr.Column(scale=1):
                        responses_input = gr.Textbox(
                            label="Survey Responses (JSON)",
                            placeholder='[{"q1": "response 1", "q2": "response 2"}, ...]',
                            lines=10
                        )
                        questions_input = gr.Textbox(
                            label="Questions (JSON, Optional)",
                            placeholder='[{"question_text": "What is your experience?", ...}]',
                            lines=5
                        )
                        with gr.Row():
                            analyze_btn = gr.Button("πŸ” Analyze Data", variant="primary", size="lg")
                            example_btn = gr.Button("Load Example", variant="secondary")
                    with gr.Column(scale=1):
                        analysis_status = gr.Textbox(label="Status", interactive=False)
                        analysis_output = gr.Markdown(label="Analysis Report")
                        analysis_download = gr.File(label="Download Analysis JSON", visible=False)
                # Event handlers
                analyze_btn.click(
                    fn=analyze_survey_data,
                    inputs=[responses_input, questions_input],
                    outputs=[analysis_status, analysis_output, analysis_download]
                ).then(
                    fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
                    inputs=[analysis_download],
                    outputs=[analysis_download]
                )
                # "Load Example" fills the responses box with canned demo data.
                example_btn.click(
                    fn=load_example_responses,
                    outputs=[responses_input]
                )
            # ========== ABOUT TAB ==========
            with gr.Tab("ℹ️ About"):
                gr.Markdown("""
                ## About ConversAI
                ConversAI is a comprehensive qualitative research assistant that helps you:
                ### 🎯 Generate Surveys
                - Create professional surveys from simple outlines
                - Follow industry best practices automatically
                - Save hours of questionnaire design time
                ### 🌍 Translate Globally
                - Reach audiences in 18+ languages
                - Maintain cultural appropriateness
                - Expand your research scope effortlessly
                ### πŸ“Š Analyze Results
                - Extract key themes automatically
                - Identify patterns and trends
                - Generate actionable insights
                ### πŸ”§ Configuration Guide
                **For HuggingFace Spaces (Recommended):**
                No configuration needed! The app automatically uses the HF Inference API with the built-in `HF_TOKEN`.
                **Supported Models:**
                - Default: `mistralai/Mixtral-8x7B-Instruct-v0.1`
                - You can change by setting `LLM_MODEL` environment variable
                **For Other LLM Providers:**
                Add these environment variables in your Space Settings:
                1. **OpenAI** (Best quality, paid):
                   - `LLM_PROVIDER=openai`
                   - `OPENAI_API_KEY=sk-your-key`
                2. **Anthropic Claude** (Best reasoning, paid):
                   - `LLM_PROVIDER=anthropic`
                   - `ANTHROPIC_API_KEY=your-key`
                3. **Custom HuggingFace Model**:
                   - `LLM_PROVIDER=huggingface`
                   - `LLM_MODEL=your-model-name`
                **πŸ’‘ Pro Tip:** For production use, we recommend OpenAI or Anthropic for faster, more reliable results.
                **Supported LLM Providers:**
                - HuggingFace Inference API (Free tier available)
                - OpenAI (GPT-4, GPT-4o-mini, GPT-3.5)
                - Anthropic (Claude 3.5 Sonnet, Claude 3 Opus)
                - LM Studio (local development only)
                ### πŸ“„ Data Privacy
                - All processing is done through your configured LLM provider
                - No data is stored permanently by this application
                - Survey data and responses remain in your control
                ### πŸš€ Getting Started
                1. **Generate** a survey from your research outline
                2. **Translate** it to reach global audiences
                3. Collect responses from participants
                4. **Analyze** the data to uncover insights
                ---
                Built with ❀️ using Gradio and state-of-the-art LLMs
                """)
    return app
# ===========================
# Main Entry Point
# ===========================
if __name__ == "__main__":
    demo = create_interface()
    # Launch with appropriate settings
    demo.launch(
        server_name="0.0.0.0",  # Allow external access (required behind the Spaces proxy)
        server_port=7860,       # Standard HF Spaces port
        share=False,            # Don't create a public link (HF Spaces handles this)
        show_error=True         # Surface handler exceptions in the UI for debugging
    )