jmisak committed
Commit 196c707 · verified · 1 Parent(s): 4072ec3

Upload 23 files

Files changed (14):
  1. .env.example +38 -0
  2. .gitattributes +35 -35
  3. .gitignore +58 -0
  4. DEPLOYMENT.md +278 -0
  5. README.md +90 -6
  6. USAGE_GUIDE.md +278 -0
  7. app.py +495 -0
  8. data_analyzer.py +434 -0
  9. export_utils.py +138 -0
  10. llm_backend.py +220 -0
  11. requirements.txt +2 -2
  12. survey_generator.py +224 -0
  13. survey_translator.py +263 -0
  14. test_app.py +130 -0
.env.example ADDED
@@ -0,0 +1,38 @@
+ # ConversAI Environment Configuration
+ # Copy this file to .env and fill in your credentials
+
+ # ===========================
+ # LLM Provider Configuration
+ # ===========================
+
+ # Specify which provider to use (openai, anthropic, huggingface, lm_studio)
+ LLM_PROVIDER=huggingface
+
+ # OpenAI Configuration
+ OPENAI_API_KEY=your_openai_api_key_here
+ # Optional: Override default model
+ # LLM_MODEL=gpt-4o-mini
+
+ # Anthropic Configuration
+ ANTHROPIC_API_KEY=your_anthropic_api_key_here
+ # Optional: Override default model
+ # LLM_MODEL=claude-3-5-sonnet-20241022
+
+ # HuggingFace Configuration
+ HUGGINGFACE_API_KEY=your_huggingface_api_key_here
+ # Optional: Override default model
+ # LLM_MODEL=mistralai/Mixtral-8x7B-Instruct-v0.1
+
+ # LM Studio Configuration (for local development)
+ LM_STUDIO_URL=http://localhost:1234/v1/chat/completions
+ # LLM_MODEL=your_local_model_name
+
+ # ===========================
+ # Application Settings
+ # ===========================
+
+ # Port for local development (HF Spaces uses 7860 by default)
+ PORT=7860
+
+ # Enable debug mode
+ DEBUG=false
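
For orientation, a minimal sketch of how these variables can be read from Python with nothing but `os.getenv` (auto-loading a `.env` file would need an extra package such as python-dotenv, which is an assumption, not something the shown requirements include). The defaults mirror the values above:

```python
import os

# Read the same variables .env.example defines; defaults mirror the file.
provider = os.getenv("LLM_PROVIDER", "huggingface")
api_key = (
    os.getenv("OPENAI_API_KEY")
    or os.getenv("ANTHROPIC_API_KEY")
    or os.getenv("HUGGINGFACE_API_KEY")
    or os.getenv("HF_TOKEN")
)
lm_studio_url = os.getenv("LM_STUDIO_URL", "http://localhost:1234/v1/chat/completions")
port = int(os.getenv("PORT", "7860"))
debug = os.getenv("DEBUG", "false").lower() == "true"
```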
.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text

+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,58 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual environments
+ venv/
+ ENV/
+ env/
+ .venv
+
+ # Environment variables
+ .env
+ .env.local
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # Gradio
+ flagged/
+ gradio_cached_examples/
+
+ # Data files
+ *.json
+ !requirements.json
+ conversation_log_*.json
+
+ # Logs
+ *.log
+
+ # OS
+ .DS_Store
+ Thumbs.db
+
+ # Temporary files
+ *.tmp
+ *.temp
DEPLOYMENT.md ADDED
@@ -0,0 +1,278 @@
+ # Deployment Guide
+
+ ## Deploying to HuggingFace Spaces
+
+ ### Prerequisites
+ - HuggingFace account
+ - API token from your LLM provider (or use HF Inference API)
+
+ ### Step-by-Step Deployment
+
+ #### 1. Create a New Space
+
+ 1. Go to https://huggingface.co/spaces
+ 2. Click "Create new Space"
+ 3. Choose a name (e.g., "conversai-research-assistant")
+ 4. Select SDK: **Gradio**
+ 5. Choose visibility (Public or Private)
+ 6. Click "Create Space"
+
+ #### 2. Upload Files
+
+ Upload these files to your Space:
+
+ **Required Files:**
+ - `app.py` - Main application
+ - `llm_backend.py` - LLM interface
+ - `survey_generator.py` - Survey generation
+ - `survey_translator.py` - Translation module
+ - `data_analyzer.py` - Analysis module
+ - `export_utils.py` - Export utilities
+ - `requirements.txt` - Dependencies
+ - `README.md` - Space description
+
+ **Optional Files:**
+ - `.env.example` - Configuration template
+ - `USAGE_GUIDE.md` - User guide
+ - `test_app.py` - Testing script
+
+ #### 3. Configure Environment Variables
+
+ In your Space settings, add environment variables:
+
+ **For HuggingFace Inference API (Free Tier):**
+ ```
+ LLM_PROVIDER=huggingface
+ # HF_TOKEN is automatically available in Spaces
+ ```
+
+ **For OpenAI:**
+ ```
+ LLM_PROVIDER=openai
+ OPENAI_API_KEY=sk-your-key-here
+ ```
+
+ **For Anthropic:**
+ ```
+ LLM_PROVIDER=anthropic
+ ANTHROPIC_API_KEY=your-key-here
+ ```
+
+ #### 4. Space Will Auto-Deploy
+
+ - HuggingFace will automatically build and deploy
+ - Check the "Logs" tab for build status
+ - First build may take 2-3 minutes
+
+ #### 5. Test Your Deployment
+
+ 1. Wait for "Running" status
+ 2. Open the Space URL
+ 3. Test survey generation
+ 4. Test translation
+ 5. Test analysis with example data
+
+ ### Using HuggingFace Inference API
+
+ The easiest option for deployment is to use HuggingFace's free Inference API:
+
+ **Pros:**
+ - No API key needed (uses HF_TOKEN automatically)
+ - Free tier available
+ - Easy setup
+
+ **Cons:**
+ - May have rate limits on free tier
+ - Slower than paid providers
+ - May queue during high usage
+
+ **Configuration:**
+ Just set `LLM_PROVIDER=huggingface` in your environment variables.
+
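
To sanity-check this configuration outside the UI, a minimal sketch using the same classes this upload's `app.py` imports (`LLMBackend` and `LLMProvider` from `llm_backend.py`). The `generate(messages, max_tokens=..., temperature=...)` signature matches the calls in `data_analyzer.py`, but treat the snippet as a sketch rather than a guaranteed API:

```python
import os
from llm_backend import LLMBackend, LLMProvider

# Mirror app.py's HuggingFace branch: prefer HUGGINGFACE_API_KEY, fall back to HF_TOKEN.
api_key = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
backend = LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=api_key)

# One tiny generation as a smoke test.
print(backend.generate([{"role": "user", "content": "Say OK."}], max_tokens=10, temperature=0.0))
```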
+ ### Using Other Providers
+
+ #### OpenAI (Recommended for Production)
+
+ **Pros:**
+ - Fast and reliable
+ - High quality outputs
+ - Good API documentation
+
+ **Cons:**
+ - Requires paid API key
+ - Usage costs
+
+ **Cost Estimate:**
+ - Survey generation: ~$0.01-0.05 per survey
+ - Translation: ~$0.01-0.03 per language
+ - Analysis: ~$0.05-0.15 per batch
+
+ #### Anthropic Claude
+
+ **Pros:**
+ - Excellent for nuanced text
+ - Strong reasoning capabilities
+ - Good safety features
+
+ **Cons:**
+ - Requires API key
+ - Usage costs
+
+ **Cost Estimate:**
+ Similar to OpenAI pricing
+
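
As a back-of-the-envelope check on the per-operation estimates above (the workload numbers below are hypothetical inputs, not measurements):

```python
# Hypothetical monthly workload priced with the per-operation estimates above.
surveys, languages, batches = 100, 3, 20
low  = surveys * 0.01 + surveys * languages * 0.01 + batches * 0.05
high = surveys * 0.05 + surveys * languages * 0.03 + batches * 0.15
print(f"Estimated monthly cost: ${low:.2f} - ${high:.2f}")  # $5.00 - $17.00
```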
+ ## Deploying Locally
+
+ ### For Development
+
+ ```bash
+ # 1. Clone/download repository
+ git clone <your-repo-url>
+ cd ConversAI
+
+ # 2. Create virtual environment
+ python -m venv venv
+ source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+ # 3. Install dependencies
+ pip install -r requirements.txt
+
+ # 4. Set environment variables
+ export LLM_PROVIDER="openai"
+ export OPENAI_API_KEY="your-key"
+
+ # 5. Run
+ python app.py
+ ```
+
+ Access at `http://localhost:7860`
+
+ ### For Production (Self-Hosted)
+
+ Use Docker for production deployment:
+
+ **Create Dockerfile:**
+ ```dockerfile
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # COPY with multiple sources requires the destination to end with "/"
+ COPY *.py ./
+ COPY *.md ./
+
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
+ ENV GRADIO_SERVER_PORT=7860
+
+ EXPOSE 7860
+
+ CMD ["python", "app.py"]
+ ```
+
+ **Build and run:**
+ ```bash
+ docker build -t conversai .
+ docker run -p 7860:7860 \
+   -e LLM_PROVIDER=openai \
+   -e OPENAI_API_KEY=your-key \
+   conversai
+ ```
+
+ ## Post-Deployment Checklist
+
+ - [ ] App loads without errors
+ - [ ] Can generate a survey
+ - [ ] Can translate a survey
+ - [ ] Can analyze sample data
+ - [ ] Downloads work correctly
+ - [ ] Error messages are clear
+ - [ ] All tabs are accessible
+ - [ ] Mobile view works (if public)
+
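
For the first checklist item, a small scripted probe can save manual clicking. A minimal sketch (the URL is a placeholder for your Space or server; a 200 response means Gradio is serving the UI, it does not exercise the LLM backend):

```python
import urllib.request

# Hypothetical deployment URL; replace with your Space URL or http://localhost:7860.
URL = "http://localhost:7860"

with urllib.request.urlopen(URL, timeout=30) as resp:
    print(f"{URL} -> HTTP {resp.status}")  # expect 200 when the app is up
```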
+ ## Monitoring and Maintenance
+
+ ### Check Usage
+
+ Monitor your LLM API usage:
+ - OpenAI: https://platform.openai.com/usage
+ - Anthropic: Check your console
+ - HuggingFace: Monitor rate limits
+
+ ### Update Dependencies
+
+ Regularly update to get security fixes:
+ ```bash
+ pip install --upgrade gradio requests pandas
+ ```
+
+ ### Backup
+
+ Regularly back up:
+ - Generated surveys
+ - Analysis results
+ - User feedback
+ - Configuration
+
+ ## Troubleshooting Deployment
+
+ ### Space Build Fails
+
+ **Check:**
+ - `requirements.txt` is valid
+ - `README.md` has correct frontmatter
+ - No syntax errors in Python files
+
+ ### Space Runs But Errors
+
+ **Check:**
+ - Environment variables are set
+ - API keys are valid
+ - Provider quotas aren't exceeded
+
+ ### Slow Performance
+
+ **Solutions:**
+ - Upgrade to a paid LLM tier
+ - Use faster models (e.g., GPT-4o-mini)
+ - Add caching for common requests
+ - Optimize prompts for shorter responses
+
+ ## Scaling Considerations
+
+ ### For Heavy Usage
+
+ 1. **Use faster models**: GPT-4o-mini instead of GPT-4
+ 2. **Implement caching**: Cache common survey patterns
+ 3. **Add rate limiting**: Prevent abuse
+ 4. **Load balancing**: Use multiple API keys
+ 5. **Queue system**: Handle concurrent requests
+
+ ### Cost Optimization
+
+ 1. **Optimize prompts**: Shorter prompts = lower costs
+ 2. **Batch operations**: Process multiple items together
+ 3. **Use cheaper models**: For simpler tasks
+ 4. **Set token limits**: Prevent runaway costs (see the sketch below)
+ 5. **Monitor usage**: Set up alerts
+
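
For item 4, a sketch of capping output length per call, reusing the `generate(messages, max_tokens=..., temperature=...)` signature that `data_analyzer.py` already relies on (the ceiling value itself is an assumption to tune):

```python
# Wrap the backend so every call carries a hard output cap.
MAX_OUTPUT_TOKENS = 800  # hypothetical ceiling; tune per task

def generate_capped(backend, messages, **kwargs):
    kwargs["max_tokens"] = min(kwargs.get("max_tokens", MAX_OUTPUT_TOKENS), MAX_OUTPUT_TOKENS)
    return backend.generate(messages, **kwargs)
```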
+ ## Security Best Practices
+
+ 1. **Never commit API keys** to version control
+ 2. **Use environment variables** for secrets
+ 3. **Rotate keys regularly**
+ 4. **Set spending limits** with providers
+ 5. **Monitor for unusual activity**
+ 6. **Use private Spaces** for sensitive research
+
+ ## Support and Resources
+
+ - **HuggingFace Docs**: https://huggingface.co/docs/hub/spaces
+ - **Gradio Docs**: https://gradio.app/docs
+ - **OpenAI API**: https://platform.openai.com/docs
+ - **Anthropic API**: https://docs.anthropic.com
+
+ ---
+
+ Need help? Check the USAGE_GUIDE.md or open an issue!
README.md CHANGED
@@ -1,6 +1,90 @@
- ---
- title: ConversAI
- app_file: insight_genie_v021.py
- sdk: gradio
- sdk_version: 5.45.0
- ---
+ ---
+ title: ConversAI - Qualitative Research Assistant
+ emoji: 🔬
+ colorFrom: blue
+ colorTo: purple
+ sdk: gradio
+ sdk_version: 5.45.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ # ConversAI - AI-Powered Qualitative Research Assistant
+
+ Battle the blank page, reach global audiences, and uncover insights with AI assistance.
+
+ ## 🌟 Features
+
+ ### 📝 Survey Generation
+ - Generate professional surveys from simple outlines
+ - Follow industry best practices automatically
+ - Choose from qualitative, quantitative, or mixed methods
+ - Customize number of questions and target audience
+
+ ### 🌍 Survey Translation
+ - Translate surveys to 18+ languages
+ - Maintain cultural appropriateness and meaning
+ - Reach global audiences effortlessly
+ - Batch translation support
+
+ ### 📊 Data Analysis
+ - AI-assisted thematic analysis
+ - Sentiment analysis and emotional insights
+ - Automatic pattern and trend detection
+ - Generate actionable insights and recommendations
+ - Export detailed analysis reports
+
+ ## 🚀 Quick Start
+
+ 1. **Generate a Survey**: Start with an outline or topic description
+ 2. **Translate**: Select target languages to reach global audiences
+ 3. **Collect Responses**: Use the generated survey with your participants
+ 4. **Analyze**: Upload responses to uncover key findings and trends
+
+ ## 🔧 Configuration
+
+ ConversAI supports multiple LLM providers. Configure via environment variables:
+
+ - `OPENAI_API_KEY` - For OpenAI models (GPT-4, GPT-3.5)
+ - `ANTHROPIC_API_KEY` - For Claude models
+ - `HUGGINGFACE_API_KEY` or `HF_TOKEN` - For HuggingFace Inference API
+ - `LM_STUDIO_URL` - For local LM Studio instance
+
+ The app automatically detects which provider to use based on available credentials.
+
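
The detection order is the one implemented by `initialize_backend()` in `app.py`; in sketch form:

```python
import os

# Provider precedence as implemented in app.py's initialize_backend().
if os.getenv("OPENAI_API_KEY"):
    provider = "openai"
elif os.getenv("ANTHROPIC_API_KEY"):
    provider = "anthropic"
elif os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN"):
    provider = "huggingface"
else:
    provider = "lm_studio"  # local fallback
```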
+ ## 📦 Installation
+
+ ```bash
+ pip install -r requirements.txt
+ python app.py
+ ```
+
+ ## 🏗️ Architecture
+
+ ConversAI is built with a modular architecture:
+
+ - **llm_backend.py** - Unified LLM interface supporting multiple providers
+ - **survey_generator.py** - AI-powered survey generation
+ - **survey_translator.py** - Multi-language translation engine
+ - **data_analyzer.py** - Qualitative data analysis and insights
+ - **app.py** - Gradio-based web interface
+
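
The modules are wired together the way `app.py` does it at startup: one shared backend injected into each feature module.

```python
from llm_backend import LLMBackend, LLMProvider
from survey_generator import SurveyGenerator
from survey_translator import SurveyTranslator
from data_analyzer import DataAnalyzer

# One backend, shared by all three feature modules (as in app.py).
backend = LLMBackend(provider=LLMProvider.OPENAI)
generator = SurveyGenerator(backend)
translator = SurveyTranslator(backend)
analyzer = DataAnalyzer(backend)
```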
+ ## 📄 Data Privacy
+
+ - All processing is done through your configured LLM provider
+ - No data is stored permanently by this application
+ - Survey data and responses remain in your control
+ - Suitable for sensitive research projects
+
+ ## 🤝 Contributing
+
+ Contributions are welcome! This is a production-grade application designed for real-world qualitative research.
+
+ ## 📝 License
+
+ MIT License - Feel free to use for research and commercial purposes.
+
+ ---
+
+ Built with ❤️ using Gradio and state-of-the-art LLMs
USAGE_GUIDE.md ADDED
@@ -0,0 +1,278 @@
+ # ConversAI Usage Guide
+
+ ## Quick Start
+
+ ### 1. Installation
+
+ ```bash
+ # Clone or download the repository
+ cd ConversAI
+
+ # Install dependencies
+ pip install -r requirements.txt
+ ```
+
+ ### 2. Configuration
+
+ ConversAI supports multiple LLM providers. Choose one and configure:
+
+ #### Option A: HuggingFace (Recommended for HF Spaces)
+ ```bash
+ export HUGGINGFACE_API_KEY="your_hf_token_here"
+ export LLM_PROVIDER="huggingface"
+ ```
+
+ #### Option B: OpenAI
+ ```bash
+ export OPENAI_API_KEY="your_openai_key_here"
+ export LLM_PROVIDER="openai"
+ ```
+
+ #### Option C: Anthropic
+ ```bash
+ export ANTHROPIC_API_KEY="your_anthropic_key_here"
+ export LLM_PROVIDER="anthropic"
+ ```
+
+ #### Option D: Local LM Studio
+ ```bash
+ export LLM_PROVIDER="lm_studio"
+ export LM_STUDIO_URL="http://localhost:1234/v1/chat/completions"
+ ```
+
+ ### 3. Run the Application
+
+ ```bash
+ python app.py
+ ```
+
+ The app will be available at `http://localhost:7860`
+
+ ## Features Guide
+
+ ### 📝 Survey Generation
+
+ Generate professional surveys from simple outlines.
+
+ **Steps:**
+ 1. Navigate to the "Generate Survey" tab
+ 2. Enter your research outline or topic description
+    - Example: "I want to understand patient experiences with a new diabetes medication"
+ 3. Select survey type: Qualitative, Quantitative, or Mixed
+ 4. Set number of questions (5-25 recommended)
+ 5. Specify your target audience
+ 6. Click "Generate Survey"
+
+ **Best Practices:**
+ - Be specific about your research goals
+ - Mention key topics you want to explore
+ - Include context about your target respondents
+ - Start with 10-15 questions for most surveys
+
+ **Output:**
+ - Formatted survey preview
+ - Downloadable JSON file with full survey data
+ - Questions follow industry best practices
+ - Includes introduction and closing messages
+
+ ### 🌍 Survey Translation
+
+ Translate your surveys to reach global audiences.
+
+ **Steps:**
+ 1. Generate a survey first (or have one ready)
+ 2. Navigate to the "Translate Survey" tab
+ 3. Select target language(s) from the checkbox list
+ 4. Click "Translate Survey"
+
+ **Supported Languages:**
+ - Spanish, French, German, Portuguese
+ - Chinese, Japanese, Korean
+ - Arabic, Hindi, Russian
+ - And 8+ more languages
+
+ **Features:**
+ - Maintains cultural appropriateness
+ - Preserves question intent and meaning
+ - Handles multiple languages in one batch
+ - Exports all translations in a single file
+
+ **Tips:**
+ - Translate to multiple similar languages to compare phrasing
+ - Use back-translation to verify accuracy
+ - Consider cultural context for sensitive topics
+
+ ### 📊 Data Analysis
+
+ Uncover insights from your survey responses.
+
+ **Steps:**
+ 1. Navigate to the "Analyze Data" tab
+ 2. Prepare your responses in JSON format:
+ ```json
+ [
+   {"q1": "response 1", "q2": "response 2"},
+   {"q1": "response 1", "q2": "response 2"}
+ ]
+ ```
+ 3. Optionally include questions for context
+ 4. Click "Load Example" to see the format
+ 5. Click "Analyze Data"
+
+ **Analysis Includes:**
+ - **Executive Summary** - High-level overview
+ - **Themes** - Main topics identified in responses
+ - **Sentiment Analysis** - Emotional tone and distribution
+ - **Key Insights** - Actionable findings
+ - **Statistics** - Response metrics
+
+ **Output Formats:**
+ - Markdown report (for viewing)
+ - JSON file (for further processing)
+ - Both include complete analysis results
+
+ **Pro Tips:**
+ - Minimum 10-20 responses for meaningful analysis
+ - Include diverse perspectives for richer insights
+ - Provide questions for better context
+ - Export results for presentations
+
+ ## File Formats
+
+ ### Survey JSON Format
+ ```json
+ {
+   "title": "Survey Title",
+   "introduction": "Welcome message",
+   "questions": [
+     {
+       "id": 1,
+       "question_text": "Your question here?",
+       "question_type": "open_ended",
+       "required": true,
+       "help_text": "Optional clarification"
+     }
+   ],
+   "closing": "Thank you message"
+ }
+ ```
+
+ ### Responses JSON Format
+ ```json
+ [
+   {
+     "q1": "First question response",
+     "q2": "Second question response",
+     "q3": "Third question response"
+   },
+   {
+     "q1": "Another respondent's answer",
+     "q2": "Their second answer",
+     "q3": "Their third answer"
+   }
+ ]
+ ```
+
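
Before pasting responses into the app, you can validate a file with the same shape checks `analyze_survey_data` performs in `app.py`:

```python
import json

def validate_responses(path: str) -> list:
    """Apply the same shape checks app.py performs before analysis."""
    with open(path) as f:
        responses = json.load(f)
    if not isinstance(responses, list):
        raise ValueError("Responses must be a JSON array.")
    if not responses:
        raise ValueError("No responses to analyze.")
    return responses

responses = validate_responses("responses.json")  # hypothetical filename
print(f"{len(responses)} responses look structurally valid.")
```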
+ ## Deployment to HuggingFace Spaces
+
+ 1. Create a new Space on HuggingFace
+ 2. Upload all `.py` files and `requirements.txt`
+ 3. Upload `README.md` with the frontmatter
+ 4. Set environment variables in Space settings:
+    - Add `HF_TOKEN` (automatically available)
+    - Or add API keys for other providers
+ 5. Space will auto-deploy!
+
+ ## Troubleshooting
+
+ ### Issue: "LLM generation failed"
+ **Solutions:**
+ - Check your API key is set correctly
+ - Verify you have credits/quota with your provider
+ - Try a different provider
+ - Check network connectivity
+
+ ### Issue: "Translation failed"
+ **Solutions:**
+ - Ensure a survey was generated first
+ - Check API key and quota
+ - Try translating to fewer languages at once
+ - Verify the survey data is valid
+
+ ### Issue: "Analysis returned no results"
+ **Solutions:**
+ - Check the JSON format is valid
+ - Ensure responses is a list/array
+ - Provide at least 3-5 responses
+ - Check the LLM provider is working
+
+ ### Issue: "Module import errors"
+ **Solutions:**
+ ```bash
+ pip install -r requirements.txt --upgrade
+ ```
+
+ ## API Usage (Advanced)
+
+ You can also use the modules programmatically:
+
+ ```python
+ from llm_backend import LLMBackend, LLMProvider
+ from survey_generator import SurveyGenerator
+
+ # Initialize
+ backend = LLMBackend(provider=LLMProvider.OPENAI)
+ generator = SurveyGenerator(backend)
+
+ # Generate survey
+ survey = generator.generate_survey(
+     outline="Study user satisfaction with mobile apps",
+     survey_type="qualitative",
+     num_questions=10,
+     target_audience="Mobile app users aged 18-35"
+ )
+
+ print(survey)
+ ```
+
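
The same pattern extends to translation and analysis. The calls below use the signatures that `app.py` invokes (`translate_survey(survey, lang_code)` and `analyze_responses(responses, questions)`), so treat this as a sketch rather than a full pipeline; the language code and responses are illustrative assumptions:

```python
from llm_backend import LLMBackend, LLMProvider
from survey_generator import SurveyGenerator
from survey_translator import SurveyTranslator
from data_analyzer import DataAnalyzer

backend = LLMBackend(provider=LLMProvider.OPENAI)
survey = SurveyGenerator(backend).generate_survey(
    outline="Study user satisfaction with mobile apps",
    survey_type="qualitative",
    num_questions=5,
    target_audience="Mobile app users aged 18-35",
)

# Translate into Spanish ("es" assumed to be one of the supported language codes).
survey_es = SurveyTranslator(backend).translate_survey(survey, "es")

# Analyze collected responses (a hypothetical two-respondent list).
responses = [{"q1": "Great app overall"}, {"q1": "Crashes on startup sometimes"}]
analyzer = DataAnalyzer(backend)
results = analyzer.analyze_responses(responses, survey.get("questions"))
print(analyzer.generate_report(results, format="markdown"))
```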
+ ## Best Practices
+
+ ### For Survey Generation:
+ - Start with clear research objectives
+ - Be specific about your target audience
+ - Review and refine generated questions
+ - Test with a small pilot group first
+
+ ### For Translation:
+ - Verify translations with native speakers
+ - Consider regional language variations
+ - Test cultural appropriateness
+ - Use back-translation for validation
+
+ ### For Analysis:
+ - Collect sufficient responses (20+ ideal)
+ - Ensure response quality
+ - Combine with quantitative data when possible
+ - Review AI insights critically
+
+ ## Support
+
+ For issues, questions, or contributions:
+ - Check the README.md
+ - Review this usage guide
+ - Open an issue on GitHub
+ - Contact the development team
+
+ ## Tips for Production Use
+
+ 1. **Data Privacy**: Review your LLM provider's data policy
+ 2. **API Costs**: Monitor usage to control costs
+ 3. **Rate Limits**: Be aware of provider rate limits
+ 4. **Validation**: Always review AI-generated content
+ 5. **Backup**: Save generated surveys and analyses
+ 6. **Version Control**: Track survey versions
+ 7. **Ethics**: Ensure informed consent from participants
+
+ ---
+
+ Happy researching! 🔬
app.py ADDED
@@ -0,0 +1,495 @@
+ """
+ ConversAI - AI-Powered Qualitative Research Assistant
+ Production-grade survey generation, translation, and analysis platform
+ """
+ import gradio as gr
+ import json
+ import os
+ import traceback
+ from typing import Dict, List, Optional
+
+ from llm_backend import LLMBackend, LLMProvider
+ from survey_generator import SurveyGenerator
+ from survey_translator import SurveyTranslator
+ from data_analyzer import DataAnalyzer
+ from export_utils import save_json_file, survey_to_csv, analysis_to_markdown_file
+
+
+ # Global state for current survey
+ current_survey = None
+ current_responses = []
+
+
+ def initialize_backend():
+     """Initialize LLM backend based on environment"""
+     try:
+         # Try to detect available provider from environment
+         if os.getenv("OPENAI_API_KEY"):
+             return LLMBackend(provider=LLMProvider.OPENAI)
+         elif os.getenv("ANTHROPIC_API_KEY"):
+             return LLMBackend(provider=LLMProvider.ANTHROPIC)
+         elif os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN"):
+             # Use HF_TOKEN which is automatically set in HF Spaces
+             api_key = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
+             return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=api_key)
+         else:
+             # Fallback to LM Studio for local development
+             return LLMBackend(provider=LLMProvider.LM_STUDIO)
+     except Exception as e:
+         print(f"Warning: Backend initialization issue: {e}")
+         # Return a default backend
+         return LLMBackend(provider=LLMProvider.LM_STUDIO)
+
+
+ # Initialize components
+ llm_backend = initialize_backend()
+ survey_gen = SurveyGenerator(llm_backend)
+ survey_trans = SurveyTranslator(llm_backend)
+ data_analyzer = DataAnalyzer(llm_backend)
+
+
+ # ===========================
+ # Survey Generation Functions
+ # ===========================
+
+ def generate_survey_from_outline(outline: str, survey_type: str, num_questions: int, audience: str):
+     """Generate survey from user outline"""
+     global current_survey
+
+     if not outline or not outline.strip():
+         return "❌ Please provide an outline or topic description.", "", None
+
+     # Validate inputs
+     if num_questions < 1 or num_questions > 50:
+         return "❌ Number of questions must be between 1 and 50.", "", None
+
+     try:
+         # Generate survey
+         survey_data = survey_gen.generate_survey(
+             outline=outline,
+             survey_type=survey_type.lower(),
+             num_questions=num_questions,
+             target_audience=audience
+         )
+
+         current_survey = survey_data
+
+         # Format for display
+         display_text = format_survey_display(survey_data)
+
+         # Save to file for download
+         filepath = save_json_file(survey_data, "survey")
+
+         return (
+             f"✅ Survey generated successfully! Contains {len(survey_data.get('questions', []))} questions.",
+             display_text,
+             filepath
+         )
+
+     except Exception as e:
+         error_msg = f"❌ Error generating survey: {str(e)}"
+         print(f"Survey generation error: {traceback.format_exc()}")
+         return error_msg, "", None
+
+
+ def format_survey_display(survey_data: Dict) -> str:
+     """Format survey data for readable display"""
+     output = f"# {survey_data.get('title', 'Survey')}\n\n"
+     output += f"## Introduction\n{survey_data.get('introduction', '')}\n\n"
+     output += "## Questions\n\n"
+
+     for i, q in enumerate(survey_data.get('questions', []), 1):
+         output += f"**{i}. {q.get('question_text', '')}**\n"
+         output += f"   - Type: {q.get('question_type', 'N/A')}\n"
+
+         if q.get('options'):
+             output += "   - Options:\n"
+             for opt in q['options']:
+                 output += f"     - {opt}\n"
+
+         if q.get('help_text'):
+             output += f"   - Help: {q['help_text']}\n"
+
+         output += f"   - Required: {'Yes' if q.get('required', False) else 'No'}\n\n"
+
+     output += f"## Closing\n{survey_data.get('closing', '')}\n"
+
+     return output
+
+
+ # ===========================
+ # Translation Functions
+ # ===========================
+
+ def translate_current_survey(target_languages: List[str]):
+     """Translate the current survey to selected languages"""
+     global current_survey
+
+     if not current_survey:
+         return "❌ Please generate or upload a survey first.", "", None
+
+     if not target_languages:
+         return "❌ Please select at least one target language.", "", None
+
+     try:
+         # Translate to all selected languages
+         translations = {}
+         status_messages = []
+         success_count = 0
+
+         for lang_code in target_languages:
+             try:
+                 translated = survey_trans.translate_survey(current_survey, lang_code)
+                 translations[lang_code] = translated
+                 lang_name = survey_trans._resolve_language(lang_code)
+                 status_messages.append(f"✅ Translated to {lang_name}")
+                 success_count += 1
+             except Exception as e:
+                 lang_name = survey_trans._resolve_language(lang_code)
+                 status_messages.append(f"❌ Failed to translate to {lang_name}: {str(e)}")
+                 print(f"Translation error for {lang_code}: {traceback.format_exc()}")
+
+         if success_count == 0:
+             return "❌ All translations failed. Please check your LLM configuration.", "", None
+
+         # Format translations for display
+         display_text = ""
+         for lang_code, trans_survey in translations.items():
+             if "error" not in trans_survey:
+                 lang_name = survey_trans._resolve_language(lang_code)
+                 display_text += f"\n{'='*50}\n"
+                 display_text += f"TRANSLATION: {lang_name.upper()}\n"
+                 display_text += f"{'='*50}\n\n"
+                 display_text += format_survey_display(trans_survey)
+
+         # Save to file for download
+         filepath = save_json_file(translations, "translations")
+
+         status = "\n".join(status_messages)
+         return status, display_text, filepath
+
+     except Exception as e:
+         error_msg = f"❌ Error during translation: {str(e)}"
+         print(f"Translation error: {traceback.format_exc()}")
+         return error_msg, "", None
+
+
+ def get_language_choices():
+     """Get language choices for dropdown"""
+     langs = survey_trans.get_supported_languages()
+     return [f"{code} - {name}" for code, name in langs.items()]
+
+
+ # ===========================
+ # Data Analysis Functions
+ # ===========================
+
+ def analyze_survey_data(responses_json: str, questions_json: str = None):
+     """Analyze survey responses"""
+     if not responses_json or not responses_json.strip():
+         return "❌ Please provide survey responses in JSON format.", "", None
+
+     try:
+         # Parse responses
+         responses = json.loads(responses_json)
+         questions = json.loads(questions_json) if questions_json and questions_json.strip() else None
+
+         if not isinstance(responses, list):
+             return "❌ Responses must be a JSON array.", "", None
+
+         if len(responses) == 0:
+             return "❌ No responses to analyze.", "", None
+
+         # Validate questions if provided
+         if questions and not isinstance(questions, list):
+             return "❌ Questions must be a JSON array.", "", None
+
+         # Run analysis
+         analysis_results = data_analyzer.analyze_responses(responses, questions)
+
+         if "error" in analysis_results:
+             return f"❌ Analysis error: {analysis_results['error']}", "", None
+
+         # Generate report
+         report_md = data_analyzer.generate_report(analysis_results, format="markdown")
+
+         # Save both JSON and Markdown
+         json_filepath = save_json_file(analysis_results, "analysis_results")
+         md_filepath = analysis_to_markdown_file(report_md, "analysis_report")
+
+         status_msg = f"✅ Analysis complete! Analyzed {len(responses)} responses."
+         if questions:
+             status_msg += f" Considered {len(questions)} questions."
+
+         return status_msg, report_md, json_filepath
+
+     except json.JSONDecodeError as e:
+         return f"❌ Invalid JSON format: {str(e)}", "", None
+     except Exception as e:
+         error_msg = f"❌ Error during analysis: {str(e)}"
+         print(f"Analysis error: {traceback.format_exc()}")
+         return error_msg, "", None
+
+
+ def load_example_responses():
+     """Load example responses for demonstration"""
+     example = [
+         {
+             "q1": "The medication helped reduce my symptoms significantly within the first week.",
+             "q2": "I experienced some mild side effects like drowsiness in the beginning.",
+             "q3": "Overall, I'm satisfied with the treatment and would recommend it to others."
+         },
+         {
+             "q1": "I didn't notice much improvement in my condition after taking the medication.",
+             "q2": "The side effects were quite severe and made it difficult to continue.",
+             "q3": "I had to stop taking it after two weeks due to adverse reactions."
+         },
+         {
+             "q1": "The medication worked well but took about 3-4 weeks to show results.",
+             "q2": "No major side effects, just some occasional nausea.",
+             "q3": "It's been effective for managing my symptoms on a daily basis."
+         }
+     ]
+     return json.dumps(example, indent=2)
+
+
+ # ===========================
+ # Gradio Interface
+ # ===========================
+
+ def create_interface():
+     """Create the main Gradio interface"""
+
+     with gr.Blocks(
+         title="ConversAI - Qualitative Research Assistant",
+         theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate")
+     ) as app:
+
+         gr.Markdown("""
+         # ConversAI - Your AI-Powered Qualitative Research Assistant
+
+         Battle the blank page, reach global audiences, and uncover insights with AI assistance.
+         """)
+
+         with gr.Tabs() as tabs:
+
+             # ========== SURVEY GENERATION TAB ==========
+             with gr.Tab("📝 Generate Survey"):
+                 gr.Markdown("""
+                 ## Battle the Blank Page
+                 Share an outline and get AI-powered surveys drafted in minutes,
+                 complete with industry best practices.
+                 """)
+
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         outline_input = gr.Textbox(
+                             label="Your Survey Outline or Topic",
+                             placeholder="Example: I want to understand patient experiences with a new diabetes medication, focusing on effectiveness, side effects, and quality of life impacts.",
+                             lines=6
+                         )
+
+                         survey_type_input = gr.Radio(
+                             label="Survey Type",
+                             choices=["Qualitative", "Quantitative", "Mixed"],
+                             value="Qualitative"
+                         )
+
+                         num_questions_input = gr.Slider(
+                             label="Number of Questions",
+                             minimum=5,
+                             maximum=25,
+                             value=10,
+                             step=1
+                         )
+
+                         audience_input = gr.Textbox(
+                             label="Target Audience",
+                             placeholder="Example: Adults aged 30-65 with Type 2 diabetes",
+                             value="General audience"
+                         )
+
+                         generate_btn = gr.Button("🚀 Generate Survey", variant="primary", size="lg")
+
+                     with gr.Column(scale=1):
+                         gen_status = gr.Textbox(label="Status", interactive=False)
+                         gen_output = gr.Markdown(label="Generated Survey")
+
+                 gen_download = gr.File(label="Download Survey JSON", visible=False)
+
+                 # Event handlers
+                 generate_btn.click(
+                     fn=generate_survey_from_outline,
+                     inputs=[outline_input, survey_type_input, num_questions_input, audience_input],
+                     outputs=[gen_status, gen_output, gen_download]
+                 ).then(
+                     fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
+                     inputs=[gen_download],
+                     outputs=[gen_download]
+                 )
+
+             # ========== TRANSLATION TAB ==========
+             with gr.Tab("🌍 Translate Survey"):
+                 gr.Markdown("""
+                 ## Reach Global Audiences
+                 Translate your surveys automatically to streamline efforts and reach wider audiences.
+                 """)
+
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         gr.Markdown("### Select Target Languages")
+
+                         # Create checkboxes for popular languages
+                         lang_checkboxes = gr.CheckboxGroup(
+                             label="Languages",
+                             choices=get_language_choices(),
+                             value=[]
+                         )
+
+                         translate_btn = gr.Button("🌐 Translate Survey", variant="primary", size="lg")
+
+                         gr.Markdown("""
+                         **Note:** Make sure you've generated a survey first, or upload one using the JSON format.
+                         """)
+
+                     with gr.Column(scale=1):
+                         trans_status = gr.Textbox(label="Translation Status", interactive=False)
+                         trans_output = gr.Markdown(label="Translations")
+
+                 trans_download = gr.File(label="Download Translations JSON", visible=False)
+
+                 # Event handlers
+                 def extract_lang_codes(selected_items):
+                     """Extract language codes from checkbox selections"""
+                     return [item.split(" - ")[0] for item in selected_items]
+
+                 translate_btn.click(
+                     fn=lambda x: translate_current_survey(extract_lang_codes(x)),
+                     inputs=[lang_checkboxes],
+                     outputs=[trans_status, trans_output, trans_download]
+                 ).then(
+                     fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
+                     inputs=[trans_download],
+                     outputs=[trans_download]
+                 )
+
+             # ========== ANALYSIS TAB ==========
+             with gr.Tab("📊 Analyze Data"):
+                 gr.Markdown("""
+                 ## Uncover Key Insights
+                 Upload your survey responses and get AI-assisted summaries of key findings,
+                 themes, and trends.
+                 """)
+
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         responses_input = gr.Textbox(
+                             label="Survey Responses (JSON)",
+                             placeholder='[{"q1": "response 1", "q2": "response 2"}, ...]',
+                             lines=10
+                         )
+
+                         questions_input = gr.Textbox(
+                             label="Questions (JSON, Optional)",
+                             placeholder='[{"question_text": "What is your experience?", ...}]',
+                             lines=5
+                         )
+
+                         with gr.Row():
+                             analyze_btn = gr.Button("🔍 Analyze Data", variant="primary", size="lg")
+                             example_btn = gr.Button("Load Example", variant="secondary")
+
+                     with gr.Column(scale=1):
+                         analysis_status = gr.Textbox(label="Status", interactive=False)
+                         analysis_output = gr.Markdown(label="Analysis Report")
+
+                 analysis_download = gr.File(label="Download Analysis JSON", visible=False)
+
+                 # Event handlers
+                 analyze_btn.click(
+                     fn=analyze_survey_data,
+                     inputs=[responses_input, questions_input],
+                     outputs=[analysis_status, analysis_output, analysis_download]
+                 ).then(
+                     fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
+                     inputs=[analysis_download],
+                     outputs=[analysis_download]
+                 )
+
+                 example_btn.click(
+                     fn=load_example_responses,
+                     outputs=[responses_input]
+                 )
+
+             # ========== ABOUT TAB ==========
+             with gr.Tab("ℹ️ About"):
+                 gr.Markdown("""
+                 ## About ConversAI
+
+                 ConversAI is a comprehensive qualitative research assistant that helps you:
+
+                 ### 🎯 Generate Surveys
+                 - Create professional surveys from simple outlines
+                 - Follow industry best practices automatically
+                 - Save hours of questionnaire design time
+
+                 ### 🌍 Translate Globally
+                 - Reach audiences in 18+ languages
+                 - Maintain cultural appropriateness
+                 - Expand your research scope effortlessly
+
+                 ### 📊 Analyze Results
+                 - Extract key themes automatically
+                 - Identify patterns and trends
+                 - Generate actionable insights
+
+                 ### 🔧 Technical Details
+
+                 **Supported LLM Providers:**
+                 - OpenAI (GPT-4, GPT-3.5)
+                 - Anthropic (Claude)
+                 - HuggingFace Inference API
+                 - LM Studio (local)
+
+                 **Configuration:**
+                 Set environment variables to configure your LLM provider:
+                 - `OPENAI_API_KEY` - For OpenAI models
+                 - `ANTHROPIC_API_KEY` - For Claude models
+                 - `HUGGINGFACE_API_KEY` or `HF_TOKEN` - For HuggingFace
+                 - `LM_STUDIO_URL` - For local LM Studio (default: http://localhost:1234/v1/chat/completions)
+
+                 ### 📄 Data Privacy
+
+                 - All processing is done through your configured LLM provider
+                 - No data is stored permanently by this application
+                 - Survey data and responses remain in your control
+
+                 ### 🚀 Getting Started
+
+                 1. **Generate** a survey from your research outline
+                 2. **Translate** it to reach global audiences
+                 3. **Collect** responses from participants
+                 4. **Analyze** the data to uncover insights
+
+                 ---
+
+                 Built with ❤️ using Gradio and state-of-the-art LLMs
+                 """)
+
+     return app
+
+
+ # ===========================
+ # Main Entry Point
+ # ===========================
+
+ if __name__ == "__main__":
+     demo = create_interface()
+
+     # Launch with appropriate settings
+     demo.launch(
+         server_name="0.0.0.0",  # Allow external access
+         server_port=7860,       # Standard HF Spaces port
+         share=False,            # Don't create a public link (HF Spaces handles this)
+         show_error=True
+     )
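
Because the tab callbacks above are plain functions, they can also be exercised headlessly, which is handy for the smoke tests the deployment guide asks for. A sketch (note that importing `app` builds the backend at import time, so the relevant API key must already be set; the outline text is a made-up example):

```python
# Headless smoke test of the generation callback defined in app.py.
from app import generate_survey_from_outline

status, preview_md, filepath = generate_survey_from_outline(
    outline="Understand remote workers' meeting fatigue",  # hypothetical topic
    survey_type="Qualitative",
    num_questions=8,
    audience="Remote employees",
)
print(status)    # "✅ Survey generated successfully! ..." on success
print(filepath)  # path to the downloadable survey JSON
```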
data_analyzer.py ADDED
@@ -0,0 +1,434 @@
+ """
+ Data Analysis Module - AI-assisted analysis of survey responses
+ """
+ import json
+ from typing import Dict, List, Optional
+ from collections import Counter
+ from llm_backend import LLMBackend
+
+
+ class DataAnalyzer:
+     """
+     Analyzes survey responses to uncover key findings, trends, and patterns.
+     Provides AI-assisted summaries for qualitative research data.
+     """
+
+     def __init__(self, llm_backend: LLMBackend):
+         self.llm = llm_backend
+
+     def analyze_responses(self, responses: List[Dict], questions: List[Dict] = None) -> Dict:
+         """
+         Comprehensive analysis of survey responses.
+
+         Args:
+             responses: List of response dictionaries
+             questions: Optional list of questions for context
+
+         Returns:
+             Analysis results including themes, sentiment, and insights
+         """
+         if not responses:
+             return {"error": "No responses to analyze"}
+
+         analysis = {
+             "summary": {},
+             "themes": [],
+             "sentiment": {},
+             "key_insights": [],
+             "response_count": len(responses)
+         }
+
+         # Generate overall summary
+         analysis["summary"] = self._generate_summary(responses, questions)
+
+         # Extract themes
+         analysis["themes"] = self._extract_themes(responses)
+
+         # Analyze sentiment
+         analysis["sentiment"] = self._analyze_sentiment(responses)
+
+         # Generate key insights
+         analysis["key_insights"] = self._generate_insights(responses, questions)
+
+         # Add quantitative stats if applicable
+         analysis["statistics"] = self._compute_statistics(responses, questions)
+
+         return analysis
+
+     def _generate_summary(self, responses: List[Dict], questions: List[Dict] = None) -> Dict:
+         """Generate an executive summary of responses"""
+         # Prepare context
+         response_texts = self._extract_text_responses(responses)
+         sample_size = min(50, len(response_texts))  # Use sample for large datasets
+         sample_responses = response_texts[:sample_size]
+
+         context = f"Total responses: {len(responses)}\n\n"
+         if questions:
+             context += "Questions asked:\n"
+             for i, q in enumerate(questions[:10], 1):  # Limit to first 10 questions
+                 context += f"{i}. {q.get('question_text', '')}\n"
+             context += "\n"
+
+         context += "Sample responses:\n"
+         for i, resp in enumerate(sample_responses, 1):
+             context += f"{i}. {resp[:200]}...\n"  # Truncate long responses
+
+         prompt = f"""Analyze the following survey responses and provide an executive summary.
+
+ {context}
+
+ Provide a summary that includes:
+ 1. Overview: High-level summary of what the data shows (2-3 sentences)
+ 2. Key patterns: Main patterns or trends observed
+ 3. Notable findings: Interesting or unexpected discoveries
+ 4. Response quality: Assessment of response depth and engagement
+
+ Respond with a JSON object with these fields:
+ {{
+     "overview": "...",
+     "key_patterns": ["pattern 1", "pattern 2", ...],
+     "notable_findings": ["finding 1", "finding 2", ...],
+     "response_quality": "..."
+ }}"""
+
+         messages = [
+             {"role": "system", "content": self._get_analyst_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=1000, temperature=0.5)
+             return self._parse_json_response(response)
+         except Exception as e:
+             return {"error": f"Summary generation failed: {str(e)}"}
+
+     def _extract_themes(self, responses: List[Dict], num_themes: int = 5) -> List[Dict]:
+         """Extract main themes from responses using AI"""
+         response_texts = self._extract_text_responses(responses)
+
+         if not response_texts:
+             return []
+
+         # Sample for large datasets
+         sample_size = min(100, len(response_texts))
+         sample_responses = response_texts[:sample_size]
+
+         prompt = f"""Analyze the following {len(sample_responses)} survey responses and identify the top {num_themes} themes.
+
+ Responses:
+ {self._format_responses_for_prompt(sample_responses)}
+
+ For each theme, provide:
+ 1. Theme name: A short, descriptive name
+ 2. Description: What this theme represents
+ 3. Prevalence: Estimated percentage of responses mentioning this theme
+ 4. Example quotes: 2-3 representative quotes from the responses
+
+ Respond with a JSON array of theme objects:
+ [
+     {{
+         "theme_name": "...",
+         "description": "...",
+         "prevalence": "XX%",
+         "example_quotes": ["quote 1", "quote 2"]
+     }}
+ ]"""
+
+         messages = [
+             {"role": "system", "content": self._get_analyst_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=1500, temperature=0.6)
+             themes = self._parse_json_response(response)
+             if isinstance(themes, list):
+                 return themes
+             return []
+         except Exception as e:
+             return [{"error": f"Theme extraction failed: {str(e)}"}]
+
+     def _analyze_sentiment(self, responses: List[Dict]) -> Dict:
+         """Analyze overall sentiment of responses"""
+         response_texts = self._extract_text_responses(responses)
+
+         if not response_texts:
+             return {}
+
+         # Sample for analysis
+         sample_size = min(100, len(response_texts))
+         sample_responses = response_texts[:sample_size]
+
+         prompt = f"""Analyze the sentiment of these {len(sample_responses)} survey responses.
+
+ Responses:
+ {self._format_responses_for_prompt(sample_responses)}
+
+ Provide sentiment analysis including:
+ 1. Overall sentiment: positive, negative, neutral, or mixed
+ 2. Sentiment distribution: Estimated percentage breakdown
+ 3. Emotional tone: Key emotions detected
+ 4. Intensity: How strong the sentiments are
+
+ Respond with JSON:
+ {{
+     "overall_sentiment": "...",
+     "distribution": {{
+         "positive": "XX%",
+         "neutral": "XX%",
+         "negative": "XX%"
+     }},
+     "emotions": ["emotion1", "emotion2", ...],
+     "intensity": "low|moderate|high"
+ }}"""
+
+         messages = [
+             {"role": "system", "content": self._get_analyst_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=500, temperature=0.4)
+             return self._parse_json_response(response)
+         except Exception as e:
+             return {"error": f"Sentiment analysis failed: {str(e)}"}
+
+     def _generate_insights(self, responses: List[Dict], questions: List[Dict] = None) -> List[str]:
+         """Generate actionable insights from the data"""
+         response_texts = self._extract_text_responses(responses)
+
+         if not response_texts:
+             return []
+
+         sample_size = min(100, len(response_texts))
+         sample_responses = response_texts[:sample_size]
+
+         context = f"Analyzing {len(responses)} survey responses.\n\n"
+         if questions:
+             context += "Research questions:\n"
+             for i, q in enumerate(questions[:5], 1):
+                 context += f"{i}. {q.get('question_text', '')}\n"
+             context += "\n"
+
+         prompt = f"""{context}
+
+ Sample responses:
+ {self._format_responses_for_prompt(sample_responses)}
+
+ Based on this data, provide 5-7 key insights that would be valuable for:
+ - Understanding the target audience
+ - Identifying opportunities or challenges
+ - Informing strategic decisions
+ - Recognizing patterns or trends
+
+ Each insight should be:
+ - Specific and actionable
+ - Supported by the data
+ - Clear and concise
+
+ Respond with a JSON array of insight strings:
+ ["insight 1", "insight 2", ...]"""
+
+         messages = [
+             {"role": "system", "content": self._get_analyst_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=1000, temperature=0.6)
+             insights = self._parse_json_response(response)
+             if isinstance(insights, list):
+                 return insights
+             return []
+         except Exception as e:
+             return [f"Insight generation failed: {str(e)}"]
+
+     def _compute_statistics(self, responses: List[Dict], questions: List[Dict] = None) -> Dict:
+         """Compute basic statistics from responses"""
+         stats = {
+             "total_responses": len(responses),
+             "response_lengths": {},
+             "completion_rate": "N/A"
+         }
+
+         # Calculate average response length
+         response_texts = self._extract_text_responses(responses)
+         if response_texts:
+             lengths = [len(r.split()) for r in response_texts]
+             stats["response_lengths"] = {
+                 "avg_words": sum(lengths) / len(lengths),
+                 "min_words": min(lengths),
+                 "max_words": max(lengths)
+             }
+
+         # Calculate completion rate if questions are provided
+         if questions:
+             total_questions = len(questions)
+             completed_questions = 0
+             for response in responses:
+                 if isinstance(response, dict):
+                     completed_questions += len([v for v in response.values() if v])
+
+             if total_questions > 0:
+                 completion_rate = (completed_questions / (total_questions * len(responses))) * 100
+                 stats["completion_rate"] = f"{completion_rate:.1f}%"
+
+         return stats
+
+     def generate_report(self, analysis_results: Dict, format: str = "markdown") -> str:
+         """
+         Generate a formatted report from analysis results.
+
+         Args:
+             analysis_results: Results from analyze_responses()
+             format: Output format (markdown, text, html)
+
+         Returns:
+             Formatted report string
+         """
+         if format == "markdown":
+             return self._generate_markdown_report(analysis_results)
+         elif format == "html":
+             return self._generate_html_report(analysis_results)
+         else:
+             return self._generate_text_report(analysis_results)
+
+     def _generate_markdown_report(self, results: Dict) -> str:
+         """Generate markdown formatted report"""
+         report = "# Survey Analysis Report\n\n"
+
+         # Summary section
+         if "summary" in results and results["summary"]:
+             report += "## Executive Summary\n\n"
+             summary = results["summary"]
+             if "overview" in summary:
+                 report += f"{summary['overview']}\n\n"
+             if "key_patterns" in summary:
+                 report += "### Key Patterns\n"
+                 for pattern in summary["key_patterns"]:
+                     report += f"- {pattern}\n"
+                 report += "\n"
+
+         # Statistics
+         if "statistics" in results:
+             report += "## Response Statistics\n\n"
+             stats = results["statistics"]
+             report += f"- Total Responses: {stats.get('total_responses', 'N/A')}\n"
+             if "response_lengths" in stats:
+                 rl = stats["response_lengths"]
+                 report += f"- Average Response Length: {rl.get('avg_words', 0):.1f} words\n"
+             report += f"- Completion Rate: {stats.get('completion_rate', 'N/A')}\n\n"
+
+         # Themes
+         if "themes" in results and results["themes"]:
+             report += "## Main Themes\n\n"
+             for i, theme in enumerate(results["themes"], 1):
+                 if isinstance(theme, dict) and "theme_name" in theme:
+                     report += f"### {i}. {theme['theme_name']}\n"
+                     report += f"{theme.get('description', '')}\n\n"
+                     report += f"**Prevalence:** {theme.get('prevalence', 'N/A')}\n\n"
+                     if "example_quotes" in theme:
+                         report += "**Example quotes:**\n"
+                         for quote in theme["example_quotes"]:
+                             report += f"> {quote}\n"
+                         report += "\n"
+
+         # Sentiment
+         if "sentiment" in results and results["sentiment"]:
+             report += "## Sentiment Analysis\n\n"
+             sent = results["sentiment"]
+             report += f"**Overall Sentiment:** {sent.get('overall_sentiment', 'N/A')}\n\n"
+             if "distribution" in sent:
+                 report += "**Distribution:**\n"
+                 for key, value in sent["distribution"].items():
+                     report += f"- {key.title()}: {value}\n"
+                 report += "\n"
+
+         # Key Insights
+         if "key_insights" in results and results["key_insights"]:
+             report += "## Key Insights\n\n"
+             for i, insight in enumerate(results["key_insights"], 1):
+                 report += f"{i}. {insight}\n"
+             report += "\n"
+
+         return report
+
+     def _generate_text_report(self, results: Dict) -> str:
+         """Generate plain text report"""
+         # Similar to markdown but without formatting
+         return self._generate_markdown_report(results).replace("#", "").replace("**", "").replace(">", "")
+
+     def _generate_html_report(self, results: Dict) -> str:
+         """Generate HTML report"""
+         # Convert markdown to basic HTML
+         md_report = self._generate_markdown_report(results)
+         # Basic conversion (for production, use a proper markdown-to-html library)
+         html = md_report.replace("# ", "<h1>").replace("\n\n", "</p>\n<p>")
+         return f"<html><body>{html}</body></html>"
+
+     def _get_analyst_system_prompt(self) -> str:
+         """System prompt for analysis tasks"""
+         return """You are an expert qualitative research analyst with deep expertise in:
+ - Thematic analysis and coding
+ - Sentiment analysis and emotional intelligence
+ - Pattern recognition in qualitative data
+ - Insight generation and strategic thinking
+ - Survey research methodology
+
+ Your analyses should be:
+ - Objective and evidence-based
+ - Nuanced and comprehensive
+ - Actionable and clear
+ - Grounded in the actual data provided
+
+ Always respond with valid JSON when requested."""
+
+     def _extract_text_responses(self, responses: List[Dict]) -> List[str]:
+         """Extract text from response objects"""
+         texts = []
+         for response in responses:
+             if isinstance(response, dict):
+                 # Extract all string values
+                 for value in response.values():
+                     if isinstance(value, str) and value.strip():
+                         texts.append(value.strip())
+             elif isinstance(response, str):
+                 texts.append(response.strip())
+         return texts
+
+     def _format_responses_for_prompt(self, responses: List[str], max_responses: int = 50) -> str:
+         """Format responses for inclusion in prompt"""
+         formatted = []
+         for i, resp in enumerate(responses[:max_responses], 1):
+             # Truncate very long responses
+             truncated = resp[:300] + "..." if len(resp) > 300 else resp
+             formatted.append(f"{i}. {truncated}")
+         return "\n".join(formatted)
+
+     def _parse_json_response(self, response: str):
+         """Parse JSON from LLM response"""
+         response = response.strip()
+
+         # Handle code blocks
+         if "```json" in response:
+             start = response.find("```json") + 7
+             end = response.find("```", start)
+             response = response[start:end].strip()
+         elif "```" in response:
+             start = response.find("```") + 3
+             end = response.find("```", start)
+             response = response[start:end].strip()
+
+         try:
+             return json.loads(response)
+         except json.JSONDecodeError:
+             # Try to find JSON object or array
+ if "{" in response:
427
+ start = response.find("{")
428
+ end = response.rfind("}") + 1
429
+ return json.loads(response[start:end])
430
+ elif "[" in response:
431
+ start = response.find("[")
432
+ end = response.rfind("]") + 1
433
+ return json.loads(response[start:end])
434
+ raise
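For reference, a minimal usage sketch of the reporting path (not part of the commit; the exact signature of analyze_responses(), defined earlier in this file, is assumed):

    # Hypothetical usage -- requires a configured, reachable LLM backend
    from llm_backend import LLMBackend
    from data_analyzer import DataAnalyzer

    responses = [{"q1": "Loved the onboarding"}, {"q1": "Setup was confusing"}]  # illustrative data
    analyzer = DataAnalyzer(LLMBackend())
    results = analyzer.analyze_responses(responses)  # assumed call shape
    print(analyzer.generate_report(results, format="markdown"))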
export_utils.py ADDED
@@ -0,0 +1,138 @@
+ """
+ Export Utilities - Handle various export formats
+ """
+ import json
+ import csv
+ from typing import Dict, List, Optional
+ from datetime import datetime
+
+
+ def save_json_file(data: Dict, prefix: str = "export") -> str:
+     """
+     Save data to JSON file and return filepath.
+
+     Args:
+         data: Data to save
+         prefix: Filename prefix
+
+     Returns:
+         Path to saved file
+     """
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"{prefix}_{timestamp}.json"
+
+     with open(filename, 'w', encoding='utf-8') as f:
+         json.dump(data, f, indent=2, ensure_ascii=False)
+
+     return filename
+
+
+ def survey_to_csv(survey_data: Dict) -> str:
+     """
+     Convert survey to CSV format (one row per question).
+
+     Args:
+         survey_data: Survey dictionary
+
+     Returns:
+         Path to CSV file
+     """
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"survey_{timestamp}.csv"
+
+     with open(filename, 'w', newline='', encoding='utf-8') as f:
+         writer = csv.writer(f)
+
+         # Write header
+         writer.writerow(['Question ID', 'Question Text', 'Type', 'Options', 'Required', 'Help Text'])
+
+         # Write questions
+         for q in survey_data.get('questions', []):
+             writer.writerow([
+                 q.get('id', ''),
+                 q.get('question_text', ''),
+                 q.get('question_type', ''),
+                 '; '.join(q.get('options', [])) if q.get('options') else '',
+                 'Yes' if q.get('required', False) else 'No',
+                 q.get('help_text', '')
+             ])
+
+     return filename
+
+
+ def responses_to_csv(responses: List[Dict], filename_prefix: str = "responses") -> Optional[str]:
+     """
+     Convert responses to CSV format.
+
+     Args:
+         responses: List of response dictionaries
+         filename_prefix: Prefix for filename
+
+     Returns:
+         Path to CSV file, or None if there are no responses
+     """
+     if not responses:
+         return None
+
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"{filename_prefix}_{timestamp}.csv"
+
+     # Get all unique keys from all responses
+     all_keys = set()
+     for response in responses:
+         if isinstance(response, dict):
+             all_keys.update(response.keys())
+
+     fieldnames = sorted(all_keys)
+
+     with open(filename, 'w', newline='', encoding='utf-8') as f:
+         writer = csv.DictWriter(f, fieldnames=fieldnames)
+         writer.writeheader()
+
+         for response in responses:
+             if isinstance(response, dict):
+                 writer.writerow(response)
+
+     return filename
+
+
+ def analysis_to_markdown_file(analysis_report: str, prefix: str = "analysis_report") -> str:
+     """
+     Save analysis report to markdown file.
+
+     Args:
+         analysis_report: Markdown formatted report
+         prefix: Filename prefix
+
+     Returns:
+         Path to markdown file
+     """
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"{prefix}_{timestamp}.md"
+
+     with open(filename, 'w', encoding='utf-8') as f:
+         f.write(analysis_report)
+
+     return filename
+
+
+ def create_survey_package(survey_data: Dict) -> Dict[str, str]:
+     """
+     Create a complete package of survey files (JSON, CSV, etc.).
+
+     Args:
+         survey_data: Survey dictionary
+
+     Returns:
+         Dictionary mapping format to filepath
+     """
+     package = {}
+
+     # Save JSON
+     package['json'] = save_json_file(survey_data, "survey")
+
+     # Save CSV
+     package['csv'] = survey_to_csv(survey_data)
+
+     return package
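A quick sketch of how these helpers compose (hypothetical survey values; all filenames are timestamped at call time):

    from export_utils import create_survey_package, responses_to_csv

    survey = {"title": "Demo", "questions": [
        {"id": 1, "question_text": "How was it?", "question_type": "open_ended", "required": True}
    ]}
    files = create_survey_package(survey)   # e.g. {'json': 'survey_<ts>.json', 'csv': 'survey_<ts>.csv'}
    path = responses_to_csv([{"q1": "Great"}, {"q1": "Okay"}])  # None if the list is empty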
llm_backend.py ADDED
@@ -0,0 +1,220 @@
+ """
+ LLM Backend for ConversAI - Supports multiple providers
+ """
+ import os
+ import requests
+ import json
+ from typing import List, Dict, Optional
+ from enum import Enum
+
+
+ class LLMProvider(Enum):
+     """Supported LLM providers"""
+     OPENAI = "openai"
+     ANTHROPIC = "anthropic"
+     HUGGINGFACE = "huggingface"
+     LM_STUDIO = "lm_studio"
+
+
+ class LLMBackend:
+     """
+     Unified interface for multiple LLM providers.
+     Supports OpenAI, Anthropic, HuggingFace Inference API, and LM Studio.
+     """
+
+     def __init__(self, provider: Optional[LLMProvider] = None,
+                  api_key: Optional[str] = None, model: Optional[str] = None):
+         """
+         Initialize LLM backend with specified provider.
+
+         Args:
+             provider: LLM provider to use (defaults to env var or LM_STUDIO)
+             api_key: API key for the provider (reads from env if not provided)
+             model: Model name to use (provider-specific defaults if not provided)
+         """
+         # Determine provider
+         if provider is None:
+             provider_str = os.getenv("LLM_PROVIDER", "lm_studio").lower()
+             self.provider = LLMProvider(provider_str)
+         else:
+             self.provider = provider
+
+         # Set API key
+         if api_key:
+             self.api_key = api_key
+         else:
+             if self.provider == LLMProvider.OPENAI:
+                 self.api_key = os.getenv("OPENAI_API_KEY")
+             elif self.provider == LLMProvider.ANTHROPIC:
+                 self.api_key = os.getenv("ANTHROPIC_API_KEY")
+             elif self.provider == LLMProvider.HUGGINGFACE:
+                 self.api_key = os.getenv("HUGGINGFACE_API_KEY")
+             else:
+                 self.api_key = None
+
+         # Set model
+         if model:
+             self.model = model
+         else:
+             self.model = self._get_default_model()
+
+         # Set API endpoint
+         self.api_url = self._get_api_url()
+
+     def _get_default_model(self) -> str:
+         """Get default model for each provider"""
+         defaults = {
+             LLMProvider.OPENAI: "gpt-4o-mini",
+             LLMProvider.ANTHROPIC: "claude-3-5-sonnet-20241022",
+             LLMProvider.HUGGINGFACE: "mistralai/Mixtral-8x7B-Instruct-v0.1",
+             LLMProvider.LM_STUDIO: "google/gemma-3-27b"
+         }
+         return os.getenv("LLM_MODEL", defaults[self.provider])
+
+     def _get_api_url(self) -> str:
+         """Get API URL for each provider"""
+         if self.provider == LLMProvider.OPENAI:
+             return "https://api.openai.com/v1/chat/completions"
+         elif self.provider == LLMProvider.ANTHROPIC:
+             return "https://api.anthropic.com/v1/messages"
+         elif self.provider == LLMProvider.HUGGINGFACE:
+             return f"https://api-inference.huggingface.co/models/{self.model}"
+         elif self.provider == LLMProvider.LM_STUDIO:
+             # Default matches the localhost URL documented in .env.example
+             return os.getenv("LM_STUDIO_URL", "http://localhost:1234/v1/chat/completions")
+
+     def generate(self,
+                  messages: List[Dict[str, str]],
+                  max_tokens: int = 1000,
+                  temperature: float = 0.7,
+                  json_mode: bool = False) -> str:
+         """
+         Generate completion from messages.
+
+         Args:
+             messages: List of message dicts with 'role' and 'content'
+             max_tokens: Maximum tokens to generate
+             temperature: Sampling temperature
+             json_mode: Whether to request JSON output (supported by some providers)
+
+         Returns:
+             Generated text response
+         """
+         try:
+             if self.provider == LLMProvider.OPENAI:
+                 return self._generate_openai(messages, max_tokens, temperature, json_mode)
+             elif self.provider == LLMProvider.ANTHROPIC:
+                 return self._generate_anthropic(messages, max_tokens, temperature)
+             elif self.provider == LLMProvider.HUGGINGFACE:
+                 return self._generate_huggingface(messages, max_tokens, temperature)
+             elif self.provider == LLMProvider.LM_STUDIO:
+                 return self._generate_lm_studio(messages, max_tokens, temperature)
+         except Exception as e:
+             raise Exception(f"LLM generation failed: {str(e)}") from e
+
+     def _generate_openai(self, messages, max_tokens, temperature, json_mode) -> str:
+         """Generate using OpenAI API"""
+         headers = {
+             "Authorization": f"Bearer {self.api_key}",
+             "Content-Type": "application/json"
+         }
+
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "max_tokens": max_tokens,
+             "temperature": temperature
+         }
+
+         if json_mode:
+             payload["response_format"] = {"type": "json_object"}
+
+         response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
+         response.raise_for_status()
+
+         data = response.json()
+         return data["choices"][0]["message"]["content"]
+
+     def _generate_anthropic(self, messages, max_tokens, temperature) -> str:
+         """Generate using Anthropic API"""
+         headers = {
+             "x-api-key": self.api_key,
+             "anthropic-version": "2023-06-01",
+             "Content-Type": "application/json"
+         }
+
+         # Convert messages format (extract system message if present)
+         system_message = None
+         converted_messages = []
+
+         for msg in messages:
+             if msg["role"] == "system":
+                 system_message = msg["content"]
+             else:
+                 converted_messages.append(msg)
+
+         payload = {
+             "model": self.model,
+             "messages": converted_messages,
+             "max_tokens": max_tokens,
+             "temperature": temperature
+         }
+
+         if system_message:
+             payload["system"] = system_message
+
+         response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
+         response.raise_for_status()
+
+         data = response.json()
+         return data["content"][0]["text"]
+
+     def _generate_huggingface(self, messages, max_tokens, temperature) -> str:
+         """Generate using HuggingFace Inference API"""
+         headers = {
+             "Authorization": f"Bearer {self.api_key}",
+             "Content-Type": "application/json"
+         }
+
+         # Convert messages to prompt
+         prompt = self._messages_to_prompt(messages)
+
+         payload = {
+             "inputs": prompt,
+             "parameters": {
+                 "max_new_tokens": max_tokens,
+                 "temperature": temperature,
+                 "return_full_text": False
+             }
+         }
+
+         response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
+         response.raise_for_status()
+
+         data = response.json()
+         if isinstance(data, list) and len(data) > 0:
+             return data[0].get("generated_text", "")
+         return ""
+
+     def _generate_lm_studio(self, messages, max_tokens, temperature) -> str:
+         """Generate using LM Studio local API"""
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "max_tokens": max_tokens,
+             "temperature": temperature
+         }
+
+         response = requests.post(self.api_url, json=payload, timeout=60)
+         response.raise_for_status()
+
+         data = response.json()
+         return data["choices"][0]["message"]["content"]
+
+     def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
+         """Convert message format to simple prompt"""
+         prompt_parts = []
+         for msg in messages:
+             role = msg["role"].capitalize()
+             content = msg["content"]
+             prompt_parts.append(f"{role}: {content}")
+         prompt_parts.append("Assistant:")
+         return "\n\n".join(prompt_parts)
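A minimal call pattern for the backend (sketch only; it assumes the matching API key is set in the environment, or that a local LM Studio server is running):

    from llm_backend import LLMBackend, LLMProvider

    backend = LLMBackend(provider=LLMProvider.OPENAI)  # or omit to honor LLM_PROVIDER
    reply = backend.generate(
        [{"role": "system", "content": "You are concise."},
         {"role": "user", "content": "Say hello."}],
        max_tokens=50,
        temperature=0.2,
    )
    print(reply)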
requirements.txt CHANGED
@@ -1,3 +1,3 @@
- gradio==4.38.1
- requests==2.32.3
  pandas==2.2.2
+ gradio==5.45.0
+ requests==2.32.3
  pandas==2.2.2
survey_generator.py ADDED
@@ -0,0 +1,224 @@
+ """
+ Survey Generation Module - Generate AI-powered surveys from outlines
+ """
+ import json
+ from typing import List, Dict
+ from llm_backend import LLMBackend
+
+
+ class SurveyGenerator:
+     """
+     Generates professional surveys from user outlines using AI.
+     Follows industry best practices for qualitative research.
+     """
+
+     def __init__(self, llm_backend: LLMBackend):
+         self.llm = llm_backend
+
+     def generate_survey(self,
+                         outline: str,
+                         survey_type: str = "qualitative",
+                         num_questions: int = 10,
+                         target_audience: str = "general") -> Dict:
+         """
+         Generate a complete survey from an outline.
+
+         Args:
+             outline: User's outline or topic description
+             survey_type: Type of survey (qualitative, quantitative, mixed)
+             num_questions: Target number of questions
+             target_audience: Description of target respondents
+
+         Returns:
+             Dict containing survey metadata and questions
+         """
+         prompt = self._build_generation_prompt(outline, survey_type, num_questions, target_audience)
+
+         messages = [
+             {"role": "system", "content": self._get_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=2000, temperature=0.7)
+             survey_data = self._parse_survey_response(response)
+
+             # Add metadata
+             survey_data["metadata"] = {
+                 "outline": outline,
+                 "survey_type": survey_type,
+                 "target_audience": target_audience,
+                 "generated_question_count": len(survey_data.get("questions", []))
+             }
+
+             return survey_data
+
+         except Exception as e:
+             raise Exception(f"Survey generation failed: {str(e)}")
+
+     def _get_system_prompt(self) -> str:
+         """System prompt for survey generation"""
+         return """You are an expert survey designer and qualitative researcher with deep knowledge of:
+ - Industry best practices for survey design
+ - Question formulation techniques (open-ended, closed-ended, Likert scales)
+ - Avoiding bias and leading questions
+ - Survey flow and respondent experience
+ - Research methodologies (interviews, focus groups, ethnographic studies)
+
+ Your task is to generate professional, well-structured surveys that will yield high-quality research data.
+ Follow these principles:
+ 1. Use clear, unambiguous language
+ 2. Avoid double-barreled questions
+ 3. Include a logical flow from general to specific
+ 4. Balance open-ended and structured questions appropriately
+ 5. Consider the respondent's cognitive load
+ 6. Include screening questions when relevant
+ 7. Add instructions and context where helpful
+
+ Always respond with valid JSON containing the survey structure."""
+
+     def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
+         """Build the user prompt for survey generation"""
+         return f"""Generate a professional {survey_type} survey based on the following outline:
+
+ OUTLINE:
+ {outline}
+
+ REQUIREMENTS:
+ - Target number of questions: {num_questions}
+ - Target audience: {target_audience}
+ - Survey type: {survey_type}
+
+ Please generate a complete survey with:
+ 1. A clear title
+ 2. An introduction/welcome message
+ 3. Well-crafted questions following best practices
+ 4. Appropriate question types for the research goals
+ 5. A thank you/closing message
+
+ Respond with a JSON object in this exact format:
+ {{
+     "title": "Survey Title",
+     "introduction": "Welcome message and instructions",
+     "questions": [
+         {{
+             "id": 1,
+             "question_text": "The question to ask",
+             "question_type": "open_ended|multiple_choice|likert_scale|yes_no|rating",
+             "options": ["option1", "option2"],
+             "required": true|false,
+             "help_text": "Optional clarification"
+         }}
+     ],
+     "closing": "Thank you message"
+ }}
+
+ For open-ended questions, omit the "options" field.
+ For multiple choice and Likert questions, include appropriate options.
+ Ensure questions follow best practices and are unbiased."""
+
+     def _parse_survey_response(self, response: str) -> Dict:
+         """Parse LLM response into survey structure"""
+         # Try to extract JSON from response
+         response = response.strip()
+
+         # Handle code blocks (tolerating a missing closing fence)
+         if "```json" in response:
+             start = response.find("```json") + 7
+             end = response.find("```", start)
+             response = (response[start:end] if end != -1 else response[start:]).strip()
+         elif "```" in response:
+             start = response.find("```") + 3
+             end = response.find("```", start)
+             response = (response[start:end] if end != -1 else response[start:]).strip()
+
+         try:
+             survey_data = json.loads(response)
+
+             # Validate required fields
+             required_fields = ["title", "introduction", "questions", "closing"]
+             for field in required_fields:
+                 if field not in survey_data:
+                     raise ValueError(f"Missing required field: {field}")
+
+             # Validate questions
+             if not isinstance(survey_data["questions"], list) or len(survey_data["questions"]) == 0:
+                 raise ValueError("Survey must contain at least one question")
+
+             return survey_data
+
+         except json.JSONDecodeError as e:
+             raise Exception(f"Failed to parse survey JSON: {str(e)}\nResponse: {response}")
+
+     def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
+         """
+         Refine a single survey question.
+
+         Args:
+             question: The question to improve
+             improvement_type: Type of improvement (clarity, neutrality, specificity)
+
+         Returns:
+             Improved question text
+         """
+         # Pick only the relevant improvement goal (avoids empty bullet lines in the prompt)
+         improvement_goals = {
+             "clarity": "Is clearer and easier to understand",
+             "neutrality": "Removes bias and leading language",
+             "specificity": "Is more specific and actionable"
+         }
+         goal = improvement_goals.get(improvement_type, improvement_goals["clarity"])
+
+         prompt = f"""Improve the following survey question for better {improvement_type}:
+
+ Original Question: {question}
+
+ Provide an improved version that:
+ - {goal}
+
+ Respond with only the improved question text, no explanation."""
+
+         messages = [
+             {"role": "system", "content": "You are an expert survey question designer."},
+             {"role": "user", "content": prompt}
+         ]
+
+         return self.llm.generate(messages, max_tokens=150, temperature=0.5).strip()
+
+     def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
+         """
+         Generate follow-up questions for deeper exploration.
+
+         Args:
+             base_question: The main question
+             num_follow_ups: Number of follow-up questions to generate
+
+         Returns:
+             List of follow-up question texts
+         """
+         prompt = f"""Generate {num_follow_ups} follow-up questions for this main question:
+
+ Main Question: {base_question}
+
+ The follow-up questions should:
+ 1. Probe deeper into the topic
+ 2. Explore different aspects or dimensions
+ 3. Encourage detailed responses
+ 4. Follow a logical progression
+
+ Respond with a JSON array of question strings."""
+
+         messages = [
+             {"role": "system", "content": "You are an expert in qualitative research interviews."},
+             {"role": "user", "content": prompt}
+         ]
+
+         response = self.llm.generate(messages, max_tokens=500, temperature=0.7)
+
+         try:
+             # Extract JSON array
+             if "[" in response:
+                 start = response.find("[")
+                 end = response.rfind("]") + 1
+                 follow_ups = json.loads(response[start:end])
+                 return follow_ups[:num_follow_ups]
+         except (json.JSONDecodeError, TypeError):
+             pass
+
+         # Fallback: split by newlines
+         lines = [line.strip() for line in response.split("\n") if line.strip()]
+         return [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]
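Sketch of a generation call (requires a reachable LLM; the outline text is illustrative):

    from llm_backend import LLMBackend
    from survey_generator import SurveyGenerator

    generator = SurveyGenerator(LLMBackend())
    survey = generator.generate_survey(
        outline="Customer satisfaction with our onboarding flow",
        survey_type="qualitative",
        num_questions=8,
        target_audience="new customers",
    )
    print(survey["title"], len(survey["questions"]))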
survey_translator.py ADDED
@@ -0,0 +1,263 @@
+ """
+ Survey Translation Module - Translate surveys to reach wider audiences
+ """
+ from typing import Dict, List
+ from llm_backend import LLMBackend
+
+
+ class SurveyTranslator:
+     """
+     Translates surveys into multiple languages while preserving
+     meaning, context, and cultural appropriateness.
+     """
+
+     # Common target languages for research
+     SUPPORTED_LANGUAGES = {
+         "es": "Spanish",
+         "fr": "French",
+         "de": "German",
+         "pt": "Portuguese",
+         "it": "Italian",
+         "zh": "Chinese (Simplified)",
+         "ja": "Japanese",
+         "ko": "Korean",
+         "ar": "Arabic",
+         "hi": "Hindi",
+         "ru": "Russian",
+         "nl": "Dutch",
+         "sv": "Swedish",
+         "pl": "Polish",
+         "tr": "Turkish",
+         "vi": "Vietnamese",
+         "th": "Thai",
+         "id": "Indonesian"
+     }
+
+     def __init__(self, llm_backend: LLMBackend):
+         self.llm = llm_backend
+
+     def translate_survey(self, survey_data: Dict, target_language: str) -> Dict:
+         """
+         Translate an entire survey to a target language.
+
+         Args:
+             survey_data: Survey dictionary with title, introduction, questions, closing
+             target_language: Target language code (e.g., 'es', 'fr') or full name
+
+         Returns:
+             Translated survey dictionary with same structure
+         """
+         # Resolve language name
+         language_name = self._resolve_language(target_language)
+
+         if not language_name:
+             raise ValueError(f"Unsupported language: {target_language}")
+
+         # Create a copy of the survey data
+         translated_survey = survey_data.copy()
+
+         # Translate main fields
+         translated_survey["title"] = self._translate_text(
+             survey_data.get("title", ""),
+             language_name,
+             context="survey title"
+         )
+
+         translated_survey["introduction"] = self._translate_text(
+             survey_data.get("introduction", ""),
+             language_name,
+             context="survey introduction"
+         )
+
+         translated_survey["closing"] = self._translate_text(
+             survey_data.get("closing", ""),
+             language_name,
+             context="survey closing message"
+         )
+
+         # Translate questions
+         translated_questions = []
+         for question in survey_data.get("questions", []):
+             translated_q = self._translate_question(question, language_name)
+             translated_questions.append(translated_q)
+
+         translated_survey["questions"] = translated_questions
+
+         # Add translation metadata; copy the nested dict first, because
+         # survey_data.copy() is shallow and mutating it in place would
+         # also change the original survey's metadata
+         translated_survey["metadata"] = dict(survey_data.get("metadata", {}))
+         translated_survey["metadata"]["translated_to"] = language_name
+         translated_survey["metadata"]["original_language"] = "English"
+
+         return translated_survey
+
+     def translate_batch(self, survey_data: Dict, target_languages: List[str]) -> Dict[str, Dict]:
+         """
+         Translate survey to multiple languages.
+
+         Args:
+             survey_data: Original survey data
+             target_languages: List of target language codes
+
+         Returns:
+             Dictionary mapping language codes to translated surveys
+         """
+         translations = {}
+
+         for lang_code in target_languages:
+             try:
+                 translated = self.translate_survey(survey_data, lang_code)
+                 translations[lang_code] = translated
+             except Exception as e:
+                 translations[lang_code] = {"error": str(e)}
+
+         return translations
+
+     def _resolve_language(self, language: str) -> str:
+         """Resolve language code or name to full name"""
+         language = language.strip().lower()
+
+         # Check if it's a code
+         if language in self.SUPPORTED_LANGUAGES:
+             return self.SUPPORTED_LANGUAGES[language]
+
+         # Check if it's a full name
+         for code, name in self.SUPPORTED_LANGUAGES.items():
+             if name.lower() == language:
+                 return name
+
+         # Return as-is if not found (LLM might still handle it)
+         return language.title()
+
+     def _translate_text(self, text: str, target_language: str, context: str = "") -> str:
+         """
+         Translate a piece of text with context awareness.
+
+         Args:
+             text: Text to translate
+             target_language: Target language name
+             context: Context for better translation (e.g., "survey question")
+
+         Returns:
+             Translated text
+         """
+         if not text or not text.strip():
+             return text
+
+         context_note = f" (this is a {context})" if context else ""
+
+         prompt = f"""Translate the following text to {target_language}{context_note}.
+
+ Maintain:
+ - The original meaning and nuance
+ - Professional and respectful tone
+ - Cultural appropriateness
+ - Any formatting or structure
+
+ Original text:
+ {text}
+
+ Provide only the translation, no explanations or notes."""
+
+         messages = [
+             {"role": "system", "content": self._get_translation_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             translation = self.llm.generate(messages, max_tokens=1000, temperature=0.3)
+             return translation.strip()
+         except Exception as e:
+             raise Exception(f"Translation failed: {str(e)}")
+
+     def _translate_question(self, question: Dict, target_language: str) -> Dict:
+         """
+         Translate a single question with all its components.
+
+         Args:
+             question: Question dictionary
+             target_language: Target language name
+
+         Returns:
+             Translated question dictionary
+         """
+         translated_q = question.copy()
+
+         # Translate question text
+         translated_q["question_text"] = self._translate_text(
+             question.get("question_text", ""),
+             target_language,
+             context="survey question"
+         )
+
+         # Translate options if present
+         if "options" in question and question["options"]:
+             translated_options = []
+             for option in question["options"]:
+                 translated_option = self._translate_text(
+                     option,
+                     target_language,
+                     context="answer option"
+                 )
+                 translated_options.append(translated_option)
+             translated_q["options"] = translated_options
+
+         # Translate help text if present
+         if "help_text" in question and question["help_text"]:
+             translated_q["help_text"] = self._translate_text(
+                 question["help_text"],
+                 target_language,
+                 context="help text"
+             )
+
+         return translated_q
+
+     def _get_translation_system_prompt(self) -> str:
+         """System prompt for translation tasks"""
+         return """You are an expert translator specializing in survey research and qualitative studies.
+
+ Your translations must:
+ 1. Preserve the exact meaning and intent of the original text
+ 2. Use culturally appropriate language for the target audience
+ 3. Maintain professional and neutral tone
+ 4. Adapt idioms and expressions appropriately
+ 5. Keep the same level of formality
+ 6. Preserve any special formatting or structure
+
+ For survey questions, be especially careful to:
+ - Avoid introducing bias
+ - Keep questions clear and unambiguous
+ - Maintain the same question type and structure
+ - Use natural, conversational language when appropriate
+
+ Provide accurate, natural-sounding translations that a native speaker would use."""
+
+     def back_translate(self, translated_text: str, original_language: str = "English") -> str:
+         """
+         Back-translate text to check translation quality.
+
+         Args:
+             translated_text: The translated text
+             original_language: Language to translate back to
+
+         Returns:
+             Back-translated text
+         """
+         prompt = f"""Translate the following text back to {original_language}.
+
+ Text to translate:
+ {translated_text}
+
+ Provide only the translation, no explanations."""
+
+         messages = [
+             {"role": "system", "content": "You are an expert translator. Translate accurately."},
+             {"role": "user", "content": prompt}
+         ]
+
+         return self.llm.generate(messages, max_tokens=1000, temperature=0.3).strip()
+
+     def get_supported_languages(self) -> Dict[str, str]:
+         """Get dictionary of supported language codes and names"""
+         return self.SUPPORTED_LANGUAGES.copy()
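Typical translator usage (sketch; the survey variable would be a dict produced by SurveyGenerator):

    from llm_backend import LLMBackend
    from survey_translator import SurveyTranslator

    translator = SurveyTranslator(LLMBackend())
    spanish = translator.translate_survey(survey, "es")
    batch = translator.translate_batch(survey, ["fr", "de"])  # {'fr': {...}, 'de': {...}}
    check = translator.back_translate(spanish["title"])       # rough quality check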
test_app.py ADDED
@@ -0,0 +1,130 @@
+ """
+ Basic test script for ConversAI modules
+ Run this to verify core functionality
+ """
+ from llm_backend import LLMBackend, LLMProvider
+ from survey_generator import SurveyGenerator
+ from survey_translator import SurveyTranslator
+ from data_analyzer import DataAnalyzer
+
+
+ def test_llm_backend():
+     """Test LLM backend initialization"""
+     print("\n=== Testing LLM Backend ===")
+     try:
+         backend = LLMBackend(provider=LLMProvider.LM_STUDIO)
+         print(f"βœ“ Backend initialized with provider: {backend.provider}")
+         print(f"βœ“ Model: {backend.model}")
+         print(f"βœ“ API URL: {backend.api_url}")
+         return backend
+     except Exception as e:
+         print(f"βœ— Backend initialization failed: {e}")
+         return None
+
+
+ def test_survey_generator(backend):
+     """Test survey generation"""
+     print("\n=== Testing Survey Generator ===")
+     if not backend:
+         print("βœ— Skipping (no backend)")
+         return None
+
+     try:
+         gen = SurveyGenerator(backend)
+         print("βœ“ Survey generator initialized")
+
+         # Note: Actual generation requires LLM connection
+         print("  (Actual survey generation requires LLM connection)")
+         return gen
+     except Exception as e:
+         print(f"βœ— Survey generator failed: {e}")
+         return None
+
+
+ def test_survey_translator(backend):
+     """Test survey translator"""
+     print("\n=== Testing Survey Translator ===")
+     if not backend:
+         print("βœ— Skipping (no backend)")
+         return None
+
+     try:
+         translator = SurveyTranslator(backend)
+         print("βœ“ Translator initialized")
+
+         # Test language list
+         langs = translator.get_supported_languages()
+         print(f"βœ“ Supports {len(langs)} languages")
+         print(f"  Sample languages: {', '.join(list(langs.values())[:5])}")
+         return translator
+     except Exception as e:
+         print(f"βœ— Translator failed: {e}")
+         return None
+
+
+ def test_data_analyzer(backend):
+     """Test data analyzer"""
+     print("\n=== Testing Data Analyzer ===")
+     if not backend:
+         print("βœ— Skipping (no backend)")
+         return None
+
+     try:
+         analyzer = DataAnalyzer(backend)
+         print("βœ“ Analyzer initialized")
+
+         # Sample data for a live run (kept for reference)
+         sample_responses = [
+             {"q1": "I had a great experience", "q2": "Very satisfied"},
+             {"q1": "It was okay", "q2": "Neutral feelings"},
+             {"q1": "Not very good", "q2": "Disappointed"}
+         ]
+
+         # Note: Actual analysis requires LLM connection
+         print("  (Actual analysis requires LLM connection)")
+         return analyzer
+     except Exception as e:
+         print(f"βœ— Analyzer failed: {e}")
+         return None
+
+
+ def test_modules():
+     """Test all modules"""
+     print("="*50)
+     print("ConversAI Module Tests")
+     print("="*50)
+
+     # Test backend
+     backend = test_llm_backend()
+
+     # Test generators
+     gen = test_survey_generator(backend)
+     translator = test_survey_translator(backend)
+     analyzer = test_data_analyzer(backend)
+
+     # Summary
+     print("\n=== Test Summary ===")
+     modules = {
+         "LLM Backend": backend is not None,
+         "Survey Generator": gen is not None,
+         "Survey Translator": translator is not None,
+         "Data Analyzer": analyzer is not None
+     }
+
+     for module, status in modules.items():
+         symbol = "βœ“" if status else "βœ—"
+         print(f"{symbol} {module}")
+
+     all_passed = all(modules.values())
+     print(f"\n{'βœ“ All tests passed!' if all_passed else 'βœ— Some tests failed'}")
+
+     if not all_passed:
+         print("\nNote: Make sure your LLM backend is configured correctly.")
+         print("Check environment variables or .env file.")
+
+     return all_passed
+
+
+ if __name__ == "__main__":
+     test_modules()
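These tests only verify construction; a hypothetical stub backend (not part of the commit) shows how the same checks could run fully offline:

    class StubBackend:
        """Stand-in for LLMBackend that never makes a network call."""
        provider, model, api_url = "stub", "stub-model", "n/a"

        def generate(self, messages, max_tokens=1000, temperature=0.7, json_mode=False):
            # Canned reply satisfying SurveyGenerator's required fields
            return ('{"title": "Stub", "introduction": "Hi", '
                    '"questions": [{"id": 1, "question_text": "Q?", '
                    '"question_type": "open_ended", "required": true}], '
                    '"closing": "Thanks"}')

    test_survey_generator(StubBackend())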