jmisak committed
Commit 196c707 · verified · 1 Parent(s): 4072ec3

Upload 23 files

Files changed (14):
  1. .env.example +38 -0
  2. .gitattributes +35 -35
  3. .gitignore +58 -0
  4. DEPLOYMENT.md +278 -0
  5. README.md +90 -6
  6. USAGE_GUIDE.md +278 -0
  7. app.py +495 -0
  8. data_analyzer.py +434 -0
  9. export_utils.py +138 -0
  10. llm_backend.py +220 -0
  11. requirements.txt +2 -2
  12. survey_generator.py +224 -0
  13. survey_translator.py +263 -0
  14. test_app.py +130 -0
.env.example ADDED
@@ -0,0 +1,38 @@
+ # ConversAI Environment Configuration
+ # Copy this file to .env and fill in your credentials
+
+ # ===========================
+ # LLM Provider Configuration
+ # ===========================
+
+ # Specify which provider to use (openai, anthropic, huggingface, lm_studio)
+ LLM_PROVIDER=huggingface
+
+ # OpenAI Configuration
+ OPENAI_API_KEY=your_openai_api_key_here
+ # Optional: Override default model
+ # LLM_MODEL=gpt-4o-mini
+
+ # Anthropic Configuration
+ ANTHROPIC_API_KEY=your_anthropic_api_key_here
+ # Optional: Override default model
+ # LLM_MODEL=claude-3-5-sonnet-20241022
+
+ # HuggingFace Configuration
+ HUGGINGFACE_API_KEY=your_huggingface_api_key_here
+ # Optional: Override default model
+ # LLM_MODEL=mistralai/Mixtral-8x7B-Instruct-v0.1
+
+ # LM Studio Configuration (for local development)
+ LM_STUDIO_URL=http://localhost:1234/v1/chat/completions
+ # LLM_MODEL=your_local_model_name
+
+ # ===========================
+ # Application Settings
+ # ===========================
+
+ # Port for local development (HF Spaces uses 7860 by default)
+ PORT=7860
+
+ # Enable debug mode
+ DEBUG=false
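
For orientation, a minimal sketch of how these variables can be read from Python with nothing but `os.getenv` (auto-loading a `.env` file would need an extra package such as python-dotenv, which is an assumption, not something the shown requirements include). The defaults mirror the values above:

```python
import os

# Read the same variables .env.example defines; defaults mirror the file.
provider = os.getenv("LLM_PROVIDER", "huggingface")
api_key = (
    os.getenv("OPENAI_API_KEY")
    or os.getenv("ANTHROPIC_API_KEY")
    or os.getenv("HUGGINGFACE_API_KEY")
    or os.getenv("HF_TOKEN")
)
lm_studio_url = os.getenv("LM_STUDIO_URL", "http://localhost:1234/v1/chat/completions")
port = int(os.getenv("PORT", "7860"))
debug = os.getenv("DEBUG", "false").lower() == "true"
```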
.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text

+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,58 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual environments
+ venv/
+ ENV/
+ env/
+ .venv
+
+ # Environment variables
+ .env
+ .env.local
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # Gradio
+ flagged/
+ gradio_cached_examples/
+
+ # Data files
+ *.json
+ !requirements.json
+ conversation_log_*.json
+
+ # Logs
+ *.log
+
+ # OS
+ .DS_Store
+ Thumbs.db
+
+ # Temporary files
+ *.tmp
+ *.temp
DEPLOYMENT.md ADDED
@@ -0,0 +1,278 @@
+ # Deployment Guide
+
+ ## Deploying to HuggingFace Spaces
+
+ ### Prerequisites
+ - HuggingFace account
+ - API token from your LLM provider (or use HF Inference API)
+
+ ### Step-by-Step Deployment
+
+ #### 1. Create a New Space
+
+ 1. Go to https://huggingface.co/spaces
+ 2. Click "Create new Space"
+ 3. Choose a name (e.g., "conversai-research-assistant")
+ 4. Select SDK: **Gradio**
+ 5. Choose visibility (Public or Private)
+ 6. Click "Create Space"
+
+ #### 2. Upload Files
+
+ Upload these files to your Space:
+
+ **Required Files:**
+ - `app.py` - Main application
+ - `llm_backend.py` - LLM interface
+ - `survey_generator.py` - Survey generation
+ - `survey_translator.py` - Translation module
+ - `data_analyzer.py` - Analysis module
+ - `export_utils.py` - Export utilities
+ - `requirements.txt` - Dependencies
+ - `README.md` - Space description
+
+ **Optional Files:**
+ - `.env.example` - Configuration template
+ - `USAGE_GUIDE.md` - User guide
+ - `test_app.py` - Testing script
+
+ #### 3. Configure Environment Variables
+
+ In your Space settings, add environment variables:
+
+ **For HuggingFace Inference API (Free Tier):**
+ ```
+ LLM_PROVIDER=huggingface
+ # HF_TOKEN is automatically available in Spaces
+ ```
+
+ **For OpenAI:**
+ ```
+ LLM_PROVIDER=openai
+ OPENAI_API_KEY=sk-your-key-here
+ ```
+
+ **For Anthropic:**
+ ```
+ LLM_PROVIDER=anthropic
+ ANTHROPIC_API_KEY=your-key-here
+ ```
+
+ #### 4. Space Will Auto-Deploy
+
+ - HuggingFace will automatically build and deploy
+ - Check the "Logs" tab for build status
+ - First build may take 2-3 minutes
+
+ #### 5. Test Your Deployment
+
+ 1. Wait for "Running" status
+ 2. Open the Space URL
+ 3. Test survey generation
+ 4. Test translation
+ 5. Test analysis with example data
+
+ ### Using HuggingFace Inference API
+
+ The easiest option for deployment is to use HuggingFace's free Inference API:
+
+ **Pros:**
+ - No API key needed (uses HF_TOKEN automatically)
+ - Free tier available
+ - Easy setup
+
+ **Cons:**
+ - May have rate limits on free tier
+ - Slower than paid providers
+ - May queue during high usage
+
+ **Configuration:**
+ Just set `LLM_PROVIDER=huggingface` in your environment variables.
+
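
To sanity-check this configuration outside the UI, a minimal sketch using the same classes this upload's `app.py` imports (`LLMBackend` and `LLMProvider` from `llm_backend.py`). The `generate(messages, max_tokens=..., temperature=...)` signature matches the calls in `data_analyzer.py`, but treat the snippet as a sketch rather than a guaranteed API:

```python
import os
from llm_backend import LLMBackend, LLMProvider

# Mirror app.py's HuggingFace branch: prefer HUGGINGFACE_API_KEY, fall back to HF_TOKEN.
api_key = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
backend = LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=api_key)

# One tiny generation as a smoke test.
print(backend.generate([{"role": "user", "content": "Say OK."}], max_tokens=10, temperature=0.0))
```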
+ ### Using Other Providers
+
+ #### OpenAI (Recommended for Production)
+
+ **Pros:**
+ - Fast and reliable
+ - High quality outputs
+ - Good API documentation
+
+ **Cons:**
+ - Requires paid API key
+ - Usage costs
+
+ **Cost Estimate:**
+ - Survey generation: ~$0.01-0.05 per survey
+ - Translation: ~$0.01-0.03 per language
+ - Analysis: ~$0.05-0.15 per batch
+
+ #### Anthropic Claude
+
+ **Pros:**
+ - Excellent for nuanced text
+ - Strong reasoning capabilities
+ - Good safety features
+
+ **Cons:**
+ - Requires API key
+ - Usage costs
+
+ **Cost Estimate:**
+ Similar to OpenAI pricing
+
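
As a back-of-the-envelope check on the per-operation estimates above (the workload numbers below are hypothetical inputs, not measurements):

```python
# Hypothetical monthly workload priced with the per-operation estimates above.
surveys, languages, batches = 100, 3, 20
low  = surveys * 0.01 + surveys * languages * 0.01 + batches * 0.05
high = surveys * 0.05 + surveys * languages * 0.03 + batches * 0.15
print(f"Estimated monthly cost: ${low:.2f} - ${high:.2f}")  # $5.00 - $17.00
```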
+ ## Deploying Locally
+
+ ### For Development
+
+ ```bash
+ # 1. Clone/download repository
+ git clone <your-repo-url>
+ cd ConversAI
+
+ # 2. Create virtual environment
+ python -m venv venv
+ source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+ # 3. Install dependencies
+ pip install -r requirements.txt
+
+ # 4. Set environment variables
+ export LLM_PROVIDER="openai"
+ export OPENAI_API_KEY="your-key"
+
+ # 5. Run
+ python app.py
+ ```
+
+ Access at `http://localhost:7860`
+
+ ### For Production (Self-Hosted)
+
+ Use Docker for production deployment:
+
+ **Create Dockerfile:**
+ ```dockerfile
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # COPY with multiple sources requires the destination to end with "/"
+ COPY *.py ./
+ COPY *.md ./
+
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
+ ENV GRADIO_SERVER_PORT=7860
+
+ EXPOSE 7860
+
+ CMD ["python", "app.py"]
+ ```
+
+ **Build and run:**
+ ```bash
+ docker build -t conversai .
+ docker run -p 7860:7860 \
+   -e LLM_PROVIDER=openai \
+   -e OPENAI_API_KEY=your-key \
+   conversai
+ ```
+
+ ## Post-Deployment Checklist
+
+ - [ ] App loads without errors
+ - [ ] Can generate a survey
+ - [ ] Can translate a survey
+ - [ ] Can analyze sample data
+ - [ ] Downloads work correctly
+ - [ ] Error messages are clear
+ - [ ] All tabs are accessible
+ - [ ] Mobile view works (if public)
+
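
For the first checklist item, a small scripted probe can save manual clicking. A minimal sketch (the URL is a placeholder for your Space or server; a 200 response means Gradio is serving the UI, it does not exercise the LLM backend):

```python
import urllib.request

# Hypothetical deployment URL; replace with your Space URL or http://localhost:7860.
URL = "http://localhost:7860"

with urllib.request.urlopen(URL, timeout=30) as resp:
    print(f"{URL} -> HTTP {resp.status}")  # expect 200 when the app is up
```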
+ ## Monitoring and Maintenance
+
+ ### Check Usage
+
+ Monitor your LLM API usage:
+ - OpenAI: https://platform.openai.com/usage
+ - Anthropic: Check your console
+ - HuggingFace: Monitor rate limits
+
+ ### Update Dependencies
+
+ Regularly update to get security fixes:
+ ```bash
+ pip install --upgrade gradio requests pandas
+ ```
+
+ ### Backup
+
+ Regularly back up:
+ - Generated surveys
+ - Analysis results
+ - User feedback
+ - Configuration
+
+ ## Troubleshooting Deployment
+
+ ### Space Build Fails
+
+ **Check:**
+ - `requirements.txt` is valid
+ - `README.md` has correct frontmatter
+ - No syntax errors in Python files
+
+ ### Space Runs But Errors
+
+ **Check:**
+ - Environment variables are set
+ - API keys are valid
+ - Provider quotas aren't exceeded
+
+ ### Slow Performance
+
+ **Solutions:**
+ - Upgrade to a paid LLM tier
+ - Use faster models (e.g., GPT-4o-mini)
+ - Add caching for common requests
+ - Optimize prompts for shorter responses
+
+ ## Scaling Considerations
+
+ ### For Heavy Usage
+
+ 1. **Use faster models**: GPT-4o-mini instead of GPT-4
+ 2. **Implement caching**: Cache common survey patterns
+ 3. **Add rate limiting**: Prevent abuse
+ 4. **Load balancing**: Use multiple API keys
+ 5. **Queue system**: Handle concurrent requests
+
+ ### Cost Optimization
+
+ 1. **Optimize prompts**: Shorter prompts = lower costs
+ 2. **Batch operations**: Process multiple items together
+ 3. **Use cheaper models**: For simpler tasks
+ 4. **Set token limits**: Prevent runaway costs (see the sketch below)
+ 5. **Monitor usage**: Set up alerts
+
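
For item 4, a sketch of capping output length per call, reusing the `generate(messages, max_tokens=..., temperature=...)` signature that `data_analyzer.py` already relies on (the ceiling value itself is an assumption to tune):

```python
# Wrap the backend so every call carries a hard output cap.
MAX_OUTPUT_TOKENS = 800  # hypothetical ceiling; tune per task

def generate_capped(backend, messages, **kwargs):
    kwargs["max_tokens"] = min(kwargs.get("max_tokens", MAX_OUTPUT_TOKENS), MAX_OUTPUT_TOKENS)
    return backend.generate(messages, **kwargs)
```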
+ ## Security Best Practices
+
+ 1. **Never commit API keys** to version control
+ 2. **Use environment variables** for secrets
+ 3. **Rotate keys regularly**
+ 4. **Set spending limits** with providers
+ 5. **Monitor for unusual activity**
+ 6. **Use private Spaces** for sensitive research
+
+ ## Support and Resources
+
+ - **HuggingFace Docs**: https://huggingface.co/docs/hub/spaces
+ - **Gradio Docs**: https://gradio.app/docs
+ - **OpenAI API**: https://platform.openai.com/docs
+ - **Anthropic API**: https://docs.anthropic.com
+
+ ---
+
+ Need help? Check the USAGE_GUIDE.md or open an issue!
README.md CHANGED
@@ -1,6 +1,90 @@
- ---
- title: ConversAI
- app_file: insight_genie_v021.py
- sdk: gradio
- sdk_version: 5.45.0
- ---
+ ---
+ title: ConversAI - Qualitative Research Assistant
+ emoji: 🔬
+ colorFrom: blue
+ colorTo: purple
+ sdk: gradio
+ sdk_version: 5.45.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ # ConversAI - AI-Powered Qualitative Research Assistant
+
+ Battle the blank page, reach global audiences, and uncover insights with AI assistance.
+
+ ## 🌟 Features
+
+ ### 📝 Survey Generation
+ - Generate professional surveys from simple outlines
+ - Follow industry best practices automatically
+ - Choose from qualitative, quantitative, or mixed methods
+ - Customize number of questions and target audience
+
+ ### 🌍 Survey Translation
+ - Translate surveys to 18+ languages
+ - Maintain cultural appropriateness and meaning
+ - Reach global audiences effortlessly
+ - Batch translation support
+
+ ### 📊 Data Analysis
+ - AI-assisted thematic analysis
+ - Sentiment analysis and emotional insights
+ - Automatic pattern and trend detection
+ - Generate actionable insights and recommendations
+ - Export detailed analysis reports
+
+ ## 🚀 Quick Start
+
+ 1. **Generate a Survey**: Start with an outline or topic description
+ 2. **Translate**: Select target languages to reach global audiences
+ 3. **Collect Responses**: Use the generated survey with your participants
+ 4. **Analyze**: Upload responses to uncover key findings and trends
+
+ ## 🔧 Configuration
+
+ ConversAI supports multiple LLM providers. Configure via environment variables:
+
+ - `OPENAI_API_KEY` - For OpenAI models (GPT-4, GPT-3.5)
+ - `ANTHROPIC_API_KEY` - For Claude models
+ - `HUGGINGFACE_API_KEY` or `HF_TOKEN` - For HuggingFace Inference API
+ - `LM_STUDIO_URL` - For local LM Studio instance
+
+ The app automatically detects which provider to use based on available credentials.
+
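
The detection order is the one implemented by `initialize_backend()` in `app.py`; in sketch form:

```python
import os

# Provider precedence as implemented in app.py's initialize_backend().
if os.getenv("OPENAI_API_KEY"):
    provider = "openai"
elif os.getenv("ANTHROPIC_API_KEY"):
    provider = "anthropic"
elif os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN"):
    provider = "huggingface"
else:
    provider = "lm_studio"  # local fallback
```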
+ ## 📦 Installation
+
+ ```bash
+ pip install -r requirements.txt
+ python app.py
+ ```
+
+ ## 🏗️ Architecture
+
+ ConversAI is built with a modular architecture:
+
+ - **llm_backend.py** - Unified LLM interface supporting multiple providers
+ - **survey_generator.py** - AI-powered survey generation
+ - **survey_translator.py** - Multi-language translation engine
+ - **data_analyzer.py** - Qualitative data analysis and insights
+ - **app.py** - Gradio-based web interface
+
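
The modules are wired together the way `app.py` does it at startup: one shared backend injected into each feature module.

```python
from llm_backend import LLMBackend, LLMProvider
from survey_generator import SurveyGenerator
from survey_translator import SurveyTranslator
from data_analyzer import DataAnalyzer

# One backend, shared by all three feature modules (as in app.py).
backend = LLMBackend(provider=LLMProvider.OPENAI)
generator = SurveyGenerator(backend)
translator = SurveyTranslator(backend)
analyzer = DataAnalyzer(backend)
```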
+ ## 📄 Data Privacy
+
+ - All processing is done through your configured LLM provider
+ - No data is stored permanently by this application
+ - Survey data and responses remain in your control
+ - Suitable for sensitive research projects
+
+ ## 🤝 Contributing
+
+ Contributions are welcome! This is a production-grade application designed for real-world qualitative research.
+
+ ## 📝 License
+
+ MIT License - Feel free to use for research and commercial purposes.
+
+ ---
+
+ Built with ❤️ using Gradio and state-of-the-art LLMs
USAGE_GUIDE.md ADDED
@@ -0,0 +1,278 @@
+ # ConversAI Usage Guide
+
+ ## Quick Start
+
+ ### 1. Installation
+
+ ```bash
+ # Clone or download the repository
+ cd ConversAI
+
+ # Install dependencies
+ pip install -r requirements.txt
+ ```
+
+ ### 2. Configuration
+
+ ConversAI supports multiple LLM providers. Choose one and configure:
+
+ #### Option A: HuggingFace (Recommended for HF Spaces)
+ ```bash
+ export HUGGINGFACE_API_KEY="your_hf_token_here"
+ export LLM_PROVIDER="huggingface"
+ ```
+
+ #### Option B: OpenAI
+ ```bash
+ export OPENAI_API_KEY="your_openai_key_here"
+ export LLM_PROVIDER="openai"
+ ```
+
+ #### Option C: Anthropic
+ ```bash
+ export ANTHROPIC_API_KEY="your_anthropic_key_here"
+ export LLM_PROVIDER="anthropic"
+ ```
+
+ #### Option D: Local LM Studio
+ ```bash
+ export LLM_PROVIDER="lm_studio"
+ export LM_STUDIO_URL="http://localhost:1234/v1/chat/completions"
+ ```
+
+ ### 3. Run the Application
+
+ ```bash
+ python app.py
+ ```
+
+ The app will be available at `http://localhost:7860`
+
+ ## Features Guide
+
+ ### 📝 Survey Generation
+
+ Generate professional surveys from simple outlines.
+
+ **Steps:**
+ 1. Navigate to the "Generate Survey" tab
+ 2. Enter your research outline or topic description
+    - Example: "I want to understand patient experiences with a new diabetes medication"
+ 3. Select survey type: Qualitative, Quantitative, or Mixed
+ 4. Set number of questions (5-25 recommended)
+ 5. Specify your target audience
+ 6. Click "Generate Survey"
+
+ **Best Practices:**
+ - Be specific about your research goals
+ - Mention key topics you want to explore
+ - Include context about your target respondents
+ - Start with 10-15 questions for most surveys
+
+ **Output:**
+ - Formatted survey preview
+ - Downloadable JSON file with full survey data
+ - Questions follow industry best practices
+ - Includes introduction and closing messages
+
+ ### 🌍 Survey Translation
+
+ Translate your surveys to reach global audiences.
+
+ **Steps:**
+ 1. Generate a survey first (or have one ready)
+ 2. Navigate to the "Translate Survey" tab
+ 3. Select target language(s) from the checkbox list
+ 4. Click "Translate Survey"
+
+ **Supported Languages:**
+ - Spanish, French, German, Portuguese
+ - Chinese, Japanese, Korean
+ - Arabic, Hindi, Russian
+ - And 8+ more languages
+
+ **Features:**
+ - Maintains cultural appropriateness
+ - Preserves question intent and meaning
+ - Handles multiple languages in one batch
+ - Exports all translations in a single file
+
+ **Tips:**
+ - Translate to multiple similar languages to compare phrasing
+ - Use back-translation to verify accuracy
+ - Consider cultural context for sensitive topics
+
+ ### 📊 Data Analysis
+
+ Uncover insights from your survey responses.
+
+ **Steps:**
+ 1. Navigate to the "Analyze Data" tab
+ 2. Prepare your responses in JSON format:
+ ```json
+ [
+   {"q1": "response 1", "q2": "response 2"},
+   {"q1": "response 1", "q2": "response 2"}
+ ]
+ ```
+ 3. Optionally include questions for context
+ 4. Click "Load Example" to see the format
+ 5. Click "Analyze Data"
+
+ **Analysis Includes:**
+ - **Executive Summary** - High-level overview
+ - **Themes** - Main topics identified in responses
+ - **Sentiment Analysis** - Emotional tone and distribution
+ - **Key Insights** - Actionable findings
+ - **Statistics** - Response metrics
+
+ **Output Formats:**
+ - Markdown report (for viewing)
+ - JSON file (for further processing)
+ - Both include complete analysis results
+
+ **Pro Tips:**
+ - Minimum 10-20 responses for meaningful analysis
+ - Include diverse perspectives for richer insights
+ - Provide questions for better context
+ - Export results for presentations
+
+ ## File Formats
+
+ ### Survey JSON Format
+ ```json
+ {
+   "title": "Survey Title",
+   "introduction": "Welcome message",
+   "questions": [
+     {
+       "id": 1,
+       "question_text": "Your question here?",
+       "question_type": "open_ended",
+       "required": true,
+       "help_text": "Optional clarification"
+     }
+   ],
+   "closing": "Thank you message"
+ }
+ ```
+
+ ### Responses JSON Format
+ ```json
+ [
+   {
+     "q1": "First question response",
+     "q2": "Second question response",
+     "q3": "Third question response"
+   },
+   {
+     "q1": "Another respondent's answer",
+     "q2": "Their second answer",
+     "q3": "Their third answer"
+   }
+ ]
+ ```
+
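
Before pasting responses into the app, you can validate a file with the same shape checks `analyze_survey_data` performs in `app.py`:

```python
import json

def validate_responses(path: str) -> list:
    """Apply the same shape checks app.py performs before analysis."""
    with open(path) as f:
        responses = json.load(f)
    if not isinstance(responses, list):
        raise ValueError("Responses must be a JSON array.")
    if not responses:
        raise ValueError("No responses to analyze.")
    return responses

responses = validate_responses("responses.json")  # hypothetical filename
print(f"{len(responses)} responses look structurally valid.")
```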
+ ## Deployment to HuggingFace Spaces
+
+ 1. Create a new Space on HuggingFace
+ 2. Upload all `.py` files and `requirements.txt`
+ 3. Upload `README.md` with the frontmatter
+ 4. Set environment variables in Space settings:
+    - Add `HF_TOKEN` (automatically available)
+    - Or add API keys for other providers
+ 5. Space will auto-deploy!
+
+ ## Troubleshooting
+
+ ### Issue: "LLM generation failed"
+ **Solutions:**
+ - Check your API key is set correctly
+ - Verify you have credits/quota with your provider
+ - Try a different provider
+ - Check network connectivity
+
+ ### Issue: "Translation failed"
+ **Solutions:**
+ - Ensure a survey was generated first
+ - Check API key and quota
+ - Try translating to fewer languages at once
+ - Verify the survey data is valid
+
+ ### Issue: "Analysis returned no results"
+ **Solutions:**
+ - Check the JSON format is valid
+ - Ensure responses is a list/array
+ - Provide at least 3-5 responses
+ - Check the LLM provider is working
+
+ ### Issue: "Module import errors"
+ **Solutions:**
+ ```bash
+ pip install -r requirements.txt --upgrade
+ ```
+
+ ## API Usage (Advanced)
+
+ You can also use the modules programmatically:
+
+ ```python
+ from llm_backend import LLMBackend, LLMProvider
+ from survey_generator import SurveyGenerator
+
+ # Initialize
+ backend = LLMBackend(provider=LLMProvider.OPENAI)
+ generator = SurveyGenerator(backend)
+
+ # Generate survey
+ survey = generator.generate_survey(
+     outline="Study user satisfaction with mobile apps",
+     survey_type="qualitative",
+     num_questions=10,
+     target_audience="Mobile app users aged 18-35"
+ )
+
+ print(survey)
+ ```
+
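
The same pattern extends to translation and analysis. The calls below use the signatures that `app.py` invokes (`translate_survey(survey, lang_code)` and `analyze_responses(responses, questions)`), so treat this as a sketch rather than a full pipeline; the language code and responses are illustrative assumptions:

```python
from llm_backend import LLMBackend, LLMProvider
from survey_generator import SurveyGenerator
from survey_translator import SurveyTranslator
from data_analyzer import DataAnalyzer

backend = LLMBackend(provider=LLMProvider.OPENAI)
survey = SurveyGenerator(backend).generate_survey(
    outline="Study user satisfaction with mobile apps",
    survey_type="qualitative",
    num_questions=5,
    target_audience="Mobile app users aged 18-35",
)

# Translate into Spanish ("es" assumed to be one of the supported language codes).
survey_es = SurveyTranslator(backend).translate_survey(survey, "es")

# Analyze collected responses (a hypothetical two-respondent list).
responses = [{"q1": "Great app overall"}, {"q1": "Crashes on startup sometimes"}]
analyzer = DataAnalyzer(backend)
results = analyzer.analyze_responses(responses, survey.get("questions"))
print(analyzer.generate_report(results, format="markdown"))
```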
+ ## Best Practices
+
+ ### For Survey Generation:
+ - Start with clear research objectives
+ - Be specific about your target audience
+ - Review and refine generated questions
+ - Test with a small pilot group first
+
+ ### For Translation:
+ - Verify translations with native speakers
+ - Consider regional language variations
+ - Test cultural appropriateness
+ - Use back-translation for validation
+
+ ### For Analysis:
+ - Collect sufficient responses (20+ ideal)
+ - Ensure response quality
+ - Combine with quantitative data when possible
+ - Review AI insights critically
+
+ ## Support
+
+ For issues, questions, or contributions:
+ - Check the README.md
+ - Review this usage guide
+ - Open an issue on GitHub
+ - Contact the development team
+
+ ## Tips for Production Use
+
+ 1. **Data Privacy**: Review your LLM provider's data policy
+ 2. **API Costs**: Monitor usage to control costs
+ 3. **Rate Limits**: Be aware of provider rate limits
+ 4. **Validation**: Always review AI-generated content
+ 5. **Backup**: Save generated surveys and analyses
+ 6. **Version Control**: Track survey versions
+ 7. **Ethics**: Ensure informed consent from participants
+
+ ---
+
+ Happy researching! 🔬
app.py ADDED
@@ -0,0 +1,495 @@
+ """
+ ConversAI - AI-Powered Qualitative Research Assistant
+ Production-grade survey generation, translation, and analysis platform
+ """
+ import gradio as gr
+ import json
+ import os
+ import traceback
+ from typing import Dict, List, Optional
+
+ from llm_backend import LLMBackend, LLMProvider
+ from survey_generator import SurveyGenerator
+ from survey_translator import SurveyTranslator
+ from data_analyzer import DataAnalyzer
+ from export_utils import save_json_file, survey_to_csv, analysis_to_markdown_file
+
+
+ # Global state for current survey
+ current_survey = None
+ current_responses = []
+
+
+ def initialize_backend():
+     """Initialize LLM backend based on environment"""
+     try:
+         # Try to detect available provider from environment
+         if os.getenv("OPENAI_API_KEY"):
+             return LLMBackend(provider=LLMProvider.OPENAI)
+         elif os.getenv("ANTHROPIC_API_KEY"):
+             return LLMBackend(provider=LLMProvider.ANTHROPIC)
+         elif os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN"):
+             # Use HF_TOKEN which is automatically set in HF Spaces
+             api_key = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
+             return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=api_key)
+         else:
+             # Fallback to LM Studio for local development
+             return LLMBackend(provider=LLMProvider.LM_STUDIO)
+     except Exception as e:
+         print(f"Warning: Backend initialization issue: {e}")
+         # Return a default backend
+         return LLMBackend(provider=LLMProvider.LM_STUDIO)
+
+
+ # Initialize components
+ llm_backend = initialize_backend()
+ survey_gen = SurveyGenerator(llm_backend)
+ survey_trans = SurveyTranslator(llm_backend)
+ data_analyzer = DataAnalyzer(llm_backend)
+
+
+ # ===========================
+ # Survey Generation Functions
+ # ===========================
+
+ def generate_survey_from_outline(outline: str, survey_type: str, num_questions: int, audience: str):
+     """Generate survey from user outline"""
+     global current_survey
+
+     if not outline or not outline.strip():
+         return "❌ Please provide an outline or topic description.", "", None
+
+     # Validate inputs
+     if num_questions < 1 or num_questions > 50:
+         return "❌ Number of questions must be between 1 and 50.", "", None
+
+     try:
+         # Generate survey
+         survey_data = survey_gen.generate_survey(
+             outline=outline,
+             survey_type=survey_type.lower(),
+             num_questions=num_questions,
+             target_audience=audience
+         )
+
+         current_survey = survey_data
+
+         # Format for display
+         display_text = format_survey_display(survey_data)
+
+         # Save to file for download
+         filepath = save_json_file(survey_data, "survey")
+
+         return (
+             f"✅ Survey generated successfully! Contains {len(survey_data.get('questions', []))} questions.",
+             display_text,
+             filepath
+         )
+
+     except Exception as e:
+         error_msg = f"❌ Error generating survey: {str(e)}"
+         print(f"Survey generation error: {traceback.format_exc()}")
+         return error_msg, "", None
+
+
+ def format_survey_display(survey_data: Dict) -> str:
+     """Format survey data for readable display"""
+     output = f"# {survey_data.get('title', 'Survey')}\n\n"
+     output += f"## Introduction\n{survey_data.get('introduction', '')}\n\n"
+     output += "## Questions\n\n"
+
+     for i, q in enumerate(survey_data.get('questions', []), 1):
+         output += f"**{i}. {q.get('question_text', '')}**\n"
+         output += f"   - Type: {q.get('question_type', 'N/A')}\n"
+
+         if q.get('options'):
+             output += "   - Options:\n"
+             for opt in q['options']:
+                 output += f"     - {opt}\n"
+
+         if q.get('help_text'):
+             output += f"   - Help: {q['help_text']}\n"
+
+         output += f"   - Required: {'Yes' if q.get('required', False) else 'No'}\n\n"
+
+     output += f"## Closing\n{survey_data.get('closing', '')}\n"
+
+     return output
+
+
+ # ===========================
+ # Translation Functions
+ # ===========================
+
+ def translate_current_survey(target_languages: List[str]):
+     """Translate the current survey to selected languages"""
+     global current_survey
+
+     if not current_survey:
+         return "❌ Please generate or upload a survey first.", "", None
+
+     if not target_languages:
+         return "❌ Please select at least one target language.", "", None
+
+     try:
+         # Translate to all selected languages
+         translations = {}
+         status_messages = []
+         success_count = 0
+
+         for lang_code in target_languages:
+             try:
+                 translated = survey_trans.translate_survey(current_survey, lang_code)
+                 translations[lang_code] = translated
+                 lang_name = survey_trans._resolve_language(lang_code)
+                 status_messages.append(f"✅ Translated to {lang_name}")
+                 success_count += 1
+             except Exception as e:
+                 lang_name = survey_trans._resolve_language(lang_code)
+                 status_messages.append(f"❌ Failed to translate to {lang_name}: {str(e)}")
+                 print(f"Translation error for {lang_code}: {traceback.format_exc()}")
+
+         if success_count == 0:
+             return "❌ All translations failed. Please check your LLM configuration.", "", None
+
+         # Format translations for display
+         display_text = ""
+         for lang_code, trans_survey in translations.items():
+             if "error" not in trans_survey:
+                 lang_name = survey_trans._resolve_language(lang_code)
+                 display_text += f"\n{'='*50}\n"
+                 display_text += f"TRANSLATION: {lang_name.upper()}\n"
+                 display_text += f"{'='*50}\n\n"
+                 display_text += format_survey_display(trans_survey)
+
+         # Save to file for download
+         filepath = save_json_file(translations, "translations")
+
+         status = "\n".join(status_messages)
+         return status, display_text, filepath
+
+     except Exception as e:
+         error_msg = f"❌ Error during translation: {str(e)}"
+         print(f"Translation error: {traceback.format_exc()}")
+         return error_msg, "", None
+
+
+ def get_language_choices():
+     """Get language choices for dropdown"""
+     langs = survey_trans.get_supported_languages()
+     return [f"{code} - {name}" for code, name in langs.items()]
+
+
+ # ===========================
+ # Data Analysis Functions
+ # ===========================
+
+ def analyze_survey_data(responses_json: str, questions_json: str = None):
+     """Analyze survey responses"""
+     if not responses_json or not responses_json.strip():
+         return "❌ Please provide survey responses in JSON format.", "", None
+
+     try:
+         # Parse responses
+         responses = json.loads(responses_json)
+         questions = json.loads(questions_json) if questions_json and questions_json.strip() else None
+
+         if not isinstance(responses, list):
+             return "❌ Responses must be a JSON array.", "", None
+
+         if len(responses) == 0:
+             return "❌ No responses to analyze.", "", None
+
+         # Validate questions if provided
+         if questions and not isinstance(questions, list):
+             return "❌ Questions must be a JSON array.", "", None
+
+         # Run analysis
+         analysis_results = data_analyzer.analyze_responses(responses, questions)
+
+         if "error" in analysis_results:
+             return f"❌ Analysis error: {analysis_results['error']}", "", None
+
+         # Generate report
+         report_md = data_analyzer.generate_report(analysis_results, format="markdown")
+
+         # Save both JSON and Markdown
+         json_filepath = save_json_file(analysis_results, "analysis_results")
+         md_filepath = analysis_to_markdown_file(report_md, "analysis_report")
+
+         status_msg = f"✅ Analysis complete! Analyzed {len(responses)} responses."
+         if questions:
+             status_msg += f" Considered {len(questions)} questions."
+
+         return status_msg, report_md, json_filepath
+
+     except json.JSONDecodeError as e:
+         return f"❌ Invalid JSON format: {str(e)}", "", None
+     except Exception as e:
+         error_msg = f"❌ Error during analysis: {str(e)}"
+         print(f"Analysis error: {traceback.format_exc()}")
+         return error_msg, "", None
+
+
+ def load_example_responses():
+     """Load example responses for demonstration"""
+     example = [
+         {
+             "q1": "The medication helped reduce my symptoms significantly within the first week.",
+             "q2": "I experienced some mild side effects like drowsiness in the beginning.",
+             "q3": "Overall, I'm satisfied with the treatment and would recommend it to others."
+         },
+         {
+             "q1": "I didn't notice much improvement in my condition after taking the medication.",
+             "q2": "The side effects were quite severe and made it difficult to continue.",
+             "q3": "I had to stop taking it after two weeks due to adverse reactions."
+         },
+         {
+             "q1": "The medication worked well but took about 3-4 weeks to show results.",
+             "q2": "No major side effects, just some occasional nausea.",
+             "q3": "It's been effective for managing my symptoms on a daily basis."
+         }
+     ]
+     return json.dumps(example, indent=2)
+
+
+ # ===========================
+ # Gradio Interface
+ # ===========================
+
+ def create_interface():
+     """Create the main Gradio interface"""
+
+     with gr.Blocks(
+         title="ConversAI - Qualitative Research Assistant",
+         theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate")
+     ) as app:
+
+         gr.Markdown("""
+         # ConversAI - Your AI-Powered Qualitative Research Assistant
+
+         Battle the blank page, reach global audiences, and uncover insights with AI assistance.
+         """)
+
+         with gr.Tabs() as tabs:
+
+             # ========== SURVEY GENERATION TAB ==========
+             with gr.Tab("📝 Generate Survey"):
+                 gr.Markdown("""
+                 ## Battle the Blank Page
+                 Share an outline and get AI-powered surveys drafted in minutes,
+                 complete with industry best practices.
+                 """)
+
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         outline_input = gr.Textbox(
+                             label="Your Survey Outline or Topic",
+                             placeholder="Example: I want to understand patient experiences with a new diabetes medication, focusing on effectiveness, side effects, and quality of life impacts.",
+                             lines=6
+                         )
+
+                         survey_type_input = gr.Radio(
+                             label="Survey Type",
+                             choices=["Qualitative", "Quantitative", "Mixed"],
+                             value="Qualitative"
+                         )
+
+                         num_questions_input = gr.Slider(
+                             label="Number of Questions",
+                             minimum=5,
+                             maximum=25,
+                             value=10,
+                             step=1
+                         )
+
+                         audience_input = gr.Textbox(
+                             label="Target Audience",
+                             placeholder="Example: Adults aged 30-65 with Type 2 diabetes",
+                             value="General audience"
+                         )
+
+                         generate_btn = gr.Button("🚀 Generate Survey", variant="primary", size="lg")
+
+                     with gr.Column(scale=1):
+                         gen_status = gr.Textbox(label="Status", interactive=False)
+                         gen_output = gr.Markdown(label="Generated Survey")
+
+                 gen_download = gr.File(label="Download Survey JSON", visible=False)
+
+                 # Event handlers
+                 generate_btn.click(
+                     fn=generate_survey_from_outline,
+                     inputs=[outline_input, survey_type_input, num_questions_input, audience_input],
+                     outputs=[gen_status, gen_output, gen_download]
+                 ).then(
+                     fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
+                     inputs=[gen_download],
+                     outputs=[gen_download]
+                 )
+
+             # ========== TRANSLATION TAB ==========
+             with gr.Tab("🌍 Translate Survey"):
+                 gr.Markdown("""
+                 ## Reach Global Audiences
+                 Translate your surveys automatically to streamline efforts and reach wider audiences.
+                 """)
+
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         gr.Markdown("### Select Target Languages")
+
+                         # Create checkboxes for popular languages
+                         lang_checkboxes = gr.CheckboxGroup(
+                             label="Languages",
+                             choices=get_language_choices(),
+                             value=[]
+                         )
+
+                         translate_btn = gr.Button("🌐 Translate Survey", variant="primary", size="lg")
+
+                         gr.Markdown("""
+                         **Note:** Make sure you've generated a survey first, or upload one using the JSON format.
+                         """)
+
+                     with gr.Column(scale=1):
+                         trans_status = gr.Textbox(label="Translation Status", interactive=False)
+                         trans_output = gr.Markdown(label="Translations")
+
+                 trans_download = gr.File(label="Download Translations JSON", visible=False)
+
+                 # Event handlers
+                 def extract_lang_codes(selected_items):
+                     """Extract language codes from checkbox selections"""
+                     return [item.split(" - ")[0] for item in selected_items]
+
+                 translate_btn.click(
+                     fn=lambda x: translate_current_survey(extract_lang_codes(x)),
+                     inputs=[lang_checkboxes],
+                     outputs=[trans_status, trans_output, trans_download]
+                 ).then(
+                     fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
+                     inputs=[trans_download],
+                     outputs=[trans_download]
+                 )
+
+             # ========== ANALYSIS TAB ==========
+             with gr.Tab("📊 Analyze Data"):
+                 gr.Markdown("""
+                 ## Uncover Key Insights
+                 Upload your survey responses and get AI-assisted summaries of key findings,
+                 themes, and trends.
+                 """)
+
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         responses_input = gr.Textbox(
+                             label="Survey Responses (JSON)",
+                             placeholder='[{"q1": "response 1", "q2": "response 2"}, ...]',
+                             lines=10
+                         )
+
+                         questions_input = gr.Textbox(
+                             label="Questions (JSON, Optional)",
+                             placeholder='[{"question_text": "What is your experience?", ...}]',
+                             lines=5
+                         )
+
+                         with gr.Row():
+                             analyze_btn = gr.Button("🔍 Analyze Data", variant="primary", size="lg")
+                             example_btn = gr.Button("Load Example", variant="secondary")
+
+                     with gr.Column(scale=1):
+                         analysis_status = gr.Textbox(label="Status", interactive=False)
+                         analysis_output = gr.Markdown(label="Analysis Report")
+
+                 analysis_download = gr.File(label="Download Analysis JSON", visible=False)
+
+                 # Event handlers
+                 analyze_btn.click(
+                     fn=analyze_survey_data,
+                     inputs=[responses_input, questions_input],
+                     outputs=[analysis_status, analysis_output, analysis_download]
+                 ).then(
+                     fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
+                     inputs=[analysis_download],
+                     outputs=[analysis_download]
+                 )
+
+                 example_btn.click(
+                     fn=load_example_responses,
+                     outputs=[responses_input]
+                 )
+
+             # ========== ABOUT TAB ==========
+             with gr.Tab("ℹ️ About"):
+                 gr.Markdown("""
+                 ## About ConversAI
+
+                 ConversAI is a comprehensive qualitative research assistant that helps you:
+
+                 ### 🎯 Generate Surveys
+                 - Create professional surveys from simple outlines
+                 - Follow industry best practices automatically
+                 - Save hours of questionnaire design time
+
+                 ### 🌍 Translate Globally
+                 - Reach audiences in 18+ languages
+                 - Maintain cultural appropriateness
+                 - Expand your research scope effortlessly
+
+                 ### 📊 Analyze Results
+                 - Extract key themes automatically
+                 - Identify patterns and trends
+                 - Generate actionable insights
+
+                 ### 🔧 Technical Details
+
+                 **Supported LLM Providers:**
+                 - OpenAI (GPT-4, GPT-3.5)
+                 - Anthropic (Claude)
+                 - HuggingFace Inference API
+                 - LM Studio (local)
+
+                 **Configuration:**
+                 Set environment variables to configure your LLM provider:
+                 - `OPENAI_API_KEY` - For OpenAI models
+                 - `ANTHROPIC_API_KEY` - For Claude models
+                 - `HUGGINGFACE_API_KEY` or `HF_TOKEN` - For HuggingFace
+                 - `LM_STUDIO_URL` - For local LM Studio (default: http://localhost:1234/v1/chat/completions)
+
+                 ### 📄 Data Privacy
+
+                 - All processing is done through your configured LLM provider
+                 - No data is stored permanently by this application
+                 - Survey data and responses remain in your control
+
+                 ### 🚀 Getting Started
+
+                 1. **Generate** a survey from your research outline
+                 2. **Translate** it to reach global audiences
+                 3. **Collect** responses from participants
+                 4. **Analyze** the data to uncover insights
+
+                 ---
+
+                 Built with ❤️ using Gradio and state-of-the-art LLMs
+                 """)
+
+     return app
+
+
+ # ===========================
+ # Main Entry Point
+ # ===========================
+
+ if __name__ == "__main__":
+     demo = create_interface()
+
+     # Launch with appropriate settings
+     demo.launch(
+         server_name="0.0.0.0",  # Allow external access
+         server_port=7860,       # Standard HF Spaces port
+         share=False,            # Don't create a public link (HF Spaces handles this)
+         show_error=True
+     )
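
Because the tab callbacks above are plain functions, they can also be exercised headlessly, which is handy for the smoke tests the deployment guide asks for. A sketch (note that importing `app` builds the backend at import time, so the relevant API key must already be set; the outline text is a made-up example):

```python
# Headless smoke test of the generation callback defined in app.py.
from app import generate_survey_from_outline

status, preview_md, filepath = generate_survey_from_outline(
    outline="Understand remote workers' meeting fatigue",  # hypothetical topic
    survey_type="Qualitative",
    num_questions=8,
    audience="Remote employees",
)
print(status)    # "✅ Survey generated successfully! ..." on success
print(filepath)  # path to the downloadable survey JSON
```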
data_analyzer.py ADDED
@@ -0,0 +1,434 @@
+ """
+ Data Analysis Module - AI-assisted analysis of survey responses
+ """
+ import json
+ from typing import Dict, List, Optional
+ from collections import Counter
+ from llm_backend import LLMBackend
+
+
+ class DataAnalyzer:
+     """
+     Analyzes survey responses to uncover key findings, trends, and patterns.
+     Provides AI-assisted summaries for qualitative research data.
+     """
+
+     def __init__(self, llm_backend: LLMBackend):
+         self.llm = llm_backend
+
+     def analyze_responses(self, responses: List[Dict], questions: List[Dict] = None) -> Dict:
+         """
+         Comprehensive analysis of survey responses.
+
+         Args:
+             responses: List of response dictionaries
+             questions: Optional list of questions for context
+
+         Returns:
+             Analysis results including themes, sentiment, and insights
+         """
+         if not responses:
+             return {"error": "No responses to analyze"}
+
+         analysis = {
+             "summary": {},
+             "themes": [],
+             "sentiment": {},
+             "key_insights": [],
+             "response_count": len(responses)
+         }
+
+         # Generate overall summary
+         analysis["summary"] = self._generate_summary(responses, questions)
+
+         # Extract themes
+         analysis["themes"] = self._extract_themes(responses)
+
+         # Analyze sentiment
+         analysis["sentiment"] = self._analyze_sentiment(responses)
+
+         # Generate key insights
+         analysis["key_insights"] = self._generate_insights(responses, questions)
+
+         # Add quantitative stats if applicable
+         analysis["statistics"] = self._compute_statistics(responses, questions)
+
+         return analysis
+
+     def _generate_summary(self, responses: List[Dict], questions: List[Dict] = None) -> Dict:
+         """Generate an executive summary of responses"""
+         # Prepare context
+         response_texts = self._extract_text_responses(responses)
+         sample_size = min(50, len(response_texts))  # Use sample for large datasets
+         sample_responses = response_texts[:sample_size]
+
+         context = f"Total responses: {len(responses)}\n\n"
+         if questions:
+             context += "Questions asked:\n"
+             for i, q in enumerate(questions[:10], 1):  # Limit to first 10 questions
+                 context += f"{i}. {q.get('question_text', '')}\n"
+             context += "\n"
+
+         context += "Sample responses:\n"
+         for i, resp in enumerate(sample_responses, 1):
+             context += f"{i}. {resp[:200]}...\n"  # Truncate long responses
+
+         prompt = f"""Analyze the following survey responses and provide an executive summary.
+
+ {context}
+
+ Provide a summary that includes:
+ 1. Overview: High-level summary of what the data shows (2-3 sentences)
+ 2. Key patterns: Main patterns or trends observed
+ 3. Notable findings: Interesting or unexpected discoveries
+ 4. Response quality: Assessment of response depth and engagement
+
+ Respond with a JSON object with these fields:
+ {{
+     "overview": "...",
+     "key_patterns": ["pattern 1", "pattern 2", ...],
+     "notable_findings": ["finding 1", "finding 2", ...],
+     "response_quality": "..."
+ }}"""
+
+         messages = [
+             {"role": "system", "content": self._get_analyst_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=1000, temperature=0.5)
+             return self._parse_json_response(response)
+         except Exception as e:
+             return {"error": f"Summary generation failed: {str(e)}"}
+
+     def _extract_themes(self, responses: List[Dict], num_themes: int = 5) -> List[Dict]:
+         """Extract main themes from responses using AI"""
+         response_texts = self._extract_text_responses(responses)
+
+         if not response_texts:
+             return []
+
+         # Sample for large datasets
+         sample_size = min(100, len(response_texts))
+         sample_responses = response_texts[:sample_size]
+
+         prompt = f"""Analyze the following {len(sample_responses)} survey responses and identify the top {num_themes} themes.
+
+ Responses:
+ {self._format_responses_for_prompt(sample_responses)}
+
+ For each theme, provide:
+ 1. Theme name: A short, descriptive name
+ 2. Description: What this theme represents
+ 3. Prevalence: Estimated percentage of responses mentioning this theme
+ 4. Example quotes: 2-3 representative quotes from the responses
+
+ Respond with a JSON array of theme objects:
+ [
+     {{
+         "theme_name": "...",
+         "description": "...",
+         "prevalence": "XX%",
+         "example_quotes": ["quote 1", "quote 2"]
+     }}
+ ]"""
+
+         messages = [
+             {"role": "system", "content": self._get_analyst_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=1500, temperature=0.6)
+             themes = self._parse_json_response(response)
+             if isinstance(themes, list):
+                 return themes
+             return []
+         except Exception as e:
+             return [{"error": f"Theme extraction failed: {str(e)}"}]
+
+     def _analyze_sentiment(self, responses: List[Dict]) -> Dict:
+         """Analyze overall sentiment of responses"""
+         response_texts = self._extract_text_responses(responses)
+
+         if not response_texts:
+             return {}
+
+         # Sample for analysis
+         sample_size = min(100, len(response_texts))
+         sample_responses = response_texts[:sample_size]
+
+         prompt = f"""Analyze the sentiment of these {len(sample_responses)} survey responses.
+
+ Responses:
+ {self._format_responses_for_prompt(sample_responses)}
+
+ Provide sentiment analysis including:
+ 1. Overall sentiment: positive, negative, neutral, or mixed
+ 2. Sentiment distribution: Estimated percentage breakdown
+ 3. Emotional tone: Key emotions detected
+ 4. Intensity: How strong the sentiments are
+
+ Respond with JSON:
+ {{
+     "overall_sentiment": "...",
+     "distribution": {{
+         "positive": "XX%",
+         "neutral": "XX%",
+         "negative": "XX%"
+     }},
+     "emotions": ["emotion1", "emotion2", ...],
+     "intensity": "low|moderate|high"
+ }}"""
+
+         messages = [
+             {"role": "system", "content": self._get_analyst_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=500, temperature=0.4)
+             return self._parse_json_response(response)
+         except Exception as e:
+             return {"error": f"Sentiment analysis failed: {str(e)}"}
+
+     def _generate_insights(self, responses: List[Dict], questions: List[Dict] = None) -> List[str]:
+         """Generate actionable insights from the data"""
+         response_texts = self._extract_text_responses(responses)
+
+         if not response_texts:
+             return []
+
+         sample_size = min(100, len(response_texts))
+         sample_responses = response_texts[:sample_size]
+
+         context = f"Analyzing {len(responses)} survey responses.\n\n"
+         if questions:
+             context += "Research questions:\n"
+             for i, q in enumerate(questions[:5], 1):
+                 context += f"{i}. {q.get('question_text', '')}\n"
+             context += "\n"
+
+         prompt = f"""{context}
+
+ Sample responses:
+ {self._format_responses_for_prompt(sample_responses)}
+
+ Based on this data, provide 5-7 key insights that would be valuable for:
+ - Understanding the target audience
+ - Identifying opportunities or challenges
+ - Informing strategic decisions
+ - Recognizing patterns or trends
+
+ Each insight should be:
+ - Specific and actionable
+ - Supported by the data
+ - Clear and concise
+
+ Respond with a JSON array of insight strings:
+ ["insight 1", "insight 2", ...]"""
+
+         messages = [
+             {"role": "system", "content": self._get_analyst_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=1000, temperature=0.6)
+             insights = self._parse_json_response(response)
+             if isinstance(insights, list):
+                 return insights
+             return []
+         except Exception as e:
+             return [f"Insight generation failed: {str(e)}"]
+
+     def _compute_statistics(self, responses: List[Dict], questions: List[Dict] = None) -> Dict:
+         """Compute basic statistics from responses"""
+         stats = {
+             "total_responses": len(responses),
+             "response_lengths": {},
+             "completion_rate": "N/A"
+         }
+
+         # Calculate average response length
+         response_texts = self._extract_text_responses(responses)
+         if response_texts:
+             lengths = [len(r.split()) for r in response_texts]
+             stats["response_lengths"] = {
+                 "avg_words": sum(lengths) / len(lengths),
+                 "min_words": min(lengths),
+                 "max_words": max(lengths)
+             }
+
+         # Calculate completion rate if questions are provided
+         if questions:
+             total_questions = len(questions)
+             completed_questions = 0
+             for response in responses:
+                 if isinstance(response, dict):
+                     completed_questions += len([v for v in response.values() if v])
+
+             if total_questions > 0:
+                 completion_rate = (completed_questions / (total_questions * len(responses))) * 100
+                 stats["completion_rate"] = f"{completion_rate:.1f}%"
+
+         return stats
+
+     def generate_report(self, analysis_results: Dict, format: str = "markdown") -> str:
+         """
+         Generate a formatted report from analysis results.
+
+         Args:
+             analysis_results: Results from analyze_responses()
+             format: Output format (markdown, text, html)
+
+         Returns:
+             Formatted report string
+         """
+         if format == "markdown":
+             return self._generate_markdown_report(analysis_results)
+         elif format == "html":
+             return self._generate_html_report(analysis_results)
+         else:
+             return self._generate_text_report(analysis_results)
+
+     def _generate_markdown_report(self, results: Dict) -> str:
+         """Generate markdown formatted report"""
+         report = "# Survey Analysis Report\n\n"
+
+         # Summary section
+         if "summary" in results and results["summary"]:
+             report += "## Executive Summary\n\n"
+             summary = results["summary"]
+             if "overview" in summary:
+                 report += f"{summary['overview']}\n\n"
+             if "key_patterns" in summary:
+                 report += "### Key Patterns\n"
+                 for pattern in summary["key_patterns"]:
+                     report += f"- {pattern}\n"
+                 report += "\n"
+
+         # Statistics
+         if "statistics" in results:
+             report += "## Response Statistics\n\n"
+             stats = results["statistics"]
+             report += f"- Total Responses: {stats.get('total_responses', 'N/A')}\n"
+             if "response_lengths" in stats:
+                 rl = stats["response_lengths"]
+                 report += f"- Average Response Length: {rl.get('avg_words', 0):.1f} words\n"
+             report += f"- Completion Rate: {stats.get('completion_rate', 'N/A')}\n\n"
+
+         # Themes
+         if "themes" in results and results["themes"]:
+             report += "## Main Themes\n\n"
+             for i, theme in enumerate(results["themes"], 1):
+                 if isinstance(theme, dict) and "theme_name" in theme:
+                     report += f"### {i}. {theme['theme_name']}\n"
+                     report += f"{theme.get('description', '')}\n\n"
+                     report += f"**Prevalence:** {theme.get('prevalence', 'N/A')}\n\n"
+                     if "example_quotes" in theme:
+                         report += "**Example quotes:**\n"
+                         for quote in theme["example_quotes"]:
+                             report += f"> {quote}\n"
+                         report += "\n"
+
+         # Sentiment
+         if "sentiment" in results and results["sentiment"]:
+             report += "## Sentiment Analysis\n\n"
+             sent = results["sentiment"]
+             report += f"**Overall Sentiment:** {sent.get('overall_sentiment', 'N/A')}\n\n"
+             if "distribution" in sent:
+                 report += "**Distribution:**\n"
+                 for key, value in sent["distribution"].items():
+                     report += f"- {key.title()}: {value}\n"
+                 report += "\n"
+
+         # Key Insights
+         if "key_insights" in results and results["key_insights"]:
+             report += "## Key Insights\n\n"
+             for i, insight in enumerate(results["key_insights"], 1):
+                 report += f"{i}. {insight}\n"
+             report += "\n"
+
+         return report
+
+     def _generate_text_report(self, results: Dict) -> str:
+         """Generate plain text report"""
+         # Similar to markdown but without formatting
+         return self._generate_markdown_report(results).replace("#", "").replace("**", "").replace(">", "")
+
+     def _generate_html_report(self, results: Dict) -> str:
+         """Generate HTML report"""
+         # Convert markdown to basic HTML
+         md_report = self._generate_markdown_report(results)
+         # Basic conversion (for production, use a proper markdown-to-html library)
+         html = md_report.replace("# ", "<h1>").replace("\n\n", "</p>\n<p>")
+         return f"<html><body>{html}</body></html>"
+
+     def _get_analyst_system_prompt(self) -> str:
+         """System prompt for analysis tasks"""
+         return """You are an expert qualitative research analyst with deep expertise in:
+ - Thematic analysis and coding
+ - Sentiment analysis and emotional intelligence
+ - Pattern recognition in qualitative data
+ - Insight generation and strategic thinking
+ - Survey research methodology
+
+ Your analyses should be:
+ - Objective and evidence-based
+ - Nuanced and comprehensive
+ - Actionable and clear
+ - Grounded in the actual data provided
+
+ Always respond with valid JSON when requested."""
+
+     def _extract_text_responses(self, responses: List[Dict]) -> List[str]:
+         """Extract text from response objects"""
+         texts = []
+         for response in responses:
+             if isinstance(response, dict):
+                 # Extract all string values
+                 for value in response.values():
+                     if isinstance(value, str) and value.strip():
+                         texts.append(value.strip())
+             elif isinstance(response, str):
+                 texts.append(response.strip())
+         return texts
+
+     def _format_responses_for_prompt(self, responses: List[str], max_responses: int = 50) -> str:
+         """Format responses for inclusion in prompt"""
+         formatted = []
+         for i, resp in enumerate(responses[:max_responses], 1):
+             # Truncate very long responses
+             truncated = resp[:300] + "..." if len(resp) > 300 else resp
+             formatted.append(f"{i}. {truncated}")
+         return "\n".join(formatted)
+
+     def _parse_json_response(self, response: str):
+         """Parse JSON from LLM response"""
+         response = response.strip()
+
+         # Handle code blocks
+         if "```json" in response:
+             start = response.find("```json") + 7
+             end = response.find("```", start)
+             response = response[start:end].strip()
+         elif "```" in response:
+             start = response.find("```") + 3
+             end = response.find("```", start)
+             response = response[start:end].strip()
+
+         try:
+             return json.loads(response)
+         except json.JSONDecodeError:
+             # Try to find JSON object or array
+ if "{" in response:
427
+ start = response.find("{")
428
+ end = response.rfind("}") + 1
429
+ return json.loads(response[start:end])
430
+ elif "[" in response:
431
+ start = response.find("[")
432
+ end = response.rfind("]") + 1
433
+ return json.loads(response[start:end])
434
+ raise
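For reference, a minimal usage sketch of the reporting path (not part of the commit; the exact signature of analyze_responses(), defined earlier in this file, is assumed):

    # Hypothetical usage -- requires a configured, reachable LLM backend
    from llm_backend import LLMBackend
    from data_analyzer import DataAnalyzer

    responses = [{"q1": "Loved the onboarding"}, {"q1": "Setup was confusing"}]  # illustrative data
    analyzer = DataAnalyzer(LLMBackend())
    results = analyzer.analyze_responses(responses)  # assumed call shape
    print(analyzer.generate_report(results, format="markdown"))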
export_utils.py ADDED
@@ -0,0 +1,138 @@
+ """
+ Export Utilities - Handle various export formats
+ """
+ import json
+ import csv
+ from typing import Dict, List, Optional
+ from datetime import datetime
+
+
+ def save_json_file(data: Dict, prefix: str = "export") -> str:
+     """
+     Save data to JSON file and return filepath.
+
+     Args:
+         data: Data to save
+         prefix: Filename prefix
+
+     Returns:
+         Path to saved file
+     """
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"{prefix}_{timestamp}.json"
+
+     with open(filename, 'w', encoding='utf-8') as f:
+         json.dump(data, f, indent=2, ensure_ascii=False)
+
+     return filename
+
+
+ def survey_to_csv(survey_data: Dict) -> str:
+     """
+     Convert survey to CSV format (one row per question).
+
+     Args:
+         survey_data: Survey dictionary
+
+     Returns:
+         Path to CSV file
+     """
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"survey_{timestamp}.csv"
+
+     with open(filename, 'w', newline='', encoding='utf-8') as f:
+         writer = csv.writer(f)
+
+         # Write header
+         writer.writerow(['Question ID', 'Question Text', 'Type', 'Options', 'Required', 'Help Text'])
+
+         # Write questions
+         for q in survey_data.get('questions', []):
+             writer.writerow([
+                 q.get('id', ''),
+                 q.get('question_text', ''),
+                 q.get('question_type', ''),
+                 '; '.join(q.get('options', [])) if q.get('options') else '',
+                 'Yes' if q.get('required', False) else 'No',
+                 q.get('help_text', '')
+             ])
+
+     return filename
+
+
+ def responses_to_csv(responses: List[Dict], filename_prefix: str = "responses") -> Optional[str]:
+     """
+     Convert responses to CSV format.
+
+     Args:
+         responses: List of response dictionaries
+         filename_prefix: Prefix for filename
+
+     Returns:
+         Path to CSV file, or None if there are no responses
+     """
+     if not responses:
+         return None
+
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"{filename_prefix}_{timestamp}.csv"
+
+     # Get all unique keys from all responses
+     all_keys = set()
+     for response in responses:
+         if isinstance(response, dict):
+             all_keys.update(response.keys())
+
+     fieldnames = sorted(all_keys)
+
+     with open(filename, 'w', newline='', encoding='utf-8') as f:
+         writer = csv.DictWriter(f, fieldnames=fieldnames)
+         writer.writeheader()
+
+         for response in responses:
+             if isinstance(response, dict):
+                 writer.writerow(response)
+
+     return filename
+
+
+ def analysis_to_markdown_file(analysis_report: str, prefix: str = "analysis_report") -> str:
+     """
+     Save analysis report to markdown file.
+
+     Args:
+         analysis_report: Markdown formatted report
+         prefix: Filename prefix
+
+     Returns:
+         Path to markdown file
+     """
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"{prefix}_{timestamp}.md"
+
+     with open(filename, 'w', encoding='utf-8') as f:
+         f.write(analysis_report)
+
+     return filename
+
+
+ def create_survey_package(survey_data: Dict) -> Dict[str, str]:
+     """
+     Create a complete package of survey files (JSON, CSV, etc.).
+
+     Args:
+         survey_data: Survey dictionary
+
+     Returns:
+         Dictionary mapping format to filepath
+     """
+     package = {}
+
+     # Save JSON
+     package['json'] = save_json_file(survey_data, "survey")
+
+     # Save CSV
+     package['csv'] = survey_to_csv(survey_data)
+
+     return package
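A quick sketch of how these helpers compose (hypothetical survey values; all filenames are timestamped at call time):

    from export_utils import create_survey_package, responses_to_csv

    survey = {"title": "Demo", "questions": [
        {"id": 1, "question_text": "How was it?", "question_type": "open_ended", "required": True}
    ]}
    files = create_survey_package(survey)   # e.g. {'json': 'survey_<ts>.json', 'csv': 'survey_<ts>.csv'}
    path = responses_to_csv([{"q1": "Great"}, {"q1": "Okay"}])  # None if the list is empty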
llm_backend.py ADDED
@@ -0,0 +1,220 @@
+ """
+ LLM Backend for ConversAI - Supports multiple providers
+ """
+ import os
+ import requests
+ import json
+ from typing import List, Dict, Optional
+ from enum import Enum
+
+
+ class LLMProvider(Enum):
+     """Supported LLM providers"""
+     OPENAI = "openai"
+     ANTHROPIC = "anthropic"
+     HUGGINGFACE = "huggingface"
+     LM_STUDIO = "lm_studio"
+
+
+ class LLMBackend:
+     """
+     Unified interface for multiple LLM providers.
+     Supports OpenAI, Anthropic, HuggingFace Inference API, and LM Studio.
+     """
+
+     def __init__(self, provider: Optional[LLMProvider] = None,
+                  api_key: Optional[str] = None, model: Optional[str] = None):
+         """
+         Initialize LLM backend with specified provider.
+
+         Args:
+             provider: LLM provider to use (defaults to env var or LM_STUDIO)
+             api_key: API key for the provider (reads from env if not provided)
+             model: Model name to use (provider-specific defaults if not provided)
+         """
+         # Determine provider
+         if provider is None:
+             provider_str = os.getenv("LLM_PROVIDER", "lm_studio").lower()
+             self.provider = LLMProvider(provider_str)
+         else:
+             self.provider = provider
+
+         # Set API key
+         if api_key:
+             self.api_key = api_key
+         else:
+             if self.provider == LLMProvider.OPENAI:
+                 self.api_key = os.getenv("OPENAI_API_KEY")
+             elif self.provider == LLMProvider.ANTHROPIC:
+                 self.api_key = os.getenv("ANTHROPIC_API_KEY")
+             elif self.provider == LLMProvider.HUGGINGFACE:
+                 self.api_key = os.getenv("HUGGINGFACE_API_KEY")
+             else:
+                 self.api_key = None
+
+         # Set model
+         if model:
+             self.model = model
+         else:
+             self.model = self._get_default_model()
+
+         # Set API endpoint
+         self.api_url = self._get_api_url()
+
+     def _get_default_model(self) -> str:
+         """Get default model for each provider"""
+         defaults = {
+             LLMProvider.OPENAI: "gpt-4o-mini",
+             LLMProvider.ANTHROPIC: "claude-3-5-sonnet-20241022",
+             LLMProvider.HUGGINGFACE: "mistralai/Mixtral-8x7B-Instruct-v0.1",
+             LLMProvider.LM_STUDIO: "google/gemma-3-27b"
+         }
+         return os.getenv("LLM_MODEL", defaults[self.provider])
+
+     def _get_api_url(self) -> str:
+         """Get API URL for each provider"""
+         if self.provider == LLMProvider.OPENAI:
+             return "https://api.openai.com/v1/chat/completions"
+         elif self.provider == LLMProvider.ANTHROPIC:
+             return "https://api.anthropic.com/v1/messages"
+         elif self.provider == LLMProvider.HUGGINGFACE:
+             return f"https://api-inference.huggingface.co/models/{self.model}"
+         elif self.provider == LLMProvider.LM_STUDIO:
+             # Default matches the localhost URL documented in .env.example
+             return os.getenv("LM_STUDIO_URL", "http://localhost:1234/v1/chat/completions")
+
+     def generate(self,
+                  messages: List[Dict[str, str]],
+                  max_tokens: int = 1000,
+                  temperature: float = 0.7,
+                  json_mode: bool = False) -> str:
+         """
+         Generate completion from messages.
+
+         Args:
+             messages: List of message dicts with 'role' and 'content'
+             max_tokens: Maximum tokens to generate
+             temperature: Sampling temperature
+             json_mode: Whether to request JSON output (supported by some providers)
+
+         Returns:
+             Generated text response
+         """
+         try:
+             if self.provider == LLMProvider.OPENAI:
+                 return self._generate_openai(messages, max_tokens, temperature, json_mode)
+             elif self.provider == LLMProvider.ANTHROPIC:
+                 return self._generate_anthropic(messages, max_tokens, temperature)
+             elif self.provider == LLMProvider.HUGGINGFACE:
+                 return self._generate_huggingface(messages, max_tokens, temperature)
+             elif self.provider == LLMProvider.LM_STUDIO:
+                 return self._generate_lm_studio(messages, max_tokens, temperature)
+         except Exception as e:
+             raise Exception(f"LLM generation failed: {str(e)}") from e
+
+     def _generate_openai(self, messages, max_tokens, temperature, json_mode) -> str:
+         """Generate using OpenAI API"""
+         headers = {
+             "Authorization": f"Bearer {self.api_key}",
+             "Content-Type": "application/json"
+         }
+
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "max_tokens": max_tokens,
+             "temperature": temperature
+         }
+
+         if json_mode:
+             payload["response_format"] = {"type": "json_object"}
+
+         response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
+         response.raise_for_status()
+
+         data = response.json()
+         return data["choices"][0]["message"]["content"]
+
+     def _generate_anthropic(self, messages, max_tokens, temperature) -> str:
+         """Generate using Anthropic API"""
+         headers = {
+             "x-api-key": self.api_key,
+             "anthropic-version": "2023-06-01",
+             "Content-Type": "application/json"
+         }
+
+         # Convert messages format (extract system message if present)
+         system_message = None
+         converted_messages = []
+
+         for msg in messages:
+             if msg["role"] == "system":
+                 system_message = msg["content"]
+             else:
+                 converted_messages.append(msg)
+
+         payload = {
+             "model": self.model,
+             "messages": converted_messages,
+             "max_tokens": max_tokens,
+             "temperature": temperature
+         }
+
+         if system_message:
+             payload["system"] = system_message
+
+         response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
+         response.raise_for_status()
+
+         data = response.json()
+         return data["content"][0]["text"]
+
+     def _generate_huggingface(self, messages, max_tokens, temperature) -> str:
+         """Generate using HuggingFace Inference API"""
+         headers = {
+             "Authorization": f"Bearer {self.api_key}",
+             "Content-Type": "application/json"
+         }
+
+         # Convert messages to prompt
+         prompt = self._messages_to_prompt(messages)
+
+         payload = {
+             "inputs": prompt,
+             "parameters": {
+                 "max_new_tokens": max_tokens,
+                 "temperature": temperature,
+                 "return_full_text": False
+             }
+         }
+
+         response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
+         response.raise_for_status()
+
+         data = response.json()
+         if isinstance(data, list) and len(data) > 0:
+             return data[0].get("generated_text", "")
+         return ""
+
+     def _generate_lm_studio(self, messages, max_tokens, temperature) -> str:
+         """Generate using LM Studio local API"""
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "max_tokens": max_tokens,
+             "temperature": temperature
+         }
+
+         response = requests.post(self.api_url, json=payload, timeout=60)
+         response.raise_for_status()
+
+         data = response.json()
+         return data["choices"][0]["message"]["content"]
+
+     def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
+         """Convert message format to simple prompt"""
+         prompt_parts = []
+         for msg in messages:
+             role = msg["role"].capitalize()
+             content = msg["content"]
+             prompt_parts.append(f"{role}: {content}")
+         prompt_parts.append("Assistant:")
+         return "\n\n".join(prompt_parts)
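A minimal call pattern for the backend (sketch only; it assumes the matching API key is set in the environment, or that a local LM Studio server is running):

    from llm_backend import LLMBackend, LLMProvider

    backend = LLMBackend(provider=LLMProvider.OPENAI)  # or omit to honor LLM_PROVIDER
    reply = backend.generate(
        [{"role": "system", "content": "You are concise."},
         {"role": "user", "content": "Say hello."}],
        max_tokens=50,
        temperature=0.2,
    )
    print(reply)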
requirements.txt CHANGED
@@ -1,3 +1,3 @@
- gradio==4.38.1
- requests==2.32.3
  pandas==2.2.2
+ gradio==5.45.0
+ requests==2.32.3
  pandas==2.2.2
survey_generator.py ADDED
@@ -0,0 +1,224 @@
+ """
+ Survey Generation Module - Generate AI-powered surveys from outlines
+ """
+ import json
+ from typing import List, Dict
+ from llm_backend import LLMBackend
+
+
+ class SurveyGenerator:
+     """
+     Generates professional surveys from user outlines using AI.
+     Follows industry best practices for qualitative research.
+     """
+
+     def __init__(self, llm_backend: LLMBackend):
+         self.llm = llm_backend
+
+     def generate_survey(self,
+                         outline: str,
+                         survey_type: str = "qualitative",
+                         num_questions: int = 10,
+                         target_audience: str = "general") -> Dict:
+         """
+         Generate a complete survey from an outline.
+
+         Args:
+             outline: User's outline or topic description
+             survey_type: Type of survey (qualitative, quantitative, mixed)
+             num_questions: Target number of questions
+             target_audience: Description of target respondents
+
+         Returns:
+             Dict containing survey metadata and questions
+         """
+         prompt = self._build_generation_prompt(outline, survey_type, num_questions, target_audience)
+
+         messages = [
+             {"role": "system", "content": self._get_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             response = self.llm.generate(messages, max_tokens=2000, temperature=0.7)
+             survey_data = self._parse_survey_response(response)
+
+             # Add metadata
+             survey_data["metadata"] = {
+                 "outline": outline,
+                 "survey_type": survey_type,
+                 "target_audience": target_audience,
+                 "generated_question_count": len(survey_data.get("questions", []))
+             }
+
+             return survey_data
+
+         except Exception as e:
+             raise Exception(f"Survey generation failed: {str(e)}")
+
+     def _get_system_prompt(self) -> str:
+         """System prompt for survey generation"""
+         return """You are an expert survey designer and qualitative researcher with deep knowledge of:
+ - Industry best practices for survey design
+ - Question formulation techniques (open-ended, closed-ended, Likert scales)
+ - Avoiding bias and leading questions
+ - Survey flow and respondent experience
+ - Research methodologies (interviews, focus groups, ethnographic studies)
+
+ Your task is to generate professional, well-structured surveys that will yield high-quality research data.
+ Follow these principles:
+ 1. Use clear, unambiguous language
+ 2. Avoid double-barreled questions
+ 3. Include a logical flow from general to specific
+ 4. Balance open-ended and structured questions appropriately
+ 5. Consider the respondent's cognitive load
+ 6. Include screening questions when relevant
+ 7. Add instructions and context where helpful
+
+ Always respond with valid JSON containing the survey structure."""
+
+     def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
+         """Build the user prompt for survey generation"""
+         return f"""Generate a professional {survey_type} survey based on the following outline:
+
+ OUTLINE:
+ {outline}
+
+ REQUIREMENTS:
+ - Target number of questions: {num_questions}
+ - Target audience: {target_audience}
+ - Survey type: {survey_type}
+
+ Please generate a complete survey with:
+ 1. A clear title
+ 2. An introduction/welcome message
+ 3. Well-crafted questions following best practices
+ 4. Appropriate question types for the research goals
+ 5. A thank you/closing message
+
+ Respond with a JSON object in this exact format:
+ {{
+     "title": "Survey Title",
+     "introduction": "Welcome message and instructions",
+     "questions": [
+         {{
+             "id": 1,
+             "question_text": "The question to ask",
+             "question_type": "open_ended|multiple_choice|likert_scale|yes_no|rating",
+             "options": ["option1", "option2"],
+             "required": true|false,
+             "help_text": "Optional clarification"
+         }}
+     ],
+     "closing": "Thank you message"
+ }}
+
+ For open-ended questions, omit the "options" field.
+ For multiple choice and Likert questions, include appropriate options.
+ Ensure questions follow best practices and are unbiased."""
+
+     def _parse_survey_response(self, response: str) -> Dict:
+         """Parse LLM response into survey structure"""
+         # Try to extract JSON from response
+         response = response.strip()
+
+         # Handle code blocks (tolerating a missing closing fence)
+         if "```json" in response:
+             start = response.find("```json") + 7
+             end = response.find("```", start)
+             response = (response[start:end] if end != -1 else response[start:]).strip()
+         elif "```" in response:
+             start = response.find("```") + 3
+             end = response.find("```", start)
+             response = (response[start:end] if end != -1 else response[start:]).strip()
+
+         try:
+             survey_data = json.loads(response)
+
+             # Validate required fields
+             required_fields = ["title", "introduction", "questions", "closing"]
+             for field in required_fields:
+                 if field not in survey_data:
+                     raise ValueError(f"Missing required field: {field}")
+
+             # Validate questions
+             if not isinstance(survey_data["questions"], list) or len(survey_data["questions"]) == 0:
+                 raise ValueError("Survey must contain at least one question")
+
+             return survey_data
+
+         except json.JSONDecodeError as e:
+             raise Exception(f"Failed to parse survey JSON: {str(e)}\nResponse: {response}")
+
+     def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
+         """
+         Refine a single survey question.
+
+         Args:
+             question: The question to improve
+             improvement_type: Type of improvement (clarity, neutrality, specificity)
+
+         Returns:
+             Improved question text
+         """
+         # Pick only the relevant improvement goal (avoids empty bullet lines in the prompt)
+         improvement_goals = {
+             "clarity": "Is clearer and easier to understand",
+             "neutrality": "Removes bias and leading language",
+             "specificity": "Is more specific and actionable"
+         }
+         goal = improvement_goals.get(improvement_type, improvement_goals["clarity"])
+
+         prompt = f"""Improve the following survey question for better {improvement_type}:
+
+ Original Question: {question}
+
+ Provide an improved version that:
+ - {goal}
+
+ Respond with only the improved question text, no explanation."""
+
+         messages = [
+             {"role": "system", "content": "You are an expert survey question designer."},
+             {"role": "user", "content": prompt}
+         ]
+
+         return self.llm.generate(messages, max_tokens=150, temperature=0.5).strip()
+
+     def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
+         """
+         Generate follow-up questions for deeper exploration.
+
+         Args:
+             base_question: The main question
+             num_follow_ups: Number of follow-up questions to generate
+
+         Returns:
+             List of follow-up question texts
+         """
+         prompt = f"""Generate {num_follow_ups} follow-up questions for this main question:
+
+ Main Question: {base_question}
+
+ The follow-up questions should:
+ 1. Probe deeper into the topic
+ 2. Explore different aspects or dimensions
+ 3. Encourage detailed responses
+ 4. Follow a logical progression
+
+ Respond with a JSON array of question strings."""
+
+         messages = [
+             {"role": "system", "content": "You are an expert in qualitative research interviews."},
+             {"role": "user", "content": prompt}
+         ]
+
+         response = self.llm.generate(messages, max_tokens=500, temperature=0.7)
+
+         try:
+             # Extract JSON array
+             if "[" in response:
+                 start = response.find("[")
+                 end = response.rfind("]") + 1
+                 follow_ups = json.loads(response[start:end])
+                 return follow_ups[:num_follow_ups]
+         except (json.JSONDecodeError, TypeError):
+             pass
+
+         # Fallback: split by newlines
+         lines = [line.strip() for line in response.split("\n") if line.strip()]
+         return [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]
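Sketch of a generation call (requires a reachable LLM; the outline text is illustrative):

    from llm_backend import LLMBackend
    from survey_generator import SurveyGenerator

    generator = SurveyGenerator(LLMBackend())
    survey = generator.generate_survey(
        outline="Customer satisfaction with our onboarding flow",
        survey_type="qualitative",
        num_questions=8,
        target_audience="new customers",
    )
    print(survey["title"], len(survey["questions"]))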
survey_translator.py ADDED
@@ -0,0 +1,263 @@
+ """
+ Survey Translation Module - Translate surveys to reach wider audiences
+ """
+ from typing import Dict, List
+ from llm_backend import LLMBackend
+
+
+ class SurveyTranslator:
+     """
+     Translates surveys into multiple languages while preserving
+     meaning, context, and cultural appropriateness.
+     """
+
+     # Common target languages for research
+     SUPPORTED_LANGUAGES = {
+         "es": "Spanish",
+         "fr": "French",
+         "de": "German",
+         "pt": "Portuguese",
+         "it": "Italian",
+         "zh": "Chinese (Simplified)",
+         "ja": "Japanese",
+         "ko": "Korean",
+         "ar": "Arabic",
+         "hi": "Hindi",
+         "ru": "Russian",
+         "nl": "Dutch",
+         "sv": "Swedish",
+         "pl": "Polish",
+         "tr": "Turkish",
+         "vi": "Vietnamese",
+         "th": "Thai",
+         "id": "Indonesian"
+     }
+
+     def __init__(self, llm_backend: LLMBackend):
+         self.llm = llm_backend
+
+     def translate_survey(self, survey_data: Dict, target_language: str) -> Dict:
+         """
+         Translate an entire survey to a target language.
+
+         Args:
+             survey_data: Survey dictionary with title, introduction, questions, closing
+             target_language: Target language code (e.g., 'es', 'fr') or full name
+
+         Returns:
+             Translated survey dictionary with same structure
+         """
+         # Resolve language name
+         language_name = self._resolve_language(target_language)
+
+         if not language_name:
+             raise ValueError(f"Unsupported language: {target_language}")
+
+         # Create a copy of the survey data
+         translated_survey = survey_data.copy()
+
+         # Translate main fields
+         translated_survey["title"] = self._translate_text(
+             survey_data.get("title", ""),
+             language_name,
+             context="survey title"
+         )
+
+         translated_survey["introduction"] = self._translate_text(
+             survey_data.get("introduction", ""),
+             language_name,
+             context="survey introduction"
+         )
+
+         translated_survey["closing"] = self._translate_text(
+             survey_data.get("closing", ""),
+             language_name,
+             context="survey closing message"
+         )
+
+         # Translate questions
+         translated_questions = []
+         for question in survey_data.get("questions", []):
+             translated_q = self._translate_question(question, language_name)
+             translated_questions.append(translated_q)
+
+         translated_survey["questions"] = translated_questions
+
+         # Add translation metadata; copy the nested dict first, because
+         # survey_data.copy() is shallow and mutating it in place would
+         # also change the original survey's metadata
+         translated_survey["metadata"] = dict(survey_data.get("metadata", {}))
+         translated_survey["metadata"]["translated_to"] = language_name
+         translated_survey["metadata"]["original_language"] = "English"
+
+         return translated_survey
+
+     def translate_batch(self, survey_data: Dict, target_languages: List[str]) -> Dict[str, Dict]:
+         """
+         Translate survey to multiple languages.
+
+         Args:
+             survey_data: Original survey data
+             target_languages: List of target language codes
+
+         Returns:
+             Dictionary mapping language codes to translated surveys
+         """
+         translations = {}
+
+         for lang_code in target_languages:
+             try:
+                 translated = self.translate_survey(survey_data, lang_code)
+                 translations[lang_code] = translated
+             except Exception as e:
+                 translations[lang_code] = {"error": str(e)}
+
+         return translations
+
+     def _resolve_language(self, language: str) -> str:
+         """Resolve language code or name to full name"""
+         language = language.strip().lower()
+
+         # Check if it's a code
+         if language in self.SUPPORTED_LANGUAGES:
+             return self.SUPPORTED_LANGUAGES[language]
+
+         # Check if it's a full name
+         for code, name in self.SUPPORTED_LANGUAGES.items():
+             if name.lower() == language:
+                 return name
+
+         # Return as-is if not found (LLM might still handle it)
+         return language.title()
+
+     def _translate_text(self, text: str, target_language: str, context: str = "") -> str:
+         """
+         Translate a piece of text with context awareness.
+
+         Args:
+             text: Text to translate
+             target_language: Target language name
+             context: Context for better translation (e.g., "survey question")
+
+         Returns:
+             Translated text
+         """
+         if not text or not text.strip():
+             return text
+
+         context_note = f" (this is a {context})" if context else ""
+
+         prompt = f"""Translate the following text to {target_language}{context_note}.
+
+ Maintain:
+ - The original meaning and nuance
+ - Professional and respectful tone
+ - Cultural appropriateness
+ - Any formatting or structure
+
+ Original text:
+ {text}
+
+ Provide only the translation, no explanations or notes."""
+
+         messages = [
+             {"role": "system", "content": self._get_translation_system_prompt()},
+             {"role": "user", "content": prompt}
+         ]
+
+         try:
+             translation = self.llm.generate(messages, max_tokens=1000, temperature=0.3)
+             return translation.strip()
+         except Exception as e:
+             raise Exception(f"Translation failed: {str(e)}")
+
+     def _translate_question(self, question: Dict, target_language: str) -> Dict:
+         """
+         Translate a single question with all its components.
+
+         Args:
+             question: Question dictionary
+             target_language: Target language name
+
+         Returns:
+             Translated question dictionary
+         """
+         translated_q = question.copy()
+
+         # Translate question text
+         translated_q["question_text"] = self._translate_text(
+             question.get("question_text", ""),
+             target_language,
+             context="survey question"
+         )
+
+         # Translate options if present
+         if "options" in question and question["options"]:
+             translated_options = []
+             for option in question["options"]:
+                 translated_option = self._translate_text(
+                     option,
+                     target_language,
+                     context="answer option"
+                 )
+                 translated_options.append(translated_option)
+             translated_q["options"] = translated_options
+
+         # Translate help text if present
+         if "help_text" in question and question["help_text"]:
+             translated_q["help_text"] = self._translate_text(
+                 question["help_text"],
+                 target_language,
+                 context="help text"
+             )
+
+         return translated_q
+
+     def _get_translation_system_prompt(self) -> str:
+         """System prompt for translation tasks"""
+         return """You are an expert translator specializing in survey research and qualitative studies.
+
+ Your translations must:
+ 1. Preserve the exact meaning and intent of the original text
+ 2. Use culturally appropriate language for the target audience
+ 3. Maintain professional and neutral tone
+ 4. Adapt idioms and expressions appropriately
+ 5. Keep the same level of formality
+ 6. Preserve any special formatting or structure
+
+ For survey questions, be especially careful to:
+ - Avoid introducing bias
+ - Keep questions clear and unambiguous
+ - Maintain the same question type and structure
+ - Use natural, conversational language when appropriate
+
+ Provide accurate, natural-sounding translations that a native speaker would use."""
+
+     def back_translate(self, translated_text: str, original_language: str = "English") -> str:
+         """
+         Back-translate text to check translation quality.
+
+         Args:
+             translated_text: The translated text
+             original_language: Language to translate back to
+
+         Returns:
+             Back-translated text
+         """
+         prompt = f"""Translate the following text back to {original_language}.
+
+ Text to translate:
+ {translated_text}
+
+ Provide only the translation, no explanations."""
+
+         messages = [
+             {"role": "system", "content": "You are an expert translator. Translate accurately."},
+             {"role": "user", "content": prompt}
+         ]
+
+         return self.llm.generate(messages, max_tokens=1000, temperature=0.3).strip()
+
+     def get_supported_languages(self) -> Dict[str, str]:
+         """Get dictionary of supported language codes and names"""
+         return self.SUPPORTED_LANGUAGES.copy()
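Typical translator usage (sketch; the survey variable would be a dict produced by SurveyGenerator):

    from llm_backend import LLMBackend
    from survey_translator import SurveyTranslator

    translator = SurveyTranslator(LLMBackend())
    spanish = translator.translate_survey(survey, "es")
    batch = translator.translate_batch(survey, ["fr", "de"])  # {'fr': {...}, 'de': {...}}
    check = translator.back_translate(spanish["title"])       # rough quality check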
test_app.py ADDED
@@ -0,0 +1,130 @@
+ """
+ Basic test script for ConversAI modules
+ Run this to verify core functionality
+ """
+ from llm_backend import LLMBackend, LLMProvider
+ from survey_generator import SurveyGenerator
+ from survey_translator import SurveyTranslator
+ from data_analyzer import DataAnalyzer
+
+
+ def test_llm_backend():
+     """Test LLM backend initialization"""
+     print("\n=== Testing LLM Backend ===")
+     try:
+         backend = LLMBackend(provider=LLMProvider.LM_STUDIO)
+         print(f"βœ“ Backend initialized with provider: {backend.provider}")
+         print(f"βœ“ Model: {backend.model}")
+         print(f"βœ“ API URL: {backend.api_url}")
+         return backend
+     except Exception as e:
+         print(f"βœ— Backend initialization failed: {e}")
+         return None
+
+
+ def test_survey_generator(backend):
+     """Test survey generation"""
+     print("\n=== Testing Survey Generator ===")
+     if not backend:
+         print("βœ— Skipping (no backend)")
+         return None
+
+     try:
+         gen = SurveyGenerator(backend)
+         print("βœ“ Survey generator initialized")
+
+         # Note: Actual generation requires LLM connection
+         print("  (Actual survey generation requires LLM connection)")
+         return gen
+     except Exception as e:
+         print(f"βœ— Survey generator failed: {e}")
+         return None
+
+
+ def test_survey_translator(backend):
+     """Test survey translator"""
+     print("\n=== Testing Survey Translator ===")
+     if not backend:
+         print("βœ— Skipping (no backend)")
+         return None
+
+     try:
+         translator = SurveyTranslator(backend)
+         print("βœ“ Translator initialized")
+
+         # Test language list
+         langs = translator.get_supported_languages()
+         print(f"βœ“ Supports {len(langs)} languages")
+         print(f"  Sample languages: {', '.join(list(langs.values())[:5])}")
+         return translator
+     except Exception as e:
+         print(f"βœ— Translator failed: {e}")
+         return None
+
+
+ def test_data_analyzer(backend):
+     """Test data analyzer"""
+     print("\n=== Testing Data Analyzer ===")
+     if not backend:
+         print("βœ— Skipping (no backend)")
+         return None
+
+     try:
+         analyzer = DataAnalyzer(backend)
+         print("βœ“ Analyzer initialized")
+
+         # Sample data for a live run (kept for reference)
+         sample_responses = [
+             {"q1": "I had a great experience", "q2": "Very satisfied"},
+             {"q1": "It was okay", "q2": "Neutral feelings"},
+             {"q1": "Not very good", "q2": "Disappointed"}
+         ]
+
+         # Note: Actual analysis requires LLM connection
+         print("  (Actual analysis requires LLM connection)")
+         return analyzer
+     except Exception as e:
+         print(f"βœ— Analyzer failed: {e}")
+         return None
+
+
+ def test_modules():
+     """Test all modules"""
+     print("="*50)
+     print("ConversAI Module Tests")
+     print("="*50)
+
+     # Test backend
+     backend = test_llm_backend()
+
+     # Test generators
+     gen = test_survey_generator(backend)
+     translator = test_survey_translator(backend)
+     analyzer = test_data_analyzer(backend)
+
+     # Summary
+     print("\n=== Test Summary ===")
+     modules = {
+         "LLM Backend": backend is not None,
+         "Survey Generator": gen is not None,
+         "Survey Translator": translator is not None,
+         "Data Analyzer": analyzer is not None
+     }
+
+     for module, status in modules.items():
+         symbol = "βœ“" if status else "βœ—"
+         print(f"{symbol} {module}")
+
+     all_passed = all(modules.values())
+     print(f"\n{'βœ“ All tests passed!' if all_passed else 'βœ— Some tests failed'}")
+
+     if not all_passed:
+         print("\nNote: Make sure your LLM backend is configured correctly.")
+         print("Check environment variables or .env file.")
+
+     return all_passed
+
+
+ if __name__ == "__main__":
+     test_modules()
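These tests only verify construction; a hypothetical stub backend (not part of the commit) shows how the same checks could run fully offline:

    class StubBackend:
        """Stand-in for LLMBackend that never makes a network call."""
        provider, model, api_url = "stub", "stub-model", "n/a"

        def generate(self, messages, max_tokens=1000, temperature=0.7, json_mode=False):
            # Canned reply satisfying SurveyGenerator's required fields
            return ('{"title": "Stub", "introduction": "Hi", '
                    '"questions": [{"id": 1, "question_text": "Q?", '
                    '"question_type": "open_ended", "required": true}], '
                    '"closing": "Thanks"}')

    test_survey_generator(StubBackend())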