jmisak committed on
Commit
db39ccf
·
verified ·
1 Parent(s): 28613b6

Upload 6 files

Browse files
Files changed (6) hide show
  1. app.py +1329 -1013
  2. conversation_flow.py +90 -0
  3. conversation_moderator.py +41 -19
  4. data_analyzer.py +65 -44
  5. llm_backend.py +73 -40
  6. survey_generator.py +73 -46
app.py CHANGED
@@ -1,1013 +1,1329 @@
1
- """
2
- ConversAI - AI-Powered Qualitative Research Assistant
3
- Production-grade survey generation, translation, and analysis platform
4
- """
5
- import gradio as gr
6
- import json
7
- import os
8
- import traceback
9
- from typing import Dict, List, Optional
10
-
11
- from llm_backend import LLMBackend, LLMProvider
12
- from survey_generator import SurveyGenerator
13
- from survey_translator import SurveyTranslator
14
- from data_analyzer import DataAnalyzer
15
- from export_utils import (save_json_file, survey_to_csv, analysis_to_markdown_file,
16
- conversation_to_transcript, conversation_to_json, conversation_to_csv,
17
- flow_to_markdown)
18
- from conversation_flow import ConversationFlow, ConversationNode, create_example_flow
19
- from conversation_session import ConversationSession, SessionManager
20
- from conversation_moderator import ConversationModerator
21
-
22
-
23
# Module-level state shared by the UI handlers.
# Nothing is selected at startup: no survey, no flow, no interview session.
current_survey = current_flow = current_session = None

# Survey responses held at module scope.
current_responses = []

# Conversation flows created or loaded so far, keyed by flow id.
saved_flows = {}

# Creates the conversation sessions used by the interview handlers.
session_manager = SessionManager()
32
-
33
-
34
def initialize_backend():
    """Pick and construct the LLM backend described by the environment.

    Resolution order:

    1. ``LLM_PROVIDER`` set to ``openai``, ``anthropic`` or ``huggingface``
       with the matching API key present, or set to ``lm_studio`` (no key
       is checked for it).
    2. Otherwise auto-detection by credentials: HuggingFace
       (``HF_TOKEN`` / ``HUGGINGFACE_API_KEY``), then OpenAI, then Anthropic.

    Returns:
        An ``LLMBackend`` instance, or ``None`` when no credentials are found
        or when construction raises (the traceback is printed, not re-raised).
    """
    try:
        print("=== LLM Backend Initialization ===")
        # Report only whether each credential exists -- never any part of its value.
        for var in ("HF_TOKEN", "HUGGINGFACE_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY"):
            print(f"{var}: {'SET' if os.getenv(var) else 'NOT SET'}")
        print(f"LLM_PROVIDER: {os.getenv('LLM_PROVIDER', 'NOT SET')}")

        provider_env = os.getenv("LLM_PROVIDER", "").lower()

        # Priority 1: explicitly requested provider
        if provider_env == "openai" and os.getenv("OPENAI_API_KEY"):
            print("Using OpenAI (explicit)")
            return LLMBackend(provider=LLMProvider.OPENAI)
        elif provider_env == "anthropic" and os.getenv("ANTHROPIC_API_KEY"):
            print("Using Anthropic (explicit)")
            return LLMBackend(provider=LLMProvider.ANTHROPIC)
        elif provider_env == "huggingface" and (os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")):
            api_key = os.getenv("HUGGINGFACE_API_KEY") or os.getenv("HF_TOKEN")
            print("Using HuggingFace (explicit)")
            return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=api_key)
        elif provider_env == "lm_studio":
            print("Using LM Studio (explicit)")
            return LLMBackend(provider=LLMProvider.LM_STUDIO)

        if provider_env:
            # The requested provider was unknown or lacked its key; say so
            # instead of silently picking a different one below.
            print(
                f"LLM_PROVIDER={provider_env!r} could not be used "
                "(unknown provider or missing credentials); falling back to auto-detection"
            )

        # Priority 2: auto-detect based on available credentials.
        # HuggingFace is checked first.
        hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
        if hf_token:
            print("Auto-detected HuggingFace credentials, using HF Inference API")
            return LLMBackend(provider=LLMProvider.HUGGINGFACE, api_key=hf_token)
        if os.getenv("OPENAI_API_KEY"):
            print("Auto-detected OpenAI credentials")
            return LLMBackend(provider=LLMProvider.OPENAI)
        if os.getenv("ANTHROPIC_API_KEY"):
            print("Auto-detected Anthropic credentials")
            return LLMBackend(provider=LLMProvider.ANTHROPIC)

        # No credentials found - return None so the UI can show an error
        print("=" * 60)
        print("WARNING: No LLM provider credentials found!")
        print("=" * 60)
        print("For HuggingFace Spaces:")
        print(" - HF_TOKEN should be automatically available")
        print(" - Make sure your Space is PUBLIC")
        print(" - Or add HUGGINGFACE_API_KEY in Settings")
        print("")
        print("For other providers, set one of:")
        print(" - OPENAI_API_KEY")
        print(" - ANTHROPIC_API_KEY")
        print(" - HUGGINGFACE_API_KEY")
        print("=" * 60)
        return None

    except Exception as e:
        # Startup must not crash the app; callers treat None as "not configured".
        print(f"Error during backend initialization: {e}")
        traceback.print_exc()
        return None
98
-
99
-
100
# Build the backend once at import time; the components below all share it.
llm_backend = initialize_backend()

# Without a backend every component stays None; the handlers check for that
# before doing any work.
survey_gen = SurveyGenerator(llm_backend) if llm_backend else None
survey_trans = SurveyTranslator(llm_backend) if llm_backend else None
data_analyzer = DataAnalyzer(llm_backend) if llm_backend else None

if llm_backend:
    print(f"✓ ConversAI initialized with {llm_backend.provider.value} provider")
else:
    print("✗ ConversAI initialization incomplete - no LLM credentials found")
114
-
115
-
116
- # ===========================
117
- # Survey Generation Functions
118
- # ===========================
119
-
120
def generate_survey_from_outline(outline: str, survey_type: str, num_questions: int, audience: str):
    """Create a survey from the user's outline and stage it for download.

    On success the generated survey becomes the module-level
    ``current_survey``.

    Returns:
        A ``(status message, markdown preview, JSON file path or None)`` tuple;
        on any failure the preview is ``""`` and the path is ``None``.
    """
    global current_survey

    # Guard: backend must be configured
    if not survey_gen:
        setup_help = "\n".join([
            "❌ LLM backend not configured. Please set up API credentials:",
            "- For HuggingFace Spaces: HF_TOKEN is auto-available",
            "- For OpenAI: Set OPENAI_API_KEY",
            "- For Anthropic: Set ANTHROPIC_API_KEY",
            "- For HuggingFace: Set HUGGINGFACE_API_KEY",
        ])
        return setup_help, "", None

    # Guard: outline must contain something other than whitespace
    if not (outline and outline.strip()):
        return "❌ Please provide an outline or topic description.", "", None

    # Guard: question count must be within range
    if num_questions < 1 or num_questions > 50:
        return "❌ Number of questions must be between 1 and 50.", "", None

    try:
        generated = survey_gen.generate_survey(
            outline=outline,
            survey_type=survey_type.lower(),
            num_questions=num_questions,
            target_audience=audience,
        )
        current_survey = generated

        preview = format_survey_display(generated)
        json_path = save_json_file(generated, "survey")

        question_total = len(generated.get('questions', []))
        success = f"✅ Survey generated successfully! Contains {question_total} questions."
        return success, preview, json_path

    except Exception as exc:
        failure = f"❌ Error generating survey: {str(exc)}"
        print(f"Survey generation error: {traceback.format_exc()}")
        return failure, "", None
170
-
171
-
172
def format_survey_display(survey_data: Dict) -> str:
    """Render a survey dict as Markdown.

    Args:
        survey_data: Mapping read via the keys ``title``, ``introduction``,
            ``questions`` and ``closing``. Each question is read via
            ``question_text``, ``question_type``, ``options``, ``help_text``
            and ``required``. Every key is optional.

    Returns:
        Markdown with a title, an introduction, a numbered question list and
        a closing section.
    """
    parts = [
        f"# {survey_data.get('title', 'Survey')}\n\n",
        f"## Introduction\n{survey_data.get('introduction', '')}\n\n",
        "## Questions\n\n",
    ]

    # ``or []`` also covers an explicit ``"questions": None``, which would
    # otherwise make enumerate() raise.
    for i, q in enumerate(survey_data.get('questions') or [], 1):
        parts.append(f"**{i}. {q.get('question_text', '')}**\n")
        parts.append(f" - Type: {q.get('question_type', 'N/A')}\n")

        if q.get('options'):
            parts.append(" - Options:\n")
            parts.extend(f" - {opt}\n" for opt in q['options'])

        if q.get('help_text'):
            parts.append(f" - Help: {q['help_text']}\n")

        parts.append(f" - Required: {'Yes' if q.get('required', False) else 'No'}\n\n")

    parts.append(f"## Closing\n{survey_data.get('closing', '')}\n")

    # A single join avoids rebuilding the string on every append.
    return "".join(parts)
195
-
196
-
197
- # ===========================
198
- # Translation Functions
199
- # ===========================
200
-
201
def translate_current_survey(target_languages: List[str]):
    """Translate the module-level ``current_survey`` into each requested language.

    Args:
        target_languages: Language codes passed to
            ``survey_trans.translate_survey``.

    Returns:
        A ``(status text, markdown of all successful translations, JSON file
        path or None)`` tuple. The status text has one line per language.
        When nothing succeeds, the display is ``""`` and the path is ``None``.
    """
    if not survey_trans:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None
        )

    if not current_survey:
        return "❌ Please generate or upload a survey first.", "", None

    if not target_languages:
        return "❌ Please select at least one target language.", "", None

    try:
        translations = {}
        status_messages = []
        display_parts = []
        success_count = 0

        for lang_code in target_languages:
            # Fall back to the raw code so a failing name lookup cannot
            # abort the remaining languages.
            lang_name = lang_code
            try:
                lang_name = survey_trans._resolve_language(lang_code)
                translated = survey_trans.translate_survey(current_survey, lang_code)
            except Exception as e:
                status_messages.append(f"❌ Failed to translate to {lang_name}: {str(e)}")
                print(f"Translation error for {lang_code}: {traceback.format_exc()}")
                continue

            translations[lang_code] = translated

            # Results carrying an "error" key are hidden from the display, so
            # they must not be reported as successes either.
            # NOTE(review): assumes the translator signals failure this way -- confirm.
            if isinstance(translated, dict) and "error" in translated:
                status_messages.append(
                    f"❌ Failed to translate to {lang_name}: {translated['error']}"
                )
                continue

            status_messages.append(f"✅ Translated to {lang_name}")
            success_count += 1

            display_parts.append(f"\n{'='*50}\n")
            display_parts.append(f"TRANSLATION: {lang_name.upper()}\n")
            display_parts.append(f"{'='*50}\n\n")
            display_parts.append(format_survey_display(translated))

        if success_count == 0:
            return "❌ All translations failed. Please check your LLM configuration.", "", None

        # Save to file for download
        filepath = save_json_file(translations, "translations")

        return "\n".join(status_messages), "".join(display_parts), filepath

    except Exception as e:
        error_msg = f"❌ Error during translation: {str(e)}"
        print(f"Translation error: {traceback.format_exc()}")
        return error_msg, "", None
260
-
261
-
262
def get_language_choices():
    """Return the ``"<code> - <name>"`` labels offered in the language picker."""
    # Read the class-level table directly, so no translator instance (and no
    # LLM backend) is needed.
    from survey_translator import SurveyTranslator

    labels = []
    for code, name in SurveyTranslator.SUPPORTED_LANGUAGES.items():
        labels.append(f"{code} - {name}")
    return labels
268
-
269
-
270
- # ===========================
271
- # Data Analysis Functions
272
- # ===========================
273
-
274
def analyze_survey_data(responses_json: str, questions_json: Optional[str] = None):
    """Analyze survey responses supplied as JSON text.

    Args:
        responses_json: JSON array of response objects (required, non-empty).
        questions_json: Optional JSON array of question objects; blank or
            ``None`` means "no questions supplied".

    Returns:
        A ``(status message, markdown report, results JSON path or None)``
        tuple. On any validation or processing failure the report is ``""``
        and the path is ``None``.
    """
    if not data_analyzer:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None
        )

    if not responses_json or not responses_json.strip():
        return "❌ Please provide survey responses in JSON format.", "", None

    try:
        responses = json.loads(responses_json)
        questions = json.loads(questions_json) if questions_json and questions_json.strip() else None

        if not isinstance(responses, list):
            return "❌ Responses must be a JSON array.", "", None

        if len(responses) == 0:
            return "❌ No responses to analyze.", "", None

        # Compare against None rather than truthiness so that falsy non-arrays
        # such as {} or 0 are rejected instead of reaching the analyzer.
        if questions is not None and not isinstance(questions, list):
            return "❌ Questions must be a JSON array.", "", None

        analysis_results = data_analyzer.analyze_responses(responses, questions)

        if "error" in analysis_results:
            return f"❌ Analysis error: {analysis_results['error']}", "", None

        report_md = data_analyzer.generate_report(analysis_results, format="markdown")

        # Both files are written; only the JSON path is handed to the UI.
        json_filepath = save_json_file(analysis_results, "analysis_results")
        analysis_to_markdown_file(report_md, "analysis_report")

        status_msg = f"✅ Analysis complete! Analyzed {len(responses)} responses."
        if questions:
            status_msg += f" Considered {len(questions)} questions."

        return status_msg, report_md, json_filepath

    except json.JSONDecodeError as e:
        return f"❌ Invalid JSON format: {str(e)}", "", None
    except Exception as e:
        error_msg = f"❌ Error during analysis: {str(e)}"
        print(f"Analysis error: {traceback.format_exc()}")
        return error_msg, "", None
327
-
328
-
329
def load_example_responses():
    """Return three sample respondents (keys q1-q3) as indented JSON text."""
    question_keys = ("q1", "q2", "q3")
    # One tuple of answers per respondent, in q1, q2, q3 order.
    sample_answers = (
        (
            "The medication helped reduce my symptoms significantly within the first week.",
            "I experienced some mild side effects like drowsiness in the beginning.",
            "Overall, I'm satisfied with the treatment and would recommend it to others.",
        ),
        (
            "I didn't notice much improvement in my condition after taking the medication.",
            "The side effects were quite severe and made it difficult to continue.",
            "I had to stop taking it after two weeks due to adverse reactions.",
        ),
        (
            "The medication worked well but took about 3-4 weeks to show results.",
            "No major side effects, just some occasional nausea.",
            "It's been effective for managing my symptoms on a daily basis.",
        ),
    )
    respondents = [dict(zip(question_keys, answers)) for answers in sample_answers]
    return json.dumps(respondents, indent=2)
349
-
350
-
351
- # ===========================
352
- # Conversational Research Handlers
353
- # ===========================
354
-
355
def create_new_flow(flow_name: str, flow_description: str):
    """Create an empty conversation flow, make it current and register it.

    Returns:
        A ``(status message, markdown summary, flow id or None)`` tuple.
    """
    global current_flow, saved_flows

    name_given = flow_name and flow_name.strip()
    if not name_given:
        return "❌ Please provide a flow name.", "", None

    try:
        new_flow = ConversationFlow(name=flow_name, description=flow_description)
        current_flow = new_flow
        saved_flows[new_flow.id] = new_flow

        status = f"✅ Flow '{flow_name}' created successfully!"
        summary = f"**Flow ID:** {new_flow.id}\n**Name:** {new_flow.name}\n**Description:** {new_flow.description}"
        return status, summary, new_flow.id
    except Exception as exc:
        return f"❌ Error creating flow: {str(exc)}", "", None
374
-
375
-
376
def load_example_flow():
    """Load the example flow, make it current and register it.

    Returns:
        A ``(status message, markdown rendering of the flow, flow id)`` tuple.
    """
    global current_flow, saved_flows

    current_flow = create_example_flow()
    saved_flows[current_flow.id] = current_flow

    status = f"✅ Example flow loaded: {current_flow.name}"
    rendered = display_flow(current_flow)
    return status, rendered, current_flow.id
389
-
390
-
391
def add_flow_node(flow_id: str, node_content: str, node_type: str):
    """Append a step to a saved flow and link it after the previous step.

    Args:
        flow_id: Key of the flow in ``saved_flows``.
        node_content: Question or message text for the new step.
        node_type: Step type label; lower-cased before the node is built.

    Returns:
        A ``(status message, markdown rendering of the flow)`` tuple; the
        rendering is ``""`` on failure.
    """
    global current_flow, saved_flows

    if not flow_id:
        # Prefixed with the error marker like the other handler messages.
        return "❌ No flow selected.", ""

    flow = saved_flows.get(flow_id)
    if not flow:
        return "❌ Flow not found.", ""

    if not node_content or not node_content.strip():
        return "❌ Please provide content for the node.", ""

    try:
        node = ConversationNode(content=node_content, node_type=node_type.lower())

        # Point the current last step at the new one before appending it.
        if flow.nodes:
            flow.nodes[-1].next = node.id

        flow.add_node(node)
        current_flow = flow

        return (
            f"✅ Node added successfully! Total nodes: {len(flow.nodes)}",
            display_flow(flow)
        )
    except Exception as e:
        return f"❌ Error adding node: {str(e)}", ""
422
-
423
-
424
def display_flow(flow: ConversationFlow) -> str:
    """Render a flow's name, description and numbered steps as Markdown.

    Returns ``"No flow to display"`` when the flow is missing or has no nodes.
    """
    if not (flow and flow.nodes):
        return "No flow to display"

    header = [
        f"# {flow.name}\n\n",
        f"**Description:** {flow.description}\n\n",
        f"**Total Steps:** {len(flow.nodes)}\n\n",
        "---\n\n",
    ]
    steps = [
        f"### Step {position}: {node.type.capitalize()}\n\n{node.content}\n\n"
        for position, node in enumerate(flow.nodes, 1)
    ]
    return "".join(header + steps)
439
-
440
-
441
def save_current_flow(flow_id: str):
    """Write the selected flow to a JSON file.

    Returns:
        A ``(status message, file path or None)`` tuple.
    """
    if not flow_id:
        return "❌ No flow selected.", None

    selected = saved_flows.get(flow_id)
    if not selected:
        return "❌ Flow not found.", None

    try:
        saved_path = save_json_file(selected.to_dict(), "conversation_flow")
        confirmation = f"✅ Flow saved to {saved_path}"
        return confirmation, saved_path
    except Exception as exc:
        return f"❌ Error saving flow: {str(exc)}", None
455
-
456
-
457
def start_conversation_session(flow_id: str):
    """Open a new interview session for a saved flow and get the opening message.

    The new session is stored in the module-level ``current_session``.

    Args:
        flow_id: Key of the flow in ``saved_flows``.

    Returns:
        A ``(chat history, status message)`` tuple. On success the history is
        ``[[None, opening_message]]``; on failure it is ``[]``.
    """
    global current_session, session_manager

    if not flow_id:
        return [], "❌ Please select a flow first."

    flow = saved_flows.get(flow_id)
    if not flow:
        return [], "❌ Flow not found."

    if not llm_backend:
        return [], "❌ LLM backend not initialized."

    try:
        session = session_manager.create_session(flow_id=flow.id, flow_name=flow.name)
        current_session = session

        moderator = ConversationModerator(llm_backend, flow)
        opening_message = moderator.start_conversation(session)

        # The opening turn has no user message, hence None in the first slot.
        # The status carries the success marker like the other handler messages.
        return [[None, opening_message]], f"✅ Conversation started! Session ID: {session.id}"

    except Exception as e:
        return [], f"❌ Error starting conversation: {str(e)}"
487
-
488
-
489
def chat_with_moderator(user_message: str, history: List):
    """Send one respondent message to the AI moderator and extend the chat history.

    Args:
        user_message: The respondent's text for this turn.
        history: Existing chat history as ``[user, assistant]`` pairs; ``None``
            is treated as an empty history.

    Returns:
        A ``(history, status message)`` tuple. The returned history is a new
        list; the caller's list is never modified.
    """
    global current_session

    # Work on a copy: a None history would fail on append, and appending in
    # place would silently change the caller's list.
    history = list(history) if history else []

    if not current_session:
        return history, "❌ No active session. Please start a conversation first."

    if not llm_backend:
        return history, "❌ LLM backend not initialized."

    if not user_message or not user_message.strip():
        return history, "❌ Please enter a message."

    try:
        flow = saved_flows.get(current_session.flow_id)
        if not flow:
            return history, "❌ Flow not found."

        # A fresh moderator is built for every turn.
        moderator = ConversationModerator(llm_backend, flow)
        ai_response = moderator.process_user_response(current_session, user_message)

        history.append([user_message, ai_response])

        status = f"Session: {current_session.id} | Turns: {current_session.get_turn_count()}"
        if current_session.status == "completed":
            status += " | Conversation completed"

        return history, status

    except Exception as e:
        return history, f"❌ Error: {str(e)}"
525
-
526
-
527
def export_conversation():
    """Export the active session as a transcript file.

    Returns:
        A ``(status message, file path or None)`` tuple.
    """
    global current_session

    if not current_session:
        return "❌ No active session to export.", None

    try:
        transcript_path = conversation_to_transcript(current_session)
        confirmation = f"✅ Conversation exported to {transcript_path}"
        return confirmation, transcript_path
    except Exception as exc:
        return f"❌ Error exporting conversation: {str(exc)}", None
539
-
540
-
541
- # ===========================
542
- # Gradio Interface
543
- # ===========================
544
-
545
- def create_interface():
546
- """Create the main Gradio interface"""
547
-
548
- with gr.Blocks(
549
- title="ProjectEcho - Qualitative Research Assistant",
550
- theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate")
551
- ) as app:
552
-
553
- gr.Markdown("""
554
- # ProjectEcho - Your AI-Powered Qualitative Research Assistant
555
-
556
- Battle the blank page, reach global audiences, and uncover insights with AI assistance.
557
- """)
558
-
559
- # Show backend status
560
- if llm_backend:
561
- status_msg = f"✅ **Active LLM Provider:** {llm_backend.provider.value.upper()} | Model: {llm_backend.model}"
562
- bg_color = "rgba(0, 255, 0, 0.1)"
563
- else:
564
- status_msg = """⚠️ **LLM Provider Not Configured**
565
-
566
- **To use this app, you need to configure an LLM provider:**
567
-
568
- 1. **Easiest (HuggingFace Spaces):** Make sure your Space is PUBLIC and HF_TOKEN will be auto-available
569
- 2. **Best Quality:** Add `OPENAI_API_KEY` in Space Settings → Variables
570
- 3. **Alternative:** Add `ANTHROPIC_API_KEY` or `HUGGINGFACE_API_KEY`
571
-
572
- See the **About** tab for detailed instructions."""
573
- bg_color = "rgba(255, 165, 0, 0.2)"
574
-
575
- gr.Markdown(f'<div style="background-color: {bg_color}; padding: 15px; border-radius: 5px; margin: 10px 0; border-left: 4px solid #FF6B6B;">{status_msg}</div>')
576
-
577
- with gr.Tabs() as tabs:
578
-
579
- # ========== SURVEY GENERATION TAB ==========
580
- with gr.Tab("📝 Generate Survey"):
581
- gr.Markdown("""
582
- ## Battle the Blank Page
583
- Share an outline and get AI-powered surveys drafted in minutes,
584
- complete with industry best practices.
585
- """)
586
-
587
- with gr.Row():
588
- with gr.Column(scale=1):
589
- outline_input = gr.Textbox(
590
- label="Your Survey Outline or Topic",
591
- placeholder="Example: I want to understand patient experiences with a new diabetes medication, focusing on effectiveness, side effects, and quality of life impacts.",
592
- lines=6
593
- )
594
-
595
- survey_type_input = gr.Radio(
596
- label="Survey Type",
597
- choices=["Qualitative", "Quantitative", "Mixed"],
598
- value="Qualitative"
599
- )
600
-
601
- num_questions_input = gr.Slider(
602
- label="Number of Questions",
603
- minimum=5,
604
- maximum=25,
605
- value=10,
606
- step=1
607
- )
608
-
609
- audience_input = gr.Textbox(
610
- label="Target Audience",
611
- placeholder="Example: Adults aged 30-65 with Type 2 diabetes",
612
- value="General audience"
613
- )
614
-
615
- generate_btn = gr.Button("🚀 Generate Survey", variant="primary", size="lg")
616
-
617
- with gr.Column(scale=1):
618
- gen_status = gr.Textbox(label="Status", interactive=False)
619
- gen_output = gr.Markdown(label="Generated Survey")
620
-
621
- gen_download = gr.File(label="Download Survey JSON", visible=False)
622
-
623
- # Event handlers
624
- generate_btn.click(
625
- fn=generate_survey_from_outline,
626
- inputs=[outline_input, survey_type_input, num_questions_input, audience_input],
627
- outputs=[gen_status, gen_output, gen_download]
628
- ).then(
629
- fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
630
- inputs=[gen_download],
631
- outputs=[gen_download]
632
- )
633
-
634
- # ========== TRANSLATION TAB ==========
635
- with gr.Tab("🌍 Translate Survey"):
636
- gr.Markdown("""
637
- ## Reach Global Audiences
638
- Translate your surveys automatically to streamline efforts and reach wider audiences.
639
- """)
640
-
641
- with gr.Row():
642
- with gr.Column(scale=1):
643
- gr.Markdown("### Select Target Languages")
644
-
645
- # Create checkboxes for popular languages
646
- lang_checkboxes = gr.CheckboxGroup(
647
- label="Languages",
648
- choices=get_language_choices(),
649
- value=[]
650
- )
651
-
652
- translate_btn = gr.Button("🌐 Translate Survey", variant="primary", size="lg")
653
-
654
- gr.Markdown("""
655
- **Note:** Make sure you've generated a survey first, or upload one using the JSON format.
656
- """)
657
-
658
- with gr.Column(scale=1):
659
- trans_status = gr.Textbox(label="Translation Status", interactive=False)
660
- trans_output = gr.Markdown(label="Translations")
661
-
662
- trans_download = gr.File(label="Download Translations JSON", visible=False)
663
-
664
- # Event handlers
665
- def extract_lang_codes(selected_items):
666
- """Extract language codes from checkbox selections"""
667
- return [item.split(" - ")[0] for item in selected_items]
668
-
669
- translate_btn.click(
670
- fn=lambda x: translate_current_survey(extract_lang_codes(x)),
671
- inputs=[lang_checkboxes],
672
- outputs=[trans_status, trans_output, trans_download]
673
- ).then(
674
- fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
675
- inputs=[trans_download],
676
- outputs=[trans_download]
677
- )
678
-
679
- # ========== ANALYSIS TAB ==========
680
- with gr.Tab("📊 Analyze Data"):
681
- gr.Markdown("""
682
- ## Uncover Key Insights
683
- Upload your survey responses and get AI-assisted summaries of key findings,
684
- themes, and trends.
685
- """)
686
-
687
- with gr.Row():
688
- with gr.Column(scale=1):
689
- responses_input = gr.Textbox(
690
- label="Survey Responses (JSON)",
691
- placeholder='[{"q1": "response 1", "q2": "response 2"}, ...]',
692
- lines=10
693
- )
694
-
695
- questions_input = gr.Textbox(
696
- label="Questions (JSON, Optional)",
697
- placeholder='[{"question_text": "What is your experience?", ...}]',
698
- lines=5
699
- )
700
-
701
- with gr.Row():
702
- analyze_btn = gr.Button("🔍 Analyze Data", variant="primary", size="lg")
703
- example_btn = gr.Button("Load Example", variant="secondary")
704
-
705
- with gr.Column(scale=1):
706
- analysis_status = gr.Textbox(label="Status", interactive=False)
707
- analysis_output = gr.Markdown(label="Analysis Report")
708
-
709
- analysis_download = gr.File(label="Download Analysis JSON", visible=False)
710
-
711
- # Event handlers
712
- analyze_btn.click(
713
- fn=analyze_survey_data,
714
- inputs=[responses_input, questions_input],
715
- outputs=[analysis_status, analysis_output, analysis_download]
716
- ).then(
717
- fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
718
- inputs=[analysis_download],
719
- outputs=[analysis_download]
720
- )
721
-
722
- example_btn.click(
723
- fn=load_example_responses,
724
- outputs=[responses_input]
725
- )
726
-
727
- # ========== CONVERSATIONAL RESEARCH TAB ==========
728
- with gr.Tab("💬 Conversational Research"):
729
- gr.Markdown("""
730
- ## AI-Moderated Conversations
731
- Design conversation flows and conduct AI-powered qualitative interviews with respondents.
732
- """)
733
-
734
- with gr.Tabs():
735
- # Design Flow Sub-Tab
736
- with gr.Tab("🎨 Design Flow"):
737
- gr.Markdown("""
738
- ### Create Conversation Flows
739
- Design custom conversation paths for AI-moderated interviews.
740
- """)
741
-
742
- with gr.Row():
743
- with gr.Column(scale=1):
744
- gr.Markdown("#### Flow Setup")
745
-
746
- flow_name_input = gr.Textbox(
747
- label="Flow Name",
748
- placeholder="e.g., HCP Interview for New Dermatology Product",
749
- value=""
750
- )
751
-
752
- flow_desc_input = gr.Textbox(
753
- label="Flow Description",
754
- placeholder="Describe the purpose of this conversation flow...",
755
- lines=3
756
- )
757
-
758
- with gr.Row():
759
- create_flow_btn = gr.Button("✨ Create New Flow", variant="primary")
760
- load_example_flow_btn = gr.Button("📋 Load Example", variant="secondary")
761
-
762
- flow_id_state = gr.State(value="")
763
-
764
- gr.Markdown("#### Add Steps to Flow")
765
-
766
- node_content_input = gr.Textbox(
767
- label="Question/Message",
768
- placeholder="Enter the question or message for this step...",
769
- lines=4
770
- )
771
-
772
- node_type_input = gr.Radio(
773
- label="Step Type",
774
- choices=["Question", "End"],
775
- value="Question"
776
- )
777
-
778
- add_node_btn = gr.Button("➕ Add Step", variant="secondary")
779
-
780
- save_flow_btn = gr.Button("💾 Save Flow", variant="primary")
781
-
782
- with gr.Column(scale=1):
783
- flow_status = gr.Textbox(label="Status", interactive=False)
784
- flow_display = gr.Markdown(label="Flow Preview", value="No flow created yet")
785
-
786
- flow_download = gr.File(label="Download Flow JSON", visible=False)
787
-
788
- # Event handlers for flow design
789
- create_flow_btn.click(
790
- fn=create_new_flow,
791
- inputs=[flow_name_input, flow_desc_input],
792
- outputs=[flow_status, flow_display, flow_id_state]
793
- )
794
-
795
- load_example_flow_btn.click(
796
- fn=load_example_flow,
797
- outputs=[flow_status, flow_display, flow_id_state]
798
- )
799
-
800
- add_node_btn.click(
801
- fn=add_flow_node,
802
- inputs=[flow_id_state, node_content_input, node_type_input],
803
- outputs=[flow_status, flow_display]
804
- ).then(
805
- fn=lambda: "",
806
- outputs=[node_content_input]
807
- )
808
-
809
- save_flow_btn.click(
810
- fn=save_current_flow,
811
- inputs=[flow_id_state],
812
- outputs=[flow_status, flow_download]
813
- ).then(
814
- fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
815
- inputs=[flow_download],
816
- outputs=[flow_download]
817
- )
818
-
819
- # Conduct Interview Sub-Tab
820
- with gr.Tab("🎙️ Conduct Interview"):
821
- gr.Markdown("""
822
- ### AI-Moderated Interview
823
- Start a conversation session with the AI moderator using your designed flow.
824
- """)
825
-
826
- with gr.Row():
827
- with gr.Column(scale=1):
828
- conversation_flow_selector = gr.State(value="")
829
-
830
- gr.Markdown("""
831
- **Instructions:**
832
- 1. Design a flow in the 'Design Flow' tab first (or load the example)
833
- 2. Click 'Start Conversation' to begin
834
- 3. The AI moderator will ask questions from your flow
835
- 4. The AI adapts with follow-up questions based on responses
836
- 5. Export the conversation when finished
837
- """)
838
-
839
- with gr.Row():
840
- start_conversation_btn = gr.Button("🚀 Start Conversation", variant="primary")
841
- export_conversation_btn = gr.Button("📥 Export Conversation", variant="secondary")
842
-
843
- conversation_status = gr.Textbox(label="Session Status", interactive=False)
844
- conversation_download = gr.File(label="Download Transcript", visible=False)
845
-
846
- with gr.Column(scale=1):
847
- chatbot = gr.Chatbot(
848
- label="AI-Moderated Interview",
849
- height=500
850
- )
851
-
852
- msg_input = gr.Textbox(
853
- label="Your Response",
854
- placeholder="Type your response here...",
855
- lines=2
856
- )
857
-
858
- with gr.Row():
859
- submit_btn = gr.Button("Send", variant="primary")
860
- clear_btn = gr.Button("Clear")
861
-
862
- # Chat event handlers
863
- def user_submit(user_message, history):
864
- """Handle user message submission"""
865
- if not user_message:
866
- return history, history, ""
867
- return history, history + [[user_message, None]], ""
868
-
869
- def bot_respond(history):
870
- """Get bot response"""
871
- if not history or history[-1][1] is not None:
872
- return history, ""
873
-
874
- user_msg = history[-1][0]
875
- updated_history, status = chat_with_moderator(user_msg, history[:-1])
876
- return updated_history, status
877
-
878
- # Start conversation
879
- start_conversation_btn.click(
880
- fn=lambda: saved_flows[list(saved_flows.keys())[-1]].id if saved_flows else "",
881
- outputs=[conversation_flow_selector]
882
- ).then(
883
- fn=start_conversation_session,
884
- inputs=[conversation_flow_selector],
885
- outputs=[chatbot, conversation_status]
886
- )
887
-
888
- # Message submission
889
- msg_input.submit(
890
- fn=user_submit,
891
- inputs=[msg_input, chatbot],
892
- outputs=[chatbot, chatbot, msg_input],
893
- queue=False
894
- ).then(
895
- fn=bot_respond,
896
- inputs=[chatbot],
897
- outputs=[chatbot, conversation_status]
898
- )
899
-
900
- submit_btn.click(
901
- fn=user_submit,
902
- inputs=[msg_input, chatbot],
903
- outputs=[chatbot, chatbot, msg_input],
904
- queue=False
905
- ).then(
906
- fn=bot_respond,
907
- inputs=[chatbot],
908
- outputs=[chatbot, conversation_status]
909
- )
910
-
911
- clear_btn.click(lambda: None, None, chatbot, queue=False)
912
-
913
- # Export conversation
914
- export_conversation_btn.click(
915
- fn=export_conversation,
916
- outputs=[conversation_status, conversation_download]
917
- ).then(
918
- fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
919
- inputs=[conversation_download],
920
- outputs=[conversation_download]
921
- )
922
-
923
- # ========== ABOUT TAB ==========
924
- with gr.Tab("ℹ️ About"):
925
- gr.Markdown("""
926
- ## About ConversAI
927
-
928
- ConversAI is a comprehensive qualitative research assistant that helps you:
929
-
930
- ### 🎯 Generate Surveys
931
- - Create professional surveys from simple outlines
932
- - Follow industry best practices automatically
933
- - Save hours of questionnaire design time
934
-
935
- ### 🌍 Translate Globally
936
- - Reach audiences in 18+ languages
937
- - Maintain cultural appropriateness
938
- - Expand your research scope effortlessly
939
-
940
- ### 📊 Analyze Results
941
- - Extract key themes automatically
942
- - Identify patterns and trends
943
- - Generate actionable insights
944
-
945
- ### 🔧 Configuration Guide
946
-
947
- **For HuggingFace Spaces (Recommended):**
948
-
949
- No configuration needed! The app automatically uses the HF Inference API with the built-in `HF_TOKEN`.
950
-
951
- **Supported Models:**
952
- - Default: `mistralai/Mixtral-8x7B-Instruct-v0.1`
953
- - You can change by setting `LLM_MODEL` environment variable
954
-
955
- **For Other LLM Providers:**
956
-
957
- Add these environment variables in your Space Settings:
958
-
959
- 1. **OpenAI** (Best quality, paid):
960
- - `LLM_PROVIDER=openai`
961
- - `OPENAI_API_KEY=sk-your-key`
962
-
963
- 2. **Anthropic Claude** (Best reasoning, paid):
964
- - `LLM_PROVIDER=anthropic`
965
- - `ANTHROPIC_API_KEY=your-key`
966
-
967
- 3. **Custom HuggingFace Model**:
968
- - `LLM_PROVIDER=huggingface`
969
- - `LLM_MODEL=your-model-name`
970
-
971
- **💡 Pro Tip:** For production use, we recommend OpenAI or Anthropic for faster, more reliable results.
972
-
973
- **Supported LLM Providers:**
974
- - HuggingFace Inference API (Free tier available)
975
- - OpenAI (GPT-4, GPT-4o-mini, GPT-3.5)
976
- - Anthropic (Claude 3.5 Sonnet, Claude 3 Opus)
977
- - LM Studio (local development only)
978
-
979
- ### 📄 Data Privacy
980
-
981
- - All processing is done through your configured LLM provider
982
- - No data is stored permanently by this application
983
- - Survey data and responses remain in your control
984
-
985
- ### 🚀 Getting Started
986
-
987
- 1. **Generate** a survey from your research outline
988
- 2. **Translate** it to reach global audiences
989
- 3. Collect responses from participants
990
- 4. **Analyze** the data to uncover insights
991
-
992
- ---
993
-
994
- Built with ❤️ using Gradio and state-of-the-art LLMs
995
- """)
996
-
997
- return app
998
-
999
-
1000
- # ===========================
1001
- # Main Entry Point
1002
- # ===========================
1003
-
1004
if __name__ == "__main__":
    demo = create_interface()

    # Launch with appropriate settings
    launch_settings = {
        "server_name": "0.0.0.0",  # Allow external access
        "server_port": 7860,  # Standard HF Spaces port
        "share": False,  # Don't create a public link (HF Spaces handles this)
        "show_error": True,
    }
    demo.launch(**launch_settings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Project Echo - AI-Powered Qualitative Research Assistant
3
+ Production-grade survey generation, translation, and analysis platform
4
+ """
5
+ import gradio as gr
6
+ import json
7
+ import os
8
+ import traceback
9
+ from typing import Dict, List, Optional
10
+
11
+ from llm_backend import LLMBackend, LLMProvider
12
+ from survey_generator import SurveyGenerator
13
+ from survey_translator import SurveyTranslator
14
+ from data_analyzer import DataAnalyzer
15
+ from export_utils import (save_json_file, survey_to_csv, analysis_to_markdown_file,
16
+ conversation_to_transcript, conversation_to_json, conversation_to_csv,
17
+ flow_to_markdown)
18
+ from conversation_flow import ConversationFlow, ConversationNode, create_example_flow
19
+ from conversation_session import ConversationSession, SessionManager
20
+ from conversation_moderator import ConversationModerator
21
+ from conversation_analytics import ConversationAnalytics
22
+
23
+
24
# Survey workspace: the most recently generated survey and its responses.
current_survey = None
current_responses = []

# Conversational-research workspace: flows are kept in memory keyed by flow id,
# and a single active flow/session is tracked for the chat UI.
session_manager = SessionManager()
saved_flows = {}
current_flow = None
current_session = None
33
+
34
+
35
def initialize_backend():
    """Select and construct the LLM backend from environment variables.

    Resolution order:
      1. An explicit ``LLM_PROVIDER`` (``openai``, ``anthropic``,
         ``huggingface`` or ``lm_studio``). It is honoured only when the
         matching credential is present; ``lm_studio`` needs none.
      2. Auto-detection by available credentials: HuggingFace first, then
         OpenAI, then Anthropic.

    Returns:
        An ``LLMBackend`` instance, or ``None`` when no usable credentials
        are found or when construction raises. Callers treat ``None`` as
        "not configured".
    """
    try:
        hf_token = os.getenv("HF_TOKEN")
        hf_api_key = os.getenv("HUGGINGFACE_API_KEY")
        openai_key = os.getenv("OPENAI_API_KEY")
        anthropic_key = os.getenv("ANTHROPIC_API_KEY")

        # Debug output reports only whether each credential is present.
        # Secret values (even partial ones) must never reach the logs.
        print("=== LLM Backend Initialization ===")
        print(f"HF_TOKEN: {'SET' if hf_token else 'NOT SET'}")
        print(f"HUGGINGFACE_API_KEY: {'SET' if hf_api_key else 'NOT SET'}")
        print(f"OPENAI_API_KEY: {'SET' if openai_key else 'NOT SET'}")
        print(f"ANTHROPIC_API_KEY: {'SET' if anthropic_key else 'NOT SET'}")
        print(f"LLM_PROVIDER: {os.getenv('LLM_PROVIDER', 'NOT SET')}")

        # Check for explicit provider setting
        provider_env = os.getenv("LLM_PROVIDER", "").lower()

        # Priority 1: Explicitly set provider
        if provider_env == "openai" and openai_key:
            print("Using OpenAI (explicit)")
            return LLMBackend(provider=LLMProvider.OPENAI)
        if provider_env == "anthropic" and anthropic_key:
            print("Using Anthropic (explicit)")
            return LLMBackend(provider=LLMProvider.ANTHROPIC)
        if provider_env == "huggingface" and (hf_api_key or hf_token):
            # An explicit HuggingFace setup prefers the dedicated API key.
            print("Using HuggingFace (explicit)")
            return LLMBackend(
                provider=LLMProvider.HUGGINGFACE,
                api_key=hf_api_key or hf_token,
            )
        if provider_env == "lm_studio":
            print("Using LM Studio (explicit)")
            return LLMBackend(provider=LLMProvider.LM_STUDIO)

        # Priority 2: Auto-detect based on available credentials
        # HF_TOKEN is automatically available in HF Spaces, so check it first
        detected_hf_token = hf_token or hf_api_key
        if detected_hf_token:
            print("Auto-detected HuggingFace credentials, using HF Inference API")
            return LLMBackend(
                provider=LLMProvider.HUGGINGFACE,
                api_key=detected_hf_token,
            )
        if openai_key:
            print("Auto-detected OpenAI credentials")
            return LLMBackend(provider=LLMProvider.OPENAI)
        if anthropic_key:
            print("Auto-detected Anthropic credentials")
            return LLMBackend(provider=LLMProvider.ANTHROPIC)

        # No credentials found - return None to show error in UI
        print("=" * 60)
        print("WARNING: No LLM provider credentials found!")
        print("=" * 60)
        print("For HuggingFace Spaces:")
        print(" - HF_TOKEN should be automatically available")
        print(" - Make sure your Space is PUBLIC")
        print(" - Or add HUGGINGFACE_API_KEY in Settings")
        print("")
        print("For other providers, set one of:")
        print(" - OPENAI_API_KEY")
        print(" - ANTHROPIC_API_KEY")
        print(" - HUGGINGFACE_API_KEY")
        print("=" * 60)
        return None

    except Exception as e:
        # Startup must not crash the app: log the failure and report
        # "not configured" so the UI can show setup instructions.
        print(f"Error during backend initialization: {e}")
        traceback.print_exc()
        return None
99
+
100
+
101
# Initialize components
llm_backend = initialize_backend()

# Every service starts out unavailable; handlers check for None before use.
survey_gen = survey_trans = data_analyzer = None

# Only initialize if backend is available
if llm_backend:
    survey_gen = SurveyGenerator(llm_backend)
    survey_trans = SurveyTranslator(llm_backend)
    data_analyzer = DataAnalyzer(llm_backend)
    print(f"✓ Project Echo initialized with {llm_backend.provider.value} provider")
else:
    print("✗ Project Echo initialization incomplete - no LLM credentials found")
115
+
116
+
117
+ # ===========================
118
+ # Survey Generation Functions
119
+ # ===========================
120
+
121
def generate_survey_from_outline(outline: str, survey_type: str, num_questions: int, audience: str):
    """Generate a survey from a free-text outline and store it as the current survey.

    Args:
        outline: Topic or outline text; must be non-blank.
        survey_type: Survey type label; lower-cased before being passed on.
        num_questions: Requested question count; must be within 1-50.
        audience: Target audience description.

    Returns:
        A ``(status, display_markdown, filepath)`` tuple. On any validation or
        generation failure the status starts with ``❌``, the markdown is
        empty and the filepath is ``None``.
    """
    global current_survey

    # Check if backend is initialized
    if not survey_gen:
        return (
            "❌ LLM backend not configured. Please set up API credentials:\n"
            "- For HuggingFace Spaces: HF_TOKEN is auto-available\n"
            "- For OpenAI: Set OPENAI_API_KEY\n"
            "- For Anthropic: Set ANTHROPIC_API_KEY\n"
            "- For HuggingFace: Set HUGGINGFACE_API_KEY",
            "",
            None,
        )

    if not outline or not outline.strip():
        return "❌ Please provide an outline or topic description.", "", None

    # Validate inputs
    if num_questions < 1 or num_questions > 50:
        return "❌ Number of questions must be between 1 and 50.", "", None

    try:
        # Generate survey
        survey_data = survey_gen.generate_survey(
            outline=outline,
            survey_type=survey_type.lower(),
            num_questions=num_questions,
            target_audience=audience,
        )

        # Remember the survey so the translation tab can pick it up.
        current_survey = survey_data

        # Format for display
        display_text = format_survey_display(survey_data)

        # Save to file for download
        filepath = save_json_file(survey_data, "survey")

        return (
            f"✅ Survey generated successfully! Contains {len(survey_data.get('questions', []))} questions.",
            display_text,
            filepath,
        )

    except Exception as e:
        # UI boundary: surface the message to the user, log the full trace.
        error_msg = f"❌ Error generating survey: {str(e)}"
        print(f"Survey generation error: {traceback.format_exc()}")
        return error_msg, "", None
171
+
172
+
173
def format_survey_display(survey_data: Dict) -> str:
    """Render a survey dict as Markdown text.

    Reads ``title``, ``introduction``, ``questions`` and ``closing`` from
    *survey_data* (each optional). Every question contributes its text and
    type, then its options and help text when present, then its
    required flag.
    """
    pieces = [
        f"# {survey_data.get('title', 'Survey')}\n\n",
        f"## Introduction\n{survey_data.get('introduction', '')}\n\n",
        "## Questions\n\n",
    ]

    for number, question in enumerate(survey_data.get('questions', []), 1):
        pieces.append(f"**{number}. {question.get('question_text', '')}**\n")
        pieces.append(f" - Type: {question.get('question_type', 'N/A')}\n")

        if question.get('options'):
            pieces.append(" - Options:\n")
            pieces.extend(f" - {choice}\n" for choice in question['options'])

        if question.get('help_text'):
            pieces.append(f" - Help: {question['help_text']}\n")

        required_label = 'Yes' if question.get('required', False) else 'No'
        pieces.append(f" - Required: {required_label}\n\n")

    pieces.append(f"## Closing\n{survey_data.get('closing', '')}\n")

    return "".join(pieces)
196
+
197
+
198
+ # ===========================
199
+ # Translation Functions
200
+ # ===========================
201
+
202
def translate_current_survey(target_languages: List[str]):
    """Translate the current survey into each selected language.

    Languages are translated one at a time; a failure for one language is
    recorded in the status text and does not stop the others.

    Args:
        target_languages: Language codes to translate into.

    Returns:
        A ``(status, display_text, filepath)`` tuple. ``status`` has one line
        per language. When preconditions fail or every translation fails, the
        status is a single ``❌`` message, the display text is empty and the
        filepath is ``None``.
    """
    global current_survey

    # Check if backend is initialized
    if not survey_trans:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None,
        )

    if not current_survey:
        return "❌ Please generate or upload a survey first.", "", None

    if not target_languages:
        return "❌ Please select at least one target language.", "", None

    try:
        # Translate to all selected languages
        translations = {}
        status_messages = []
        success_count = 0

        for lang_code in target_languages:
            try:
                translated = survey_trans.translate_survey(current_survey, lang_code)
                translations[lang_code] = translated
                lang_name = survey_trans._resolve_language(lang_code)
                status_messages.append(f"✅ Translated to {lang_name}")
                success_count += 1
            except Exception as e:
                # Best effort: note the failure and carry on with the rest.
                lang_name = survey_trans._resolve_language(lang_code)
                status_messages.append(f"❌ Failed to translate to {lang_name}: {str(e)}")
                print(f"Translation error for {lang_code}: {traceback.format_exc()}")

        if success_count == 0:
            return "❌ All translations failed. Please check your LLM configuration.", "", None

        # Format translations for display
        sections = []
        for lang_code, trans_survey in translations.items():
            # Results carrying an "error" key are left out of the display.
            if "error" not in trans_survey:
                lang_name = survey_trans._resolve_language(lang_code)
                sections.append(f"\n{'='*50}\n")
                sections.append(f"TRANSLATION: {lang_name.upper()}\n")
                sections.append(f"{'='*50}\n\n")
                sections.append(format_survey_display(trans_survey))
        display_text = "".join(sections)

        # Save to file for download
        filepath = save_json_file(translations, "translations")

        status = "\n".join(status_messages)
        return status, display_text, filepath

    except Exception as e:
        # UI boundary: surface the message to the user, log the full trace.
        error_msg = f"❌ Error during translation: {str(e)}"
        print(f"Translation error: {traceback.format_exc()}")
        return error_msg, "", None
261
+
262
+
263
def get_language_choices():
    """Return the selectable languages as ``"<code> - <name>"`` labels."""
    # Read the static language table from the class, so this works even when
    # no translator instance exists.
    from survey_translator import SurveyTranslator

    labels = []
    for code, name in SurveyTranslator.SUPPORTED_LANGUAGES.items():
        labels.append(f"{code} - {name}")
    return labels
269
+
270
+
271
+ # ===========================
272
+ # Data Analysis Functions
273
+ # ===========================
274
+
275
def analyze_survey_data(responses_json: str, questions_json: Optional[str] = None):
    """Analyze survey responses supplied as JSON text.

    Args:
        responses_json: JSON array of response objects; must be non-blank.
        questions_json: Optional JSON array of questions. Blank or ``None``
            means no questions are passed to the analyzer.

    Returns:
        A ``(status, report_markdown, json_filepath)`` tuple. On invalid
        input or analysis failure the status starts with ``❌``, the report
        is empty and the filepath is ``None``.
    """
    # Check if backend is initialized
    if not data_analyzer:
        return (
            "❌ LLM backend not configured. Please set up API credentials in Settings.",
            "",
            None,
        )

    if not responses_json or not responses_json.strip():
        return "❌ Please provide survey responses in JSON format.", "", None

    try:
        # Parse responses
        responses = json.loads(responses_json)
        questions = json.loads(questions_json) if questions_json and questions_json.strip() else None

        if not isinstance(responses, list):
            return "❌ Responses must be a JSON array.", "", None

        if len(responses) == 0:
            return "❌ No responses to analyze.", "", None

        # Validate questions if provided
        if questions and not isinstance(questions, list):
            return "❌ Questions must be a JSON array.", "", None

        # Run analysis
        analysis_results = data_analyzer.analyze_responses(responses, questions)

        if "error" in analysis_results:
            return f"❌ Analysis error: {analysis_results['error']}", "", None

        # Generate report
        report_md = data_analyzer.generate_report(analysis_results, format="markdown")

        # Save both JSON and Markdown. Only the JSON path is returned; the
        # Markdown file is still written as before.
        json_filepath = save_json_file(analysis_results, "analysis_results")
        analysis_to_markdown_file(report_md, "analysis_report")

        status_msg = f"✅ Analysis complete! Analyzed {len(responses)} responses."
        if questions:
            status_msg += f" Considered {len(questions)} questions."

        return status_msg, report_md, json_filepath

    except json.JSONDecodeError as e:
        return f"❌ Invalid JSON format: {str(e)}", "", None
    except Exception as e:
        # UI boundary: surface the message to the user, log the full trace.
        error_msg = f"❌ Error during analysis: {str(e)}"
        print(f"Analysis error: {traceback.format_exc()}")
        return error_msg, "", None
328
+
329
+
330
def load_example_responses():
    """Return three sample survey responses as an indented JSON string."""
    question_keys = ("q1", "q2", "q3")
    sample_answers = (
        (
            "The medication helped reduce my symptoms significantly within the first week.",
            "I experienced some mild side effects like drowsiness in the beginning.",
            "Overall, I'm satisfied with the treatment and would recommend it to others.",
        ),
        (
            "I didn't notice much improvement in my condition after taking the medication.",
            "The side effects were quite severe and made it difficult to continue.",
            "I had to stop taking it after two weeks due to adverse reactions.",
        ),
        (
            "The medication worked well but took about 3-4 weeks to show results.",
            "No major side effects, just some occasional nausea.",
            "It's been effective for managing my symptoms on a daily basis.",
        ),
    )
    # One dict per respondent, keys in q1..q3 order.
    example = [dict(zip(question_keys, answers)) for answers in sample_answers]
    return json.dumps(example, indent=2)
350
+
351
+
352
+ # ===========================
353
+ # Conversational Research Handlers
354
+ # ===========================
355
+
356
def create_new_flow(flow_name: str, flow_description: str):
    """Build a conversation flow and have the LLM draft its first five steps.

    Returns a ``(status, flow_markdown, flow_id)`` tuple. The flow becomes the
    current flow and is stored in ``saved_flows`` only when generation
    succeeds; otherwise ``flow_id`` is ``None``.
    """
    global current_flow, saved_flows, llm_backend

    if not (flow_name and flow_name.strip()):
        return "❌ Please provide a flow name.", "", None

    if not (flow_description and flow_description.strip()):
        return "❌ Please provide a description of what you want to discuss in this flow.", "", None

    if not llm_backend:
        return "❌ LLM backend not configured. Cannot generate flow.", "", None

    try:
        # Start from an empty flow, then let the model fill in the steps.
        new_flow = ConversationFlow(name=flow_name, description=flow_description)
        generated, detail = new_flow.generate_flow_with_ai(llm_backend, num_questions=5)

        if not generated:
            return f"⚠️ Flow created but generation failed: {detail}", display_flow(new_flow), None

        current_flow = new_flow
        saved_flows[new_flow.id] = new_flow

        return (
            f"✅ Flow '{flow_name}' created with {len(new_flow.nodes)} conversation steps!",
            display_flow(new_flow),
            new_flow.id,
        )
    except Exception as e:
        error_msg = f"❌ Error creating flow: {str(e)}"
        print(f"Flow creation error: {traceback.format_exc()}")
        return error_msg, "", None
393
+
394
+
395
def regenerate_flow_content(flow_id: str):
    """Regenerate a saved flow's nodes using the LLM.

    The existing nodes are cleared before regeneration. If regeneration
    reports failure or raises, the previous nodes are put back so a failed
    attempt does not leave the saved flow empty.

    Args:
        flow_id: Key of the flow in ``saved_flows``.

    Returns:
        A ``(status, flow_markdown)`` tuple; the markdown is empty on failure.
    """
    global saved_flows, current_flow, llm_backend

    if not flow_id:
        return "❌ No flow selected.", ""

    flow = saved_flows.get(flow_id)
    if not flow:
        return "❌ Flow not found.", ""

    if not llm_backend:
        return "❌ LLM backend not configured.", ""

    # Snapshot so a failed regeneration can be rolled back.
    previous_nodes = list(flow.nodes)

    try:
        # Clear existing nodes
        flow.nodes = []

        # Regenerate with AI
        success, message = flow.generate_flow_with_ai(llm_backend, num_questions=5)

        if not success:
            flow.nodes = previous_nodes
            return f"⚠️ Regeneration failed: {message}", ""

        current_flow = flow

        return (
            f"✅ Flow regenerated with {len(flow.nodes)} new steps!",
            display_flow(flow),
        )
    except Exception as e:
        flow.nodes = previous_nodes
        return f"❌ Error regenerating flow: {str(e)}", ""
427
+
428
+
429
def load_example_flow():
    """Create the bundled example flow, make it current, and store it.

    Returns a ``(status, flow_markdown, flow_id)`` tuple.
    """
    global current_flow, saved_flows

    example = create_example_flow()
    saved_flows[example.id] = example
    current_flow = example

    status_text = f"✅ Example flow loaded: {example.name}"
    return status_text, display_flow(example), example.id
442
+
443
+
444
def add_flow_node(flow_id: str, node_content: str, node_type: str):
    """Append a node to a saved flow, chaining it after the current last node.

    Returns a ``(status, flow_markdown)`` tuple; the markdown is empty when
    the node could not be added.
    """
    global current_flow, saved_flows

    if not flow_id:
        return "❌ No flow selected.", ""

    target_flow = saved_flows.get(flow_id)
    if not target_flow:
        return "❌ Flow not found.", ""

    if not (node_content and node_content.strip()):
        return "❌ Please provide content for the node.", ""

    try:
        new_node = ConversationNode(content=node_content, node_type=node_type.lower())

        # Point the existing tail at the new node before appending it.
        if target_flow.nodes:
            target_flow.nodes[-1].next = new_node.id

        target_flow.add_node(new_node)
        current_flow = target_flow

        status_text = f"✅ Node added successfully! Total nodes: {len(target_flow.nodes)}"
        return status_text, display_flow(target_flow)
    except Exception as e:
        return f"❌ Error adding node: {str(e)}", ""
475
+
476
+
477
def display_flow(flow: ConversationFlow) -> str:
    """Render a flow as Markdown: a header block followed by one section per node.

    Returns the placeholder text ``"No flow to display"`` when *flow* is
    missing or has no nodes.
    """
    if not flow or not flow.nodes:
        return "No flow to display"

    sections = [
        f"# {flow.name}\n\n",
        f"**Description:** {flow.description}\n\n",
        f"**Total Steps:** {len(flow.nodes)}\n\n",
        "---\n\n",
    ]

    for step_number, step in enumerate(flow.nodes, 1):
        sections.append(f"### Step {step_number}: {step.type.capitalize()}\n\n")
        sections.append(f"{step.content}\n\n")

    return "".join(sections)
492
+
493
+
494
def save_current_flow(flow_id: str):
    """Write a saved flow to a JSON file.

    Returns a ``(status, filepath)`` tuple; ``filepath`` is ``None`` when the
    flow is unknown or saving fails.
    """
    if not flow_id:
        return "❌ No flow selected.", None

    selected = saved_flows.get(flow_id)
    if not selected:
        return "❌ Flow not found.", None

    try:
        saved_path = save_json_file(selected.to_dict(), "conversation_flow")
    except Exception as e:
        return f"❌ Error saving flow: {str(e)}", None

    return f"✅ Flow saved to {saved_path}", saved_path
508
+
509
+
510
def start_conversation_session(flow_id: str):
    """Start a new moderated conversation for a saved flow.

    Creates a session through ``session_manager``, makes it the current
    session, and asks a ``ConversationModerator`` for the opening message.

    Args:
        flow_id: Key of the flow in ``saved_flows``.

    Returns:
        A ``(chat_history, status)`` tuple. On success the history holds one
        AI turn, ``[[None, opening_message]]``; on failure it is empty and
        the status starts with ``❌``.
    """
    global current_session, session_manager

    if not flow_id:
        return [], "❌ Please select a flow first."

    flow = saved_flows.get(flow_id)
    if not flow:
        return [], "❌ Flow not found."

    if not llm_backend:
        return [], "❌ LLM backend not initialized."

    try:
        # Create session
        session = session_manager.create_session(flow_id=flow.id, flow_name=flow.name)
        current_session = session

        # Create moderator
        moderator = ConversationModerator(llm_backend, flow)

        # Start conversation
        opening_message = moderator.start_conversation(session)

        # Return chat history in Gradio format
        return [[None, opening_message]], f"✅ Conversation started! Session ID: {session.id}"

    except Exception as e:
        return [], f"❌ Error starting conversation: {str(e)}"
540
+
541
+
542
def chat_with_moderator(user_message: str, history: List):
    """Send one user message to the AI moderator and record the exchange.

    On success the ``[user_message, ai_response]`` pair is appended to
    *history* in place. Returns ``(history, status)``; when a precondition
    fails or an error occurs, *history* comes back unchanged alongside a
    ``❌`` message.
    """
    global current_session

    if not current_session:
        return history, "❌ No active session. Please start a conversation first."

    if not llm_backend:
        return history, "❌ LLM backend not initialized."

    if not (user_message and user_message.strip()):
        return history, "❌ Please enter a message."

    try:
        # The session only stores the flow id; look the flow itself up.
        active_flow = saved_flows.get(current_session.flow_id)
        if not active_flow:
            return history, "❌ Flow not found."

        # A fresh moderator is built per message; the session carries the state.
        reply = ConversationModerator(llm_backend, active_flow).process_user_response(
            current_session, user_message
        )

        history.append([user_message, reply])

        status_parts = [f"Session: {current_session.id} | Turns: {current_session.get_turn_count()}"]
        if current_session.status == "completed":
            status_parts.append(" | Conversation completed")

        return history, "".join(status_parts)

    except Exception as e:
        return history, f"❌ Error: {str(e)}"
578
+
579
+
580
def export_conversation():
    """Export the current conversation session as a transcript file.

    Returns:
        A ``(status, filepath)`` tuple; ``filepath`` is ``None`` when there
        is no active session or the export fails.
    """
    global current_session

    if not current_session:
        return "❌ No active session to export.", None

    try:
        filepath = conversation_to_transcript(current_session)
        return f"✅ Conversation exported to {filepath}", filepath
    except Exception as e:
        return f"❌ Error exporting conversation: {str(e)}", None
592
+
593
+
594
def generate_conversation_summary():
    """Generate an AI summary of the current conversation.

    Requires an active session with at least three turns and a configured
    LLM backend. The summary is wrapped in a Markdown report that also lists
    the session's statistics.

    Returns:
        A ``(status, summary_markdown)`` tuple; the markdown is empty when a
        precondition fails or generation raises.
    """
    global current_session

    if not current_session:
        return "❌ No active session. Start a conversation first.", ""

    if not llm_backend:
        return "❌ LLM backend not initialized.", ""

    if current_session.get_turn_count() < 3:
        return "❌ Not enough conversation data. Have at least 2-3 exchanges first.", ""

    try:
        # Get the flow
        flow = saved_flows.get(current_session.flow_id)
        if not flow:
            return "❌ Flow not found.", ""

        # Create moderator and generate summary
        moderator = ConversationModerator(llm_backend, flow)
        summary = moderator.generate_summary(current_session)

        # Format summary with stats
        stats = current_session.get_summary_stats()
        formatted_summary = f"""## Conversation Summary

**Session Details:**
- Session ID: {current_session.id}
- Flow: {current_session.flow_name}
- Total Turns: {stats['total_turns']} ({stats['user_turns']} user, {stats['ai_turns']} AI)
- Duration: {stats['duration_minutes']} minutes
- Status: {stats['status']}

---

{summary}

---

*Summary generated by AI. Review for accuracy.*
"""

        return "✅ Summary generated successfully!", formatted_summary

    except Exception as e:
        return f"❌ Error generating summary: {str(e)}", ""
641
+
642
+
643
def update_probing_threshold(threshold: int):
    """Return the confirmation text for a new probing threshold.

    Only the message is produced here; nothing is stored.
    """
    # This will be used when creating new moderators
    confirmation = "✅ Probing threshold set to every {} responses".format(threshold)
    return confirmation
647
+
648
+
649
def get_conversation_metrics():
    """Build a Markdown panel of live metrics for the current session.

    Returns:
        The metrics Markdown, or a short "No Active Session" notice when no
        session is active.
    """
    global current_session

    if not current_session:
        return """**No Active Session**

Start a conversation to see metrics."""

    stats = current_session.get_summary_stats()

    # Calculate follow-up count (AI turns that aren't linked to nodes)
    follow_ups = sum(
        1
        for turn in current_session.conversation_history
        if turn.role == "ai" and not turn.node_id
    )
    scripted = stats['ai_turns'] - follow_ups

    # max(..., 1) below keeps the follow-up rate defined before any AI turn.
    metrics_md = f"""## 📊 Live Conversation Metrics

**Engagement:**
- Total Exchanges: {stats['user_turns']}
- User Responses: {stats['user_turns']}
- AI Questions: {stats['ai_turns']}

**Question Mix:**
- Scripted Questions: {scripted}
- Dynamic Follow-ups: {follow_ups}
- Follow-up Rate: {(follow_ups / max(stats['ai_turns'], 1) * 100):.1f}%

**Quality Indicators:**
- Avg Response Length: {stats['avg_user_response_length']:.0f} characters
- Duration: {stats['duration_minutes']} min
- Status: {stats['status'].upper()}

**Session Info:**
- Session ID: `{current_session.id[:8]}...`
- Flow: {current_session.flow_name}
"""

    return metrics_md
689
+
690
+
691
def analyze_multiple_sessions(uploaded_files):
    """Analyze several exported conversation sessions together.

    Args:
        uploaded_files: Uploaded JSON files. Each item may be a filesystem
            path (``str`` / ``os.PathLike``) or an object whose ``name``
            attribute is the path.

    Returns:
        A ``(status, report, export_filepath)`` tuple. On failure the report
        is empty and the filepath is ``None``.
    """
    if not uploaded_files:
        return "❌ Please upload at least one conversation JSON file.", "", None

    # NOTE(review): this message mentions "basic analysis", but no analysis is
    # run on this path - confirm whether ConversationAnalytics can work
    # without a backend before changing either the text or the behaviour.
    if not llm_backend:
        return "⚠️ LLM backend not configured. Basic analysis only (no AI insights).", "", None

    try:
        # Load session data from uploaded files
        session_data_list = []

        for uploaded in uploaded_files:
            # Accept plain paths as well as file wrappers exposing ``.name``.
            if isinstance(uploaded, (str, os.PathLike)):
                path = uploaded
            else:
                path = uploaded.name
            # Explicit UTF-8 avoids depending on the platform default encoding.
            with open(path, "r", encoding="utf-8") as handle:
                session_data_list.append(json.load(handle))

        # Create analytics instance
        analytics = ConversationAnalytics(llm_backend)
        loaded_count = analytics.load_sessions(session_data_list)

        if loaded_count == 0:
            return "❌ No valid sessions found in uploaded files.", "", None

        # Generate comprehensive report
        report = analytics.generate_comprehensive_report()

        # Export aggregated data
        export_data = analytics.export_aggregated_data()
        export_file = save_json_file(export_data, "multi_session_analysis")

        status = f"✅ Successfully analyzed {loaded_count} sessions from {len(uploaded_files)} files"

        return status, report, export_file

    except Exception as e:
        return f"❌ Error analyzing sessions: {str(e)}", "", None
728
+
729
+
730
+ # ===========================
731
+ # Gradio Interface
732
+ # ===========================
733
+
734
+ def create_interface():
735
+ """Create the main Gradio interface"""
736
+
737
+ with gr.Blocks(
738
+ title="Project Echo - Qualitative Research Assistant",
739
+ theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate")
740
+ ) as app:
741
+
742
+ gr.Markdown("""
743
+ # Project Echo - Your AI-Powered Qualitative Research Assistant
744
+
745
+ Battle the blank page, reach global audiences, and uncover insights with AI assistance.
746
+ """)
747
+
748
+ # Show backend status
749
+ if llm_backend:
750
+ status_msg = f"✅ **Active LLM Provider:** {llm_backend.provider.value.upper()} | Model: {llm_backend.model}"
751
+ bg_color = "rgba(0, 255, 0, 0.1)"
752
+ else:
753
+ status_msg = """⚠️ **LLM Provider Not Configured**
754
+
755
+ **To use this app, you need to configure an LLM provider:**
756
+
757
+ 1. **Easiest (HuggingFace Spaces):** Make sure your Space is PUBLIC and HF_TOKEN will be auto-available
758
+ 2. **Best Quality:** Add `OPENAI_API_KEY` in Space Settings → Variables
759
+ 3. **Alternative:** Add `ANTHROPIC_API_KEY` or `HUGGINGFACE_API_KEY`
760
+
761
+ See the **About** tab for detailed instructions."""
762
+ bg_color = "rgba(255, 165, 0, 0.2)"
763
+
764
+ gr.Markdown(f'<div style="background-color: {bg_color}; padding: 15px; border-radius: 5px; margin: 10px 0; border-left: 4px solid #FF6B6B;">{status_msg}</div>')
765
+
766
+ with gr.Tabs() as tabs:
767
+
768
+ # ========== SURVEY GENERATION TAB ==========
769
+ with gr.Tab("📝 Generate Survey"):
770
+ gr.Markdown("""
771
+ ## Battle the Blank Page
772
+ Share an outline and get AI-powered surveys drafted in minutes,
773
+ complete with industry best practices.
774
+ """)
775
+
776
+ with gr.Row():
777
+ with gr.Column(scale=1):
778
+ outline_input = gr.Textbox(
779
+ label="Your Survey Outline or Topic",
780
+ placeholder="Example: I want to understand patient experiences with a new diabetes medication, focusing on effectiveness, side effects, and quality of life impacts.",
781
+ lines=6
782
+ )
783
+
784
+ survey_type_input = gr.Radio(
785
+ label="Survey Type",
786
+ choices=["Qualitative", "Quantitative", "Mixed"],
787
+ value="Qualitative"
788
+ )
789
+
790
+ num_questions_input = gr.Slider(
791
+ label="Number of Questions",
792
+ minimum=5,
793
+ maximum=25,
794
+ value=10,
795
+ step=1
796
+ )
797
+
798
+ audience_input = gr.Textbox(
799
+ label="Target Audience",
800
+ placeholder="Example: Adults aged 30-65 with Type 2 diabetes",
801
+ value="General audience"
802
+ )
803
+
804
+ generate_btn = gr.Button("🚀 Generate Survey", variant="primary", size="lg")
805
+
806
+ with gr.Column(scale=1):
807
+ gen_status = gr.Textbox(label="Status", interactive=False)
808
+ gen_output = gr.Markdown(label="Generated Survey")
809
+
810
+ gen_download = gr.File(label="Download Survey JSON", visible=False)
811
+
812
+ # Event handlers
813
+ generate_btn.click(
814
+ fn=generate_survey_from_outline,
815
+ inputs=[outline_input, survey_type_input, num_questions_input, audience_input],
816
+ outputs=[gen_status, gen_output, gen_download]
817
+ ).then(
818
+ fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
819
+ inputs=[gen_download],
820
+ outputs=[gen_download]
821
+ )
822
+
823
+ # ========== TRANSLATION TAB ==========
824
+ with gr.Tab("🌍 Translate Survey"):
825
+ gr.Markdown("""
826
+ ## Reach Global Audiences
827
+ Translate your surveys automatically to streamline efforts and reach wider audiences.
828
+ """)
829
+
830
+ with gr.Row():
831
+ with gr.Column(scale=1):
832
+ gr.Markdown("### Select Target Languages")
833
+
834
+ # Create checkboxes for popular languages
835
+ lang_checkboxes = gr.CheckboxGroup(
836
+ label="Languages",
837
+ choices=get_language_choices(),
838
+ value=[]
839
+ )
840
+
841
+ translate_btn = gr.Button("🌐 Translate Survey", variant="primary", size="lg")
842
+
843
+ gr.Markdown("""
844
+ **Note:** Make sure you've generated a survey first, or upload one using the JSON format.
845
+ """)
846
+
847
+ with gr.Column(scale=1):
848
+ trans_status = gr.Textbox(label="Translation Status", interactive=False)
849
+ trans_output = gr.Markdown(label="Translations")
850
+
851
+ trans_download = gr.File(label="Download Translations JSON", visible=False)
852
+
853
+ # Event handlers
854
def extract_lang_codes(selected_items):
    """Return the language-code part of each selected checkbox label.

    Every label is cut at its first " - " separator and the text before it
    is kept; a label without the separator is returned unchanged.
    """
    codes = []
    for label in selected_items:
        code, _separator, _language_name = label.partition(" - ")
        codes.append(code)
    return codes
857
+
858
+ translate_btn.click(
859
+ fn=lambda x: translate_current_survey(extract_lang_codes(x)),
860
+ inputs=[lang_checkboxes],
861
+ outputs=[trans_status, trans_output, trans_download]
862
+ ).then(
863
+ fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
864
+ inputs=[trans_download],
865
+ outputs=[trans_download]
866
+ )
867
+
868
+ # ========== ANALYSIS TAB ==========
869
+ with gr.Tab("📊 Analyze Data"):
870
+ gr.Markdown("""
871
+ ## Uncover Key Insights
872
+ Upload your survey responses and get AI-assisted summaries of key findings,
873
+ themes, and trends.
874
+ """)
875
+
876
+ with gr.Row():
877
+ with gr.Column(scale=1):
878
+ responses_input = gr.Textbox(
879
+ label="Survey Responses (JSON)",
880
+ placeholder='[{"q1": "response 1", "q2": "response 2"}, ...]',
881
+ lines=10
882
+ )
883
+
884
+ questions_input = gr.Textbox(
885
+ label="Questions (JSON, Optional)",
886
+ placeholder='[{"question_text": "What is your experience?", ...}]',
887
+ lines=5
888
+ )
889
+
890
+ with gr.Row():
891
+ analyze_btn = gr.Button("🔍 Analyze Data", variant="primary", size="lg")
892
+ example_btn = gr.Button("Load Example", variant="secondary")
893
+
894
+ with gr.Column(scale=1):
895
+ analysis_status = gr.Textbox(label="Status", interactive=False)
896
+ analysis_output = gr.Markdown(label="Analysis Report")
897
+
898
+ analysis_download = gr.File(label="Download Analysis JSON", visible=False)
899
+
900
+ # Event handlers
901
+ analyze_btn.click(
902
+ fn=analyze_survey_data,
903
+ inputs=[responses_input, questions_input],
904
+ outputs=[analysis_status, analysis_output, analysis_download]
905
+ ).then(
906
+ fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
907
+ inputs=[analysis_download],
908
+ outputs=[analysis_download]
909
+ )
910
+
911
+ example_btn.click(
912
+ fn=load_example_responses,
913
+ outputs=[responses_input]
914
+ )
915
+
916
+ # ========== CONVERSATIONAL RESEARCH TAB ==========
917
+ with gr.Tab("💬 Conversational Research"):
918
+ gr.Markdown("""
919
+ ## AI-Moderated Conversations
920
+ Design conversation flows and conduct AI-powered qualitative interviews with respondents.
921
+ """)
922
+
923
+ with gr.Tabs():
924
+ # Design Flow Sub-Tab
925
+ with gr.Tab("🎨 Design Flow"):
926
+ gr.Markdown("""
927
+ ### Create Conversation Flows
928
+ Design custom conversation paths for AI-moderated interviews.
929
+ """)
930
+
931
+ with gr.Row():
932
+ with gr.Column(scale=1):
933
+ gr.Markdown("#### Flow Setup")
934
+
935
+ flow_name_input = gr.Textbox(
936
+ label="Flow Name",
937
+ placeholder="e.g., HCP Interview for New Dermatology Product",
938
+ value=""
939
+ )
940
+
941
+ flow_desc_input = gr.Textbox(
942
+ label="Flow Description",
943
+ placeholder="Describe the purpose of this conversation flow...",
944
+ lines=3
945
+ )
946
+
947
+ with gr.Row():
948
+ create_flow_btn = gr.Button("✨ Create New Flow", variant="primary")
949
+ load_example_flow_btn = gr.Button("📋 Load Example", variant="secondary")
950
+
951
+ with gr.Row():
952
+ regenerate_flow_btn = gr.Button("🔄 Regenerate Flow", variant="secondary")
953
+ clear_flow_btn = gr.Button("🗑️ Clear All Steps", variant="stop")
954
+
955
+ flow_id_state = gr.State(value="")
956
+
957
+ gr.Markdown("#### Add Steps to Flow")
958
+
959
+ node_content_input = gr.Textbox(
960
+ label="Question/Message",
961
+ placeholder="Enter the question or message for this step...",
962
+ lines=4
963
+ )
964
+
965
+ node_type_input = gr.Radio(
966
+ label="Step Type",
967
+ choices=["Question", "End"],
968
+ value="Question"
969
+ )
970
+
971
+ add_node_btn = gr.Button("➕ Add Step", variant="secondary")
972
+
973
+ save_flow_btn = gr.Button("💾 Save Flow", variant="primary")
974
+
975
+ with gr.Column(scale=1):
976
+ flow_status = gr.Textbox(label="Status", interactive=False)
977
+ flow_display = gr.Markdown(label="Flow Preview", value="No flow created yet")
978
+
979
+ flow_download = gr.File(label="Download Flow JSON", visible=False)
980
+
981
+ # Event handlers for flow design
982
+ create_flow_btn.click(
983
+ fn=create_new_flow,
984
+ inputs=[flow_name_input, flow_desc_input],
985
+ outputs=[flow_status, flow_display, flow_id_state]
986
+ )
987
+
988
+ load_example_flow_btn.click(
989
+ fn=load_example_flow,
990
+ outputs=[flow_status, flow_display, flow_id_state]
991
+ )
992
+
993
+ regenerate_flow_btn.click(
994
+ fn=regenerate_flow_content,
995
+ inputs=[flow_id_state],
996
+ outputs=[flow_status, flow_display]
997
+ )
998
+
999
def clear_flow(flow_id):
    """Remove every step from the flow identified by ``flow_id``.

    Returns a (status message, flow preview) pair. The preview is an empty
    string whenever no flow id was given or the id is not in ``saved_flows``.
    """
    # Guard: nothing selected yet.
    if not flow_id:
        return "❌ No flow selected.", ""

    # Guard: unknown id (or a falsy flow object).
    target_flow = saved_flows.get(flow_id)
    if not target_flow:
        return "❌ Flow not found.", ""

    target_flow.nodes = []
    preview = display_flow(target_flow)
    return "✅ All steps cleared. You can now add new ones.", preview
1008
+
1009
+ clear_flow_btn.click(
1010
+ fn=clear_flow,
1011
+ inputs=[flow_id_state],
1012
+ outputs=[flow_status, flow_display]
1013
+ )
1014
+
1015
+ add_node_btn.click(
1016
+ fn=add_flow_node,
1017
+ inputs=[flow_id_state, node_content_input, node_type_input],
1018
+ outputs=[flow_status, flow_display]
1019
+ ).then(
1020
+ fn=lambda: "",
1021
+ outputs=[node_content_input]
1022
+ )
1023
+
1024
+ save_flow_btn.click(
1025
+ fn=save_current_flow,
1026
+ inputs=[flow_id_state],
1027
+ outputs=[flow_status, flow_download]
1028
+ ).then(
1029
+ fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
1030
+ inputs=[flow_download],
1031
+ outputs=[flow_download]
1032
+ )
1033
+
1034
+ # Conduct Interview Sub-Tab
1035
+ with gr.Tab("🎙️ Conduct Interview"):
1036
+ gr.Markdown("""
1037
+ ### AI-Moderated Interview
1038
+ Start a conversation session with the AI moderator using your designed flow.
1039
+ """)
1040
+
1041
+ with gr.Row():
1042
+ with gr.Column(scale=2):
1043
+ conversation_flow_selector = gr.State(value="")
1044
+
1045
+ gr.Markdown("""
1046
+ **Instructions:**
1047
+ 1. Design a flow in the 'Design Flow' tab first (or load the example)
1048
+ 2. Configure AI moderator settings below (optional)
1049
+ 3. Click 'Start Conversation' to begin
1050
+ 4. The AI moderator will ask questions and adapt with follow-ups
1051
+ 5. Generate summary and export when finished
1052
+ """)
1053
+
1054
+ # Moderator Configuration
1055
+ with gr.Accordion("⚙️ AI Moderator Settings", open=False):
1056
+ gr.Markdown("**Follow-up Question Configuration**")
1057
+ probing_threshold_slider = gr.Slider(
1058
+ label="Follow-up Frequency",
1059
+ info="Ask dynamic follow-ups every N user responses",
1060
+ minimum=2,
1061
+ maximum=10,
1062
+ value=3,
1063
+ step=1
1064
+ )
1065
+ probing_status = gr.Textbox(label="Settings Status", interactive=False, value="Default: Every 3 responses")
1066
+
1067
+ with gr.Row():
1068
+ start_conversation_btn = gr.Button("🚀 Start Conversation", variant="primary", scale=2)
1069
+ export_conversation_btn = gr.Button("📥 Export", variant="secondary", scale=1)
1070
+ summary_btn = gr.Button("✨ Generate Summary", variant="secondary", scale=2)
1071
+
1072
+ conversation_status = gr.Textbox(label="Session Status", interactive=False)
1073
+ conversation_download = gr.File(label="Download Transcript", visible=False)
1074
+
1075
+ # Summary Display
1076
+ with gr.Accordion("📝 Conversation Summary", open=False):
1077
+ summary_display = gr.Markdown(label="AI-Generated Summary", value="No summary yet. Complete conversation and click 'Generate Summary'.")
1078
+
1079
+ with gr.Column(scale=3):
1080
+ chatbot = gr.Chatbot(
1081
+ label="AI-Moderated Interview",
1082
+ height=400
1083
+ )
1084
+
1085
+ msg_input = gr.Textbox(
1086
+ label="Your Response",
1087
+ placeholder="Type your response here...",
1088
+ lines=2
1089
+ )
1090
+
1091
+ with gr.Row():
1092
+ submit_btn = gr.Button("Send", variant="primary")
1093
+ clear_btn = gr.Button("Clear")
1094
+
1095
+ # Live Metrics Panel
1096
+ with gr.Accordion("📊 Live Metrics", open=True):
1097
+ metrics_display = gr.Markdown(value="**No Active Session**\n\nStart a conversation to see metrics.")
1098
+
1099
+ # Chat event handlers
1100
def user_submit(user_message, history):
    """Queue the user's message in the chat history as a pending turn.

    Args:
        user_message: Text entered by the respondent; may be empty.
        history: Current chat history as a list of ``[user, bot]`` pairs.
            ``None`` is accepted and treated as an empty history.

    Returns:
        A 3-tuple ``(history, updated_history, "")``: the history as
        received (normalised to a list), the history with a new
        ``[user_message, None]`` pair appended (unchanged when the message
        is empty), and an empty string used to clear the input box.
    """
    # The clear button resets the chatbot with a function returning None, so
    # the history may arrive as None; list concatenation would then raise
    # TypeError.
    history = history or []

    if not user_message:
        return history, history, ""

    # Build a new list instead of mutating the caller's history in place.
    pending_history = history + [[user_message, None]]
    return history, pending_history, ""
1105
+
1106
def bot_respond(history):
    """Answer the pending user turn and refresh the metrics text.

    A turn is pending when the last history entry has ``None`` as its bot
    reply. Without a pending turn the history is returned as-is together
    with an empty status string and the current metrics.
    """
    awaiting_reply = bool(history) and history[-1][1] is None
    if not awaiting_reply:
        return history, "", get_conversation_metrics()

    pending_turn = history[-1]
    earlier_turns = history[:-1]
    new_history, status = chat_with_moderator(pending_turn[0], earlier_turns)
    # Metrics are read after the moderator has processed the message.
    return new_history, status, get_conversation_metrics()
1115
+
1116
+ # Probing threshold configuration
1117
+ probing_threshold_slider.change(
1118
+ fn=update_probing_threshold,
1119
+ inputs=[probing_threshold_slider],
1120
+ outputs=[probing_status]
1121
+ )
1122
+
1123
+ # Start conversation
1124
+ start_conversation_btn.click(
1125
+ fn=lambda: saved_flows[list(saved_flows.keys())[-1]].id if saved_flows else "",
1126
+ outputs=[conversation_flow_selector]
1127
+ ).then(
1128
+ fn=start_conversation_session,
1129
+ inputs=[conversation_flow_selector],
1130
+ outputs=[chatbot, conversation_status]
1131
+ ).then(
1132
+ fn=get_conversation_metrics,
1133
+ outputs=[metrics_display]
1134
+ )
1135
+
1136
+ # Message submission
1137
+ msg_input.submit(
1138
+ fn=user_submit,
1139
+ inputs=[msg_input, chatbot],
1140
+ outputs=[chatbot, chatbot, msg_input],
1141
+ queue=False
1142
+ ).then(
1143
+ fn=bot_respond,
1144
+ inputs=[chatbot],
1145
+ outputs=[chatbot, conversation_status, metrics_display]
1146
+ )
1147
+
1148
+ submit_btn.click(
1149
+ fn=user_submit,
1150
+ inputs=[msg_input, chatbot],
1151
+ outputs=[chatbot, chatbot, msg_input],
1152
+ queue=False
1153
+ ).then(
1154
+ fn=bot_respond,
1155
+ inputs=[chatbot],
1156
+ outputs=[chatbot, conversation_status, metrics_display]
1157
+ )
1158
+
1159
+ clear_btn.click(lambda: None, None, chatbot, queue=False)
1160
+
1161
+ # Generate summary
1162
+ summary_btn.click(
1163
+ fn=generate_conversation_summary,
1164
+ outputs=[conversation_status, summary_display]
1165
+ )
1166
+
1167
+ # Export conversation
1168
+ export_conversation_btn.click(
1169
+ fn=export_conversation,
1170
+ outputs=[conversation_status, conversation_download]
1171
+ ).then(
1172
+ fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
1173
+ inputs=[conversation_download],
1174
+ outputs=[conversation_download]
1175
+ )
1176
+
1177
+ # Analyze Conversations Sub-Tab
1178
+ with gr.Tab("📊 Analyze Conversations"):
1179
+ gr.Markdown("""
1180
+ ### Multi-Session Analysis
1181
+ Analyze patterns and insights across multiple conversation sessions.
1182
+ Upload conversation JSON files (exported from the 'Conduct Interview' tab).
1183
+ """)
1184
+
1185
+ with gr.Row():
1186
+ with gr.Column(scale=1):
1187
+ gr.Markdown("""
1188
+ **How it works:**
1189
+ 1. Conduct multiple interviews in the 'Conduct Interview' tab
1190
+ 2. Export each conversation as JSON
1191
+ 3. Upload all JSON files here
1192
+ 4. Click 'Analyze Sessions' to generate comprehensive report
1193
+ 5. Get AI-powered insights across all conversations
1194
+
1195
+ **Minimum Requirements:**
1196
+ - At least 3-5 sessions recommended
1197
+ - 10+ total user responses across all sessions
1198
+ """)
1199
+
1200
+ session_files_upload = gr.File(
1201
+ label="Upload Conversation Sessions (JSON)",
1202
+ file_count="multiple",
1203
+ file_types=[".json"],
1204
+ type="filepath"
1205
+ )
1206
+
1207
+ analyze_sessions_btn = gr.Button("🔍 Analyze Sessions", variant="primary", size="lg")
1208
+
1209
+ analytics_status = gr.Textbox(label="Analysis Status", interactive=False)
1210
+ analytics_download = gr.File(label="Download Analysis JSON", visible=False)
1211
+
1212
+ with gr.Column(scale=1):
1213
+ analytics_report = gr.Markdown(
1214
+ label="Multi-Session Analysis Report",
1215
+ value="""# Multi-Session Analysis
1216
+
1217
+ **Upload session files to begin analysis.**
1218
+
1219
+ The report will include:
1220
+ - 📊 Aggregate statistics across all sessions
1221
+ - 🔑 Common keywords and topics
1222
+ - 💡 AI-powered cross-session insights
1223
+ - 📋 Individual session summaries
1224
+ - 🎯 Research recommendations
1225
+ """
1226
+ )
1227
+
1228
+ # Analytics event handlers
1229
+ analyze_sessions_btn.click(
1230
+ fn=analyze_multiple_sessions,
1231
+ inputs=[session_files_upload],
1232
+ outputs=[analytics_status, analytics_report, analytics_download]
1233
+ ).then(
1234
+ fn=lambda x: gr.File(value=x, visible=True) if x else gr.File(visible=False),
1235
+ inputs=[analytics_download],
1236
+ outputs=[analytics_download]
1237
+ )
1238
+
1239
+ # ========== ABOUT TAB ==========
1240
+ with gr.Tab("ℹ️ About"):
1241
+ gr.Markdown("""
1242
+ ## About Project Echo
1243
+
1244
+ Project Echo is a comprehensive qualitative research assistant that helps you:
1245
+
1246
+ ### 🎯 Generate Surveys
1247
+ - Create professional surveys from simple outlines
1248
+ - Follow industry best practices automatically
1249
+ - Save hours of questionnaire design time
1250
+
1251
+ ### 🌍 Translate Globally
1252
+ - Reach audiences in 18+ languages
1253
+ - Maintain cultural appropriateness
1254
+ - Expand your research scope effortlessly
1255
+
1256
+ ### 📊 Analyze Results
1257
+ - Extract key themes automatically
1258
+ - Identify patterns and trends
1259
+ - Generate actionable insights
1260
+
1261
+ ### 🔧 Configuration Guide
1262
+
1263
+ **For HuggingFace Spaces (Recommended):**
1264
+
1265
+ No configuration needed! The app automatically uses the HF Inference API with the built-in `HF_TOKEN`.
1266
+
1267
+ **Supported Models:**
1268
+ - Default: `mistralai/Mixtral-8x7B-Instruct-v0.1`
1269
+ - You can change by setting `LLM_MODEL` environment variable
1270
+
1271
+ **For Other LLM Providers:**
1272
+
1273
+ Add these environment variables in your Space Settings:
1274
+
1275
+ 1. **OpenAI** (Best quality, paid):
1276
+ - `LLM_PROVIDER=openai`
1277
+ - `OPENAI_API_KEY=sk-your-key`
1278
+
1279
+ 2. **Anthropic Claude** (Best reasoning, paid):
1280
+ - `LLM_PROVIDER=anthropic`
1281
+ - `ANTHROPIC_API_KEY=your-key`
1282
+
1283
+ 3. **Custom HuggingFace Model**:
1284
+ - `LLM_PROVIDER=huggingface`
1285
+ - `LLM_MODEL=your-model-name`
1286
+
1287
+ **💡 Pro Tip:** For production use, we recommend OpenAI or Anthropic for faster, more reliable results.
1288
+
1289
+ **Supported LLM Providers:**
1290
+ - HuggingFace Inference API (Free tier available)
1291
+ - OpenAI (GPT-4, GPT-4o-mini, GPT-3.5)
1292
+ - Anthropic (Claude 3.5 Sonnet, Claude 3 Opus)
1293
+ - LM Studio (local development only)
1294
+
1295
+ ### 📄 Data Privacy
1296
+
1297
+ - All processing is done through your configured LLM provider
1298
+ - No data is stored permanently by this application
1299
+ - Survey data and responses remain in your control
1300
+
1301
+ ### 🚀 Getting Started
1302
+
1303
+ 1. **Generate** a survey from your research outline
1304
+ 2. **Translate** it to reach global audiences
1305
+ 3. Collect responses from participants
1306
+ 4. **Analyze** the data to uncover insights
1307
+
1308
+ ---
1309
+
1310
+ Built with ❤️ using Gradio and state-of-the-art LLMs
1311
+ """)
1312
+
1313
+ return app
1314
+
1315
+
1316
+ # ===========================
1317
+ # Main Entry Point
1318
+ # ===========================
1319
+
1320
if __name__ == "__main__":
    # Launch settings, collected in one place.
    launch_options = {
        "server_name": "0.0.0.0",  # Allow external access
        "server_port": 7860,       # Standard HF Spaces port
        "share": False,            # Don't create a public link (HF Spaces handles this)
        "show_error": True,
    }

    demo = create_interface()
    demo.launch(**launch_options)
conversation_flow.py CHANGED
@@ -141,6 +141,96 @@ class ConversationFlow:
141
 
142
  return True, "Flow is valid"
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
  def create_example_flow() -> ConversationFlow:
146
  """Create an example conversation flow"""
 
141
 
142
  return True, "Flow is valid"
143
 
144
def generate_flow_with_ai(self, llm_backend, num_questions: int = 5):
    """
    Generate conversation flow nodes using AI based on flow name and description.

    Args:
        llm_backend: LLM backend to use for generation. Its
            ``generate(messages, max_tokens=..., temperature=...)`` result
            is treated as the text to parse.
        num_questions: Number of conversation steps to request in the prompt.

    Returns:
        A ``(success, message)`` tuple. ``success`` is False when the backend
        raises, returns an empty response, or no step could be parsed.

    Raises:
        ValueError: If the flow has no name or no description.
    """
    import re

    if not self.name or not self.description:
        raise ValueError("Flow must have a name and description to generate nodes")

    # Build prompt for generating conversation flow
    prompt = f"""Task: Design a structured conversation flow

**Interview Topic:** {self.name}

**Interview Purpose:** {self.description}

**Your Task:** Create {num_questions} conversation steps for a structured qualitative research interview.

**Guidelines for Each Step:**
- Start with an opening that builds rapport and explains the purpose
- Progress from general to specific questions
- Each step should be clear, open-ended, and encourage detailed responses
- Include natural transition phrases
- End with a closing that thanks the respondent
- Make questions natural and conversational, not robotic

**Output Format:** Number each step (1., 2., 3., etc.) with the exact question or statement to use.

**Generate {num_questions} Interview Steps:**

1."""

    messages = [
        {
            "role": "system",
            "content": "You are an expert qualitative research interviewer designing a conversation flow. Create engaging, professional interview questions that will elicit detailed, meaningful responses."
        },
        {"role": "user", "content": prompt}
    ]

    try:
        response = llm_backend.generate(messages, max_tokens=1500, temperature=0.7)

        if not response or not response.strip():
            return False, "Flow generation failed: the model returned an empty response"

        # The prompt ends with a primed "1.", so a model that simply continues
        # the text omits that marker and its first step would not be seen by
        # the numbered-item parser. Detect that case (un-numbered text followed
        # by item 2) and restore the missing marker.
        first_marker = re.search(r'^\s*(\d+)[\.\)]\s', response, re.MULTILINE)
        if (
            first_marker
            and first_marker.group(1) == "2"
            and response[:first_marker.start()].strip()
        ):
            response = "1. " + response.lstrip()

        nodes_before = len(self.nodes)
        self._parse_and_add_nodes(response)

        # Do not report success when parsing produced no steps.
        if len(self.nodes) == nodes_before:
            return False, "Flow generation failed: no steps could be parsed from the model response"

        return True, "Flow generated successfully!"
    except Exception as e:
        # Best-effort: any backend or parsing error is reported to the caller.
        return False, f"Flow generation failed: {str(e)}"
192
+
193
def _parse_and_add_nodes(self, response: str):
    """
    Parse LLM response and create conversation nodes.

    Numbered items ("1. ..." or "1) ...") are extracted, reduced to their
    first line, and appended to the flow as linked nodes. The first kept
    step is typed "opening", the last kept step "end", and the rest
    "question".

    Args:
        response: The LLM-generated response containing numbered questions
    """
    import re

    # Prefer markers at the start of a line so a number inside a question
    # (e.g. "on a scale of 1 to 10.") does not split the step.
    anchored_pattern = r'^[ \t]*\d+[\.\)]\s+(.+?)(?=^[ \t]*\d+[\.\)]\s|\Z)'
    matches = re.findall(anchored_pattern, response, re.MULTILINE | re.DOTALL)

    if len(matches) < 2:
        # Items may be numbered inline on one line ("1. A? 2. B?"); use the
        # looser pattern when it recovers more items.
        inline_pattern = r'\d+[\.\)]\s+(.+?)(?=\d+[\.\)]|\Z)'
        inline_matches = re.findall(inline_pattern, response, re.DOTALL)
        if len(inline_matches) > len(matches):
            matches = inline_matches

    if not matches:
        # Fallback: split by lines and look for question-like content
        stripped_lines = (line.strip() for line in response.split('\n'))
        matches = [line for line in stripped_lines if len(line) > 20]

    # Clean and filter first, so that a discarded fragment cannot take the
    # "opening" or "end" slot away from a real step.
    contents = []
    for match in matches:
        content = match.split('\n')[0].strip()
        if len(content) >= 10:
            contents.append(content)

    last_index = len(contents) - 1
    for i, content in enumerate(contents):
        # Determine node type
        if i == 0:
            node_type = "opening"
        elif i == last_index:
            node_type = "end"
        else:
            node_type = "question"

        # Create and add node
        node = ConversationNode(content=content, node_type=node_type)

        if self.nodes:
            # Link to previous node
            self.nodes[-1].next = node.id

        self.add_node(node)
233
+
234
 
235
  def create_example_flow() -> ConversationFlow:
236
  """Create an example conversation flow"""
conversation_moderator.py CHANGED
@@ -105,21 +105,33 @@ class ConversationModerator:
105
  Returns:
106
  A follow-up question
107
  """
108
- # Create prompt for generating follow-up
109
- system_prompt = """You are a professional qualitative research interviewer. Your goal is to probe deeper into the respondent's answers to uncover insights.
 
 
 
 
110
 
111
- Generate ONE follow-up question that:
112
- - Explores an interesting point the respondent mentioned
113
- - Asks for more detail or clarification
114
- - Uses phrases like "Tell me more about...", "Can you elaborate on...", "What do you mean by...", "Why do you think..."
115
- - Is empathetic and non-judgmental
116
- - Is concise (one sentence)
 
117
 
118
- Respond ONLY with the follow-up question, nothing else."""
119
 
120
- user_prompt = f"""The respondent just said: "{user_message}"
121
 
122
- Generate a single follow-up question to probe deeper into their response."""
 
 
 
 
 
 
 
123
 
124
  messages = [
125
  {"role": "system", "content": system_prompt},
@@ -185,19 +197,29 @@ Generate a single follow-up question to probe deeper into their response."""
185
 
186
  transcript = "\n".join(transcript_parts)
187
 
188
- system_prompt = """You are analyzing a qualitative research interview. Generate a concise summary that captures:
189
- 1. The main topics discussed
190
- 2. Key insights or themes from the respondent
191
- 3. Notable quotes or moments
192
- 4. Overall sentiment
 
 
193
 
194
- Keep the summary to 3-4 paragraphs."""
195
 
196
- user_prompt = f"""Summarize this interview:
197
 
198
  {transcript}
199
 
200
- Provide a professional summary suitable for a research report."""
 
 
 
 
 
 
 
 
201
 
202
  messages = [
203
  {"role": "system", "content": system_prompt},
 
105
  Returns:
106
  A follow-up question
107
  """
108
+ # Create prompt for generating follow-up - optimized for Mistral/Mixtral
109
+ system_prompt = """You are a skilled qualitative research interviewer conducting a professional interview. Your role is to:
110
+ - Build trust and rapport with respondents
111
+ - Probe deeper into meaningful points they raise
112
+ - Encourage detailed, thoughtful responses
113
+ - Stay curious and engaged without bias
114
 
115
+ When generating follow-up questions:
116
+ - Focus on a single interesting or important point they mentioned
117
+ - Ask for more detail, clarity, or deeper thinking
118
+ - Use natural, conversational phrasing
119
+ - Show genuine interest in their perspective
120
+ - Keep questions clear and concise (one sentence)
121
+ - Be empathetic and non-judgmental
122
 
123
+ Output ONLY the follow-up question text, with no additional explanation or commentary."""
124
 
125
+ user_prompt = f"""**Respondent's Statement:** "{user_message}"
126
 
127
+ **Task:** Generate one thoughtful follow-up question that probes deeper into what they said.
128
+
129
+ Focus on:
130
+ - Exploring an interesting or important point
131
+ - Asking for more detail or their reasoning
132
+ - Encouraging reflection and deeper thinking
133
+
134
+ Provide ONLY the follow-up question text."""
135
 
136
  messages = [
137
  {"role": "system", "content": system_prompt},
 
197
 
198
  transcript = "\n".join(transcript_parts)
199
 
200
+ system_prompt = """You are a qualitative research analyst summarizing a conducted interview. Your summary should be:
201
+ - Professional and objective
202
+ - Grounded in what the respondent actually said
203
+ - Organized by themes and key points
204
+ - Include representative quotes
205
+ - Highlight insights and implications
206
+ - Suitable for a research report or case study"""
207
 
208
+ user_prompt = f"""Task: Summarize this qualitative research interview
209
 
210
+ **Interview Transcript:**
211
 
212
  {transcript}
213
 
214
+ **Summary Requirements:**
215
+ 1. **Main Topics:** What topics or subjects did the respondent discuss?
216
+ 2. **Key Insights:** What are the most important or revealing points they made?
217
+ 3. **Themes:** What patterns or recurring themes emerge from their responses?
218
+ 4. **Representative Quotes:** Include 2-3 direct quotes that capture important moments
219
+ 5. **Sentiment & Tone:** What is the overall emotional tone and sentiment?
220
+
221
+ **Format:** Write a professional summary of 3-4 paragraphs suitable for a research report.
222
+ Start with a brief overview, then discuss key themes and insights."""
223
 
224
  messages = [
225
  {"role": "system", "content": system_prompt},
data_analyzer.py CHANGED
@@ -73,23 +73,26 @@ class DataAnalyzer:
73
  for i, resp in enumerate(sample_responses, 1):
74
  context += f"{i}. {resp[:200]}...\n" # Truncate long responses
75
 
76
- prompt = f"""Analyze the following survey responses and provide an executive summary.
77
 
78
  {context}
79
 
80
- Provide a summary that includes:
81
- 1. Overview: High-level summary of what the data shows (2-3 sentences)
82
- 2. Key patterns: Main patterns or trends observed
83
- 3. Notable findings: Interesting or unexpected discoveries
84
- 4. Response quality: Assessment of response depth and engagement
85
 
86
- Respond with a JSON object with these fields:
 
 
 
 
 
87
  {{
88
- "overview": "...",
89
- "key_patterns": ["pattern 1", "pattern 2", ...],
90
- "notable_findings": ["finding 1", "finding 2", ...],
91
- "response_quality": "..."
92
- }}"""
 
 
93
 
94
  messages = [
95
  {"role": "system", "content": self._get_analyst_system_prompt()},
@@ -113,26 +116,32 @@ Respond with a JSON object with these fields:
113
  sample_size = min(100, len(response_texts))
114
  sample_responses = response_texts[:sample_size]
115
 
116
- prompt = f"""Analyze the following {len(sample_responses)} survey responses and identify the top {num_themes} themes.
 
 
117
 
118
  Responses:
119
  {self._format_responses_for_prompt(sample_responses)}
120
 
121
- For each theme, provide:
122
- 1. Theme name: A short, descriptive name
123
- 2. Description: What this theme represents
124
- 3. Prevalence: Estimated percentage of responses mentioning this theme
125
- 4. Example quotes: 2-3 representative quotes from the responses
126
 
127
- Respond with a JSON array of theme objects:
 
 
 
 
 
 
128
  [
129
  {{
130
- "theme_name": "...",
131
- "description": "...",
132
  "prevalence": "XX%",
133
- "example_quotes": ["quote 1", "quote 2"]
134
  }}
135
- ]"""
 
 
136
 
137
  messages = [
138
  {"role": "system", "content": self._get_analyst_system_prompt()},
@@ -159,28 +168,34 @@ Respond with a JSON array of theme objects:
159
  sample_size = min(100, len(response_texts))
160
  sample_responses = response_texts[:sample_size]
161
 
162
- prompt = f"""Analyze the sentiment of these {len(sample_responses)} survey responses.
 
 
163
 
164
  Responses:
165
  {self._format_responses_for_prompt(sample_responses)}
166
 
167
- Provide sentiment analysis including:
168
- 1. Overall sentiment: positive, negative, neutral, or mixed
169
- 2. Sentiment distribution: Estimated percentage breakdown
170
- 3. Emotional tone: Key emotions detected
171
- 4. Intensity: How strong the sentiments are
 
 
172
 
173
- Respond with JSON:
174
  {{
175
- "overall_sentiment": "...",
176
  "distribution": {{
177
  "positive": "XX%",
178
  "neutral": "XX%",
179
  "negative": "XX%"
180
  }},
181
- "emotions": ["emotion1", "emotion2", ...],
182
  "intensity": "low|moderate|high"
183
- }}"""
 
 
184
 
185
  messages = [
186
  {"role": "system", "content": self._get_analyst_system_prompt()},
@@ -215,19 +230,25 @@ Respond with JSON:
215
  Sample responses:
216
  {self._format_responses_for_prompt(sample_responses)}
217
 
218
- Based on this data, provide 5-7 key insights that would be valuable for:
219
- - Understanding the target audience
220
- - Identifying opportunities or challenges
221
- - Informing strategic decisions
222
- - Recognizing patterns or trends
 
 
 
 
 
 
 
 
 
223
 
224
- Each insight should be:
225
- - Specific and actionable
226
- - Supported by the data
227
- - Clear and concise
228
 
229
- Respond with a JSON array of insight strings:
230
- ["insight 1", "insight 2", ...]"""
231
 
232
  messages = [
233
  {"role": "system", "content": self._get_analyst_system_prompt()},
 
73
  for i, resp in enumerate(sample_responses, 1):
74
  context += f"{i}. {resp[:200]}...\n" # Truncate long responses
75
 
76
+ prompt = f"""Task: Analyze survey responses and generate an executive summary
77
 
78
  {context}
79
 
80
+ **Your Analysis Should Include:**
 
 
 
 
81
 
82
+ 1. **Overview:** A clear, concise high-level summary of what the data reveals (2-3 sentences)
83
+ 2. **Key Patterns:** Main patterns, trends, or recurring themes observed across responses
84
+ 3. **Notable Findings:** Interesting, surprising, or unexpected discoveries in the data
85
+ 4. **Response Quality:** Assessment of how thoughtful, engaged, and detailed the responses are
86
+
87
+ **Output Format:** Respond ONLY with valid JSON:
88
  {{
89
+ "overview": "Clear summary of overall findings",
90
+ "key_patterns": ["pattern 1", "pattern 2", "pattern 3"],
91
+ "notable_findings": ["surprising finding 1", "unexpected discovery"],
92
+ "response_quality": "Assessment of engagement level"
93
+ }}
94
+
95
+ **Important:** Ensure your response is valid JSON that can be parsed. Do not include any text outside the JSON object."""
96
 
97
  messages = [
98
  {"role": "system", "content": self._get_analyst_system_prompt()},
 
116
  sample_size = min(100, len(response_texts))
117
  sample_responses = response_texts[:sample_size]
118
 
119
+ prompt = f"""Task: Extract and analyze themes from survey responses
120
+
121
+ **Data:** Analyzing {len(sample_responses)} survey responses
122
 
123
  Responses:
124
  {self._format_responses_for_prompt(sample_responses)}
125
 
126
+ **Your Task:** Identify the top {num_themes} distinct themes that emerge from these responses.
 
 
 
 
127
 
128
+ **For Each Theme, Provide:**
129
+ 1. **Theme Name:** A short, memorable, and descriptive label
130
+ 2. **Description:** Clear explanation of what this theme represents and its significance
131
+ 3. **Prevalence:** Estimated percentage of responses that mention or relate to this theme
132
+ 4. **Example Quotes:** 2-3 actual, representative quotes from responses that illustrate this theme
133
+
134
+ **Output Format:** Respond ONLY with a valid JSON array:
135
  [
136
  {{
137
+ "theme_name": "Clear, concise theme label",
138
+ "description": "What this theme means and why it matters",
139
  "prevalence": "XX%",
140
+ "example_quotes": ["exact quote from responses", "another quote"]
141
  }}
142
+ ]
143
+
144
+ **Important:** Ensure all responses are valid JSON. Do not include text outside the array."""
145
 
146
  messages = [
147
  {"role": "system", "content": self._get_analyst_system_prompt()},
 
168
  sample_size = min(100, len(response_texts))
169
  sample_responses = response_texts[:sample_size]
170
 
171
+ prompt = f"""Task: Analyze sentiment across survey responses
172
+
173
+ **Data:** Analyzing sentiment in {len(sample_responses)} survey responses
174
 
175
  Responses:
176
  {self._format_responses_for_prompt(sample_responses)}
177
 
178
+ **Your Task:** Conduct a comprehensive sentiment analysis of these responses.
179
+
180
+ **Analysis Should Include:**
181
+ 1. **Overall Sentiment:** The dominant sentiment tone (positive, negative, neutral, or mixed)
182
+ 2. **Sentiment Distribution:** Estimated percentage breakdown across sentiment categories
183
+ 3. **Emotional Tone:** Key emotions or emotional themes detected in responses
184
+ 4. **Intensity:** The strength of the sentiments (low, moderate, or high)
185
 
186
+ **Output Format:** Respond ONLY with valid JSON:
187
  {{
188
+ "overall_sentiment": "positive|negative|neutral|mixed",
189
  "distribution": {{
190
  "positive": "XX%",
191
  "neutral": "XX%",
192
  "negative": "XX%"
193
  }},
194
+ "emotions": ["emotion1", "emotion2", "emotion3"],
195
  "intensity": "low|moderate|high"
196
+ }}
197
+
198
+ **Important:** Return only valid JSON. Do not include explanatory text."""
199
 
200
  messages = [
201
  {"role": "system", "content": self._get_analyst_system_prompt()},
 
230
  Sample responses:
231
  {self._format_responses_for_prompt(sample_responses)}
232
 
233
+ **Task:** Extract key insights from this survey data
234
+
235
+ **Generate 5-7 actionable insights** that address:
236
+ - Understanding the target audience and their needs
237
+ - Identifying opportunities for growth or improvement
238
+ - Recognizing challenges or pain points
239
+ - Understanding patterns, trends, and correlations
240
+ - Informing strategic or product decisions
241
+
242
+ **Insight Quality Criteria:**
243
+ - **Specific:** Clear, concrete statements based on actual data patterns
244
+ - **Actionable:** Can be used to inform decisions or actions
245
+ - **Evidence-based:** Grounded in what respondents actually said
246
+ - **Concise:** Clear and to the point (1-2 sentences each)
247
 
248
+ **Output Format:** Respond ONLY with a valid JSON array of insight strings:
249
+ ["Clear, actionable insight from the data", "Another specific insight", ...]
 
 
250
 
251
+ **Important:** Return only JSON array. Do not include explanatory text."""
 
252
 
253
  messages = [
254
  {"role": "system", "content": self._get_analyst_system_prompt()},
llm_backend.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- LLM Backend for ConversAI - Supports multiple providers
3
  """
4
  import os
5
  import requests
@@ -74,19 +74,28 @@ class LLMBackend:
74
  self.device = None
75
 
76
  def _get_default_model(self) -> str:
77
- """Get default model for each provider"""
78
  defaults = {
79
  LLMProvider.OPENAI: "gpt-4o-mini",
80
  LLMProvider.ANTHROPIC: "claude-3-5-sonnet-20241022",
81
- # Using Microsoft Phi-2 - causal LM, better at creative text generation
82
- # Phi-2 is 2.7GB and instruction-tuned for following prompts
83
- # Alternative: google/gemma-2b-it (2GB) or TinyLlama/TinyLlama-1.1B-Chat-v1.0 (1.1GB)
84
- # NOTE: Flan-T5 models don't work well - they copy examples instead of generating
85
- LLMProvider.HUGGINGFACE: "microsoft/phi-2",
86
  LLMProvider.LM_STUDIO: "google/gemma-3-27b"
87
  }
88
  return os.getenv("LLM_MODEL", defaults[self.provider])
89
 
 
 
 
 
 
 
 
 
 
 
 
90
  def _get_api_url(self) -> str:
91
  """Get API URL for each provider"""
92
  if self.provider == LLMProvider.OPENAI:
@@ -222,39 +231,63 @@ class LLMBackend:
222
  print(f"Model loaded successfully!")
223
 
224
  def _generate_huggingface(self, messages, max_tokens, temperature) -> str:
225
- """Generate using local transformers model"""
226
- # Load model if not already loaded
227
- self._load_local_model()
228
-
229
- # Convert messages to prompt
230
- prompt = self._messages_to_prompt(messages)
231
-
232
- # Tokenize input
233
- inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
234
- inputs = inputs.to(self.device)
235
-
236
- # Generate
237
- with torch.no_grad():
238
- outputs = self.local_model.generate(
239
- **inputs,
240
- max_new_tokens=max_tokens,
241
- temperature=temperature,
242
- do_sample=temperature > 0,
243
- top_p=0.9,
244
- pad_token_id=self.tokenizer.eos_token_id
245
- )
246
-
247
- # Decode output
248
- generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
249
-
250
- # For T5 models, the output is just the generated text
251
- # For causal models, we need to remove the input prompt
252
- if "t5" not in self.model.lower() and "flan" not in self.model.lower():
253
- # Remove the input prompt from output
254
- if generated_text.startswith(prompt):
255
- generated_text = generated_text[len(prompt):].strip()
256
-
257
- return generated_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  def _generate_lm_studio(self, messages, max_tokens, temperature) -> str:
260
  """Generate using LM Studio local API"""
 
1
  """
2
+ LLM Backend for Project Echo - Supports multiple providers
3
  """
4
  import os
5
  import requests
 
74
  self.device = None
75
 
76
  def _get_default_model(self) -> str:
77
+ """Get default model for each provider with fallback chain"""
78
  defaults = {
79
  LLMProvider.OPENAI: "gpt-4o-mini",
80
  LLMProvider.ANTHROPIC: "claude-3-5-sonnet-20241022",
81
+ # Preferred: Mistral-7B (better instruction following, higher quality)
82
+ # Fallback chain for HF Inference API if primary is gated/unavailable
83
+ LLMProvider.HUGGINGFACE: "mistralai/Mistral-7B-Instruct-v0.1",
 
 
84
  LLMProvider.LM_STUDIO: "google/gemma-3-27b"
85
  }
86
  return os.getenv("LLM_MODEL", defaults[self.provider])
87
 
88
+ def get_fallback_models(self) -> List[str]:
89
+ """Get fallback model chain for HF Inference API"""
90
+ if self.provider == LLMProvider.HUGGINGFACE:
91
+ return [
92
+ "mistralai/Mistral-7B-Instruct-v0.1", # Primary
93
+ "mistralai/Mixtral-8x7B-Instruct-v0.1", # Fallback 1: Better quality
94
+ "google/gemma-7b-it", # Fallback 2: Smaller, faster
95
+ "microsoft/phi-2", # Fallback 3: Original
96
+ ]
97
+ return [self.model]
98
+
99
  def _get_api_url(self) -> str:
100
  """Get API URL for each provider"""
101
  if self.provider == LLMProvider.OPENAI:
 
231
  print(f"Model loaded successfully!")
232
 
233
  def _generate_huggingface(self, messages, max_tokens, temperature) -> str:
234
+ """Generate using local transformers model with fallback chain"""
235
+ # Try to load and generate with fallback chain
236
+ fallback_models = self.get_fallback_models()
237
+ last_error = None
238
+
239
+ for model_to_try in fallback_models:
240
+ try:
241
+ # Temporarily set model for this attempt
242
+ original_model = self.model
243
+ self.model = model_to_try
244
+ self.tokenizer = None # Reset tokenizer cache
245
+ self.local_model = None # Reset model cache
246
+
247
+ # Load model if not already loaded
248
+ self._load_local_model()
249
+
250
+ # Convert messages to prompt
251
+ prompt = self._messages_to_prompt(messages)
252
+
253
+ # Tokenize input
254
+ inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
255
+ inputs = inputs.to(self.device)
256
+
257
+ # Generate
258
+ with torch.no_grad():
259
+ outputs = self.local_model.generate(
260
+ **inputs,
261
+ max_new_tokens=max_tokens,
262
+ temperature=temperature,
263
+ do_sample=temperature > 0,
264
+ top_p=0.9,
265
+ pad_token_id=self.tokenizer.eos_token_id
266
+ )
267
+
268
+ # Decode output
269
+ generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
270
+
271
+ # For T5 models, the output is just the generated text
272
+ # For causal models, we need to remove the input prompt
273
+ if "t5" not in self.model.lower() and "flan" not in self.model.lower():
274
+ # Remove the input prompt from output
275
+ if generated_text.startswith(prompt):
276
+ generated_text = generated_text[len(prompt):].strip()
277
+
278
+ # Success! Update the default model for future use
279
+ self.model = model_to_try
280
+ print(f"✓ Successfully using model: {model_to_try}")
281
+ return generated_text
282
+
283
+ except Exception as e:
284
+ last_error = e
285
+ print(f"⚠ Model {model_to_try} failed: {str(e)[:100]}")
286
+ self.model = original_model # Restore original
287
+ continue
288
+
289
+ # All fallbacks failed
290
+ raise Exception(f"All HuggingFace models failed. Last error: {str(last_error)}")
291
 
292
  def _generate_lm_studio(self, messages, max_tokens, temperature) -> str:
293
  """Generate using LM Studio local API"""
survey_generator.py CHANGED
@@ -78,29 +78,30 @@ class SurveyGenerator:
78
  return f"{topic} Survey"
79
 
80
  def _get_system_prompt(self) -> str:
81
- """System prompt for survey generation"""
82
- return """You are a professional survey designer. Generate clear, professional survey questions."""
83
 
84
  def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
85
- """Build the user prompt for survey generation"""
86
- # For causal LMs (Phi, Gemma, etc.) - more direct and explicit
87
- return f"""Task: Create a {survey_type} research survey
88
 
89
- Research Topic: {outline}
90
 
91
- Target Audience: {target_audience}
92
 
93
- Create exactly {num_questions} survey questions.
94
 
95
- Requirements:
96
- - Each question must be clear, specific, and relevant to the topic
97
- - Questions should be appropriate for the target audience
98
- - Avoid yes/no questions in qualitative surveys
99
- - Make questions open-ended to encourage detailed responses
 
 
100
 
101
- Format: Use numbered list (1., 2., 3., etc.)
102
 
103
- Here are the {num_questions} survey questions:
104
 
105
  1."""
106
 
@@ -277,7 +278,7 @@ Here are the {num_questions} survey questions:
277
 
278
  def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
279
  """
280
- Refine a single survey question.
281
 
282
  Args:
283
  question: The question to improve
@@ -286,19 +287,32 @@ Here are the {num_questions} survey questions:
286
  Returns:
287
  Improved question text
288
  """
289
- prompt = f"""Improve the following survey question for better {improvement_type}:
 
 
 
 
 
 
 
 
 
 
290
 
291
- Original Question: {question}
292
 
293
- Provide an improved version that:
294
- - {"Is clearer and easier to understand" if improvement_type == "clarity" else ""}
295
- - {"Removes bias and leading language" if improvement_type == "neutrality" else ""}
296
- - {"Is more specific and actionable" if improvement_type == "specificity" else ""}
297
 
298
- Respond with only the improved question text, no explanation."""
 
 
 
 
 
 
299
 
300
  messages = [
301
- {"role": "system", "content": "You are an expert survey question designer."},
302
  {"role": "user", "content": prompt}
303
  ]
304
 
@@ -306,7 +320,7 @@ Respond with only the improved question text, no explanation."""
306
 
307
  def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
308
  """
309
- Generate follow-up questions for deeper exploration.
310
 
311
  Args:
312
  base_question: The main question
@@ -315,35 +329,48 @@ Respond with only the improved question text, no explanation."""
315
  Returns:
316
  List of follow-up question texts
317
  """
318
- prompt = f"""Generate {num_follow_ups} follow-up questions for this main question:
 
 
319
 
320
- Main Question: {base_question}
321
 
322
- The follow-up questions should:
323
- 1. Probe deeper into the topic
324
- 2. Explore different aspects or dimensions
325
- 3. Encourage detailed responses
326
- 4. Follow a logical progression
 
327
 
328
- Respond with a JSON array of question strings."""
 
 
 
 
329
 
330
  messages = [
331
- {"role": "system", "content": "You are an expert in qualitative research interviews."},
332
  {"role": "user", "content": prompt}
333
  ]
334
 
335
  response = self.llm.generate(messages, max_tokens=500, temperature=0.7)
336
 
337
- try:
338
- # Extract JSON array
339
- if "[" in response:
340
- start = response.find("[")
341
- end = response.rfind("]") + 1
342
- follow_ups = json.loads(response[start:end])
343
- return follow_ups[:num_follow_ups]
344
- except:
345
- pass
346
-
347
- # Fallback: split by newlines
 
 
 
 
348
  lines = [line.strip() for line in response.split("\n") if line.strip()]
349
- return [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]
 
 
 
78
  return f"{topic} Survey"
79
 
80
  def _get_system_prompt(self) -> str:
81
+ """System prompt for survey generation - optimized for Mistral/Mixtral"""
82
+ return """You are an expert survey designer specializing in qualitative research. Your role is to create clear, professionally-written, and contextually relevant survey questions that elicit detailed responses from respondents."""
83
 
84
  def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
85
+ """Build the user prompt for survey generation - optimized for Mistral/Mixtral"""
86
+ return f"""You are creating a {survey_type.lower()} research survey.
 
87
 
88
+ **Research Focus:** {outline}
89
 
90
+ **Target Participants:** {target_audience}
91
 
92
+ **Your Task:** Generate exactly {num_questions} high-quality survey questions.
93
 
94
+ **Quality Requirements:**
95
+ - Each question must be directly relevant to the research focus
96
+ - Questions should be specific enough to guide responses but open enough to capture diverse perspectives
97
+ - For {survey_type.lower()} surveys: Use open-ended questions that encourage detailed, thoughtful responses
98
+ - Avoid leading questions, double questions, or jargon that may confuse respondents
99
+ - Ensure questions are appropriate for the target audience's knowledge and context
100
+ - Progress from general to specific topics when possible
101
 
102
+ **Format:** Output as a numbered list (1. Question text 2. Question text, etc.)
103
 
104
+ **Output {num_questions} Survey Questions:**
105
 
106
  1."""
107
 
 
278
 
279
  def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
280
  """
281
+ Refine a single survey question - optimized for Mistral/Mixtral
282
 
283
  Args:
284
  question: The question to improve
 
287
  Returns:
288
  Improved question text
289
  """
290
+ improvement_guidance = {
291
+ "clarity": "Makes the question clearer and easier for respondents to understand without ambiguity",
292
+ "neutrality": "Removes any bias, leading language, or assumptions that could influence responses",
293
+ "specificity": "Makes the question more specific and actionable while remaining open-ended"
294
+ }
295
+
296
+ guidance = improvement_guidance.get(improvement_type, improvement_guidance["clarity"])
297
+
298
+ prompt = f"""Task: Improve a survey question
299
+
300
+ **Original Question:** "{question}"
301
 
302
+ **Improvement Type:** {improvement_type.title()}
303
 
304
+ **Your Goal:** Rewrite this question so that it {guidance}.
 
 
 
305
 
306
+ **Guidelines:**
307
+ - Keep the question focused on a single topic
308
+ - Use simple, clear language appropriate for the target audience
309
+ - Avoid assumptions or leading language
310
+ - Ensure the question can elicit meaningful responses
311
+
312
+ Provide ONLY the improved question text. Do not include explanations or alternative versions."""
313
 
314
  messages = [
315
+ {"role": "system", "content": "You are an expert survey question designer with deep experience in qualitative research methodology."},
316
  {"role": "user", "content": prompt}
317
  ]
318
 
 
320
 
321
  def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
322
  """
323
+ Generate follow-up questions for deeper exploration - optimized for Mistral/Mixtral
324
 
325
  Args:
326
  base_question: The main question
 
329
  Returns:
330
  List of follow-up question texts
331
  """
332
+ prompt = f"""Task: Generate probing follow-up questions
333
+
334
+ **Main Question:** {base_question}
335
 
336
+ **Your Task:** Create {num_follow_ups} thoughtful follow-up questions that probe deeper into the respondent's answer.
337
 
338
+ **Quality Criteria for Follow-ups:**
339
+ 1. Each question should explore a different aspect, dimension, or implication of the main topic
340
+ 2. Questions should encourage more detailed, nuanced responses
341
+ 3. Follow a logical progression from the main question
342
+ 4. Build on what a respondent might answer to the main question
343
+ 5. Each should be specific but open-ended
344
 
345
+ **Format:** Number each question (1., 2., 3., etc.)
346
+
347
+ **Output {num_follow_ups} Follow-up Questions:**
348
+
349
+ 1."""
350
 
351
  messages = [
352
+ {"role": "system", "content": "You are an expert qualitative research interviewer skilled at designing probing questions that uncover deeper insights and nuances."},
353
  {"role": "user", "content": prompt}
354
  ]
355
 
356
  response = self.llm.generate(messages, max_tokens=500, temperature=0.7)
357
 
358
+ # Parse the response for follow-up questions
359
+ import re
360
+
361
+ # Try numbered list format first
362
+ pattern = r'\d+[\.\)]\s+(.+?)(?=\d+[\.\)]|\Z)'
363
+ matches = re.findall(pattern, response, re.DOTALL)
364
+
365
+ if matches:
366
+ follow_ups = [m.split('\n')[0].strip() for m in matches if m.strip()][:num_follow_ups]
367
+ # Ensure all end with question mark
368
+ follow_ups = [q if q.endswith('?') else q + '?' for q in follow_ups]
369
+ if follow_ups:
370
+ return follow_ups
371
+
372
+ # Fallback: split by newlines and look for questions
373
  lines = [line.strip() for line in response.split("\n") if line.strip()]
374
+ follow_ups = [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]
375
+
376
+ return follow_ups if follow_ups else [f"Can you elaborate on {base_question.lower()}?" for _ in range(num_follow_ups)]