Spaces:
Sleeping
Sleeping
File size: 16,359 Bytes
196c707 8056e83 196c707 8056e83 196c707 4858e1f 196c707 4858e1f 196c707 db39ccf 196c707 db39ccf fbc9719 db39ccf fbc9719 db39ccf fbc9719 db39ccf 28613b6 db39ccf 28613b6 db39ccf 28613b6 db39ccf 4858e1f 56fed0f 196c707 4858e1f 196c707 4858e1f 56fed0f 28613b6 56fed0f fbc9719 56fed0f 4858e1f 56fed0f 28613b6 56fed0f 28613b6 56fed0f 28613b6 56fed0f 4858e1f 56fed0f 4858e1f 28613b6 4858e1f fbc9719 4858e1f fbc9719 4858e1f fbc9719 196c707 28613b6 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf 196c707 db39ccf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 |
"""
Survey Generation Module - Generate AI-powered surveys from outlines
"""
import json
import os
import re
import sys
from typing import Dict, List, Optional

# Add parent directory to path for imports (must run before the local import below)
sys.path.insert(0, os.path.dirname(__file__))

from llm_backend import LLMBackend
class SurveyGenerator:
    """
    Generates professional surveys from user outlines using AI.
    Follows industry best practices for qualitative research.
    """

    # Hard cap on how many questions a generated survey may contain.
    _MAX_QUESTIONS = 20

    # Keyword heuristics used to infer a question's type from its wording.
    _RATING_KEYWORDS = ('rate', 'scale', 'rating', 'score')
    _YES_NO_KEYWORDS = ('do you', 'have you', 'would you', 'can you', 'should', 'is it', 'are you')
    _SATISFACTION_KEYWORDS = ('satisfy', 'satisfaction', 'satisfied')

    # Answer-option presets for closed question types. Copied (never aliased)
    # into each question dict so callers may mutate a question's options freely.
    _RATING_OPTIONS = ["1 - Poor", "2 - Fair", "3 - Good", "4 - Very Good", "5 - Excellent"]
    _LIKERT_OPTIONS = ["Very Satisfied", "Satisfied", "Neutral", "Dissatisfied", "Very Dissatisfied"]

    def __init__(self, llm_backend: "LLMBackend"):
        # Forward-reference annotation: behaviorally identical, but the class
        # body no longer evaluates the LLMBackend name at definition time.
        self.llm = llm_backend

    def generate_survey(self,
                        outline: str,
                        survey_type: str = "qualitative",
                        num_questions: int = 10,
                        target_audience: str = "general") -> Dict:
        """
        Generate a complete survey from an outline.

        Args:
            outline: User's outline or topic description
            survey_type: Type of survey (qualitative, quantitative, mixed)
            num_questions: Target number of questions
            target_audience: Description of target respondents

        Returns:
            Dict containing survey metadata and questions

        Raises:
            RuntimeError: If the LLM call or response parsing fails. The
                original exception is chained as ``__cause__``.
        """
        prompt = self._build_generation_prompt(outline, survey_type, num_questions, target_audience)
        messages = [
            {"role": "system", "content": self._get_system_prompt()},
            {"role": "user", "content": prompt}
        ]
        try:
            response = self.llm.generate(messages, max_tokens=2000, temperature=0.7)
            survey_data = self._parse_survey_response(response)
            # Replace the generic parsed title with one derived from the outline
            survey_data["title"] = self._generate_title(outline, survey_type)
            # Attach generation metadata for downstream consumers
            survey_data["metadata"] = {
                "outline": outline,
                "survey_type": survey_type,
                "target_audience": target_audience,
                "generated_question_count": len(survey_data.get("questions", []))
            }
            return survey_data
        except Exception as e:
            # Chain the cause (PEP 3134) instead of discarding the traceback.
            # RuntimeError subclasses Exception, so existing callers that
            # catch Exception keep working.
            raise RuntimeError(f"Survey generation failed: {e}") from e

    def _generate_title(self, outline: str, survey_type: str) -> str:
        """Generate a survey title from the outline."""
        # Extract key topic from outline (first sentence, truncated to ~60 chars)
        first_sentence = outline.split('.')[0].strip()
        if len(first_sentence) > 60:
            first_sentence = first_sentence[:60] + "..."
        # Capitalize first letter; fall back to a generic topic for empty outlines
        topic = first_sentence[0].upper() + first_sentence[1:] if first_sentence else "Research"
        # Create title based on survey type
        if survey_type.lower() == "qualitative":
            return f"{topic} - Qualitative Survey"
        elif survey_type.lower() == "quantitative":
            return f"{topic} - Quantitative Survey"
        else:
            return f"{topic} Survey"

    def _get_system_prompt(self) -> str:
        """System prompt for survey generation - optimized for Mistral/Mixtral."""
        return """You are an expert survey designer specializing in qualitative research. Your role is to create clear, professionally-written, and contextually relevant survey questions that elicit detailed responses from respondents."""

    def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
        """Build the user prompt for survey generation - optimized for Mistral/Mixtral."""
        return f"""You are creating a {survey_type.lower()} research survey.
**Research Focus:** {outline}
**Target Participants:** {target_audience}
**Your Task:** Generate exactly {num_questions} high-quality survey questions.
**Quality Requirements:**
- Each question must be directly relevant to the research focus
- Questions should be specific enough to guide responses but open enough to capture diverse perspectives
- For {survey_type.lower()} surveys: Use open-ended questions that encourage detailed, thoughtful responses
- Avoid leading questions, double questions, or jargon that may confuse respondents
- Ensure questions are appropriate for the target audience's knowledge and context
- Progress from general to specific topics when possible
**Format:** Output as a numbered list (1. Question text 2. Question text, etc.)
**Output {num_questions} Survey Questions:**
1."""

    def _parse_survey_response(self, response: str) -> Dict:
        """Parse LLM response into survey structure."""
        # Parse numbered list format (not JSON)
        return self._parse_numbered_list(response)

    def _classify_question(self, question_text: str) -> "tuple[str, Optional[List[str]]]":
        """Heuristically classify a question, returning (question_type, options).

        ``options`` is None for open-ended questions. This logic was previously
        duplicated (and had drifted apart) between the numbered-list and
        line-based parsers; both now share this single implementation.
        """
        lowered = question_text.lower()
        # Rating/scale questions
        if any(word in lowered for word in self._RATING_KEYWORDS):
            return "rating", list(self._RATING_OPTIONS)
        # Yes/no questions -- but quantity questions stay open-ended
        if question_text.endswith('?') and any(word in lowered for word in self._YES_NO_KEYWORDS):
            if 'how much' in lowered or 'how many' in lowered:
                return "open_ended", None
            return "yes_no", ["Yes", "No"]
        # Satisfaction questions get a Likert scale
        if any(word in lowered for word in self._SATISFACTION_KEYWORDS):
            return "likert_scale", list(self._LIKERT_OPTIONS)
        return "open_ended", None

    def _make_question(self, question_id: int, question_text: str) -> Dict:
        """Build a single question dict with a heuristically assigned type."""
        question_type, options = self._classify_question(question_text)
        question = {
            "id": question_id,
            "question_text": question_text,
            "question_type": question_type,
            "required": True
        }
        if options:
            question["options"] = options
        return question

    @staticmethod
    def _default_questions() -> List[Dict]:
        """Generic fallback questions used when parsing finds nothing usable."""
        return [
            {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
            {"id": 2, "question_text": "Can you describe your experience in detail?", "question_type": "open_ended", "required": True},
            {"id": 3, "question_text": "What specific suggestions do you have for improvement?", "question_type": "open_ended", "required": True}
        ]

    def _parse_numbered_list(self, response: str) -> Dict:
        """Parse a numbered list of questions into the survey structure.

        Falls back to line-based parsing when fewer than three numbered
        questions are found, and to a generic default set when nothing
        usable can be extracted at all.
        """
        # Split on numbered markers: "1. Question" or "1) Question"
        parts = re.split(r'\d+[\.\)]\s+', response)
        parts = [p.strip() for p in parts if p.strip()]

        questions: List[Dict] = []
        for part in parts:
            # Skip fragments too short to be real questions
            if len(part) < 10:
                continue
            # Keep only the first sentence/question of each fragment
            # (split by newline, or sentence punctuation before the next number)
            sentences = re.split(r'[\n]+|[?.!]\s+(?=\d+[\.\)]|\Z)', part)
            clean_line = sentences[0].strip()
            # Remove any leading hyphens or bullets that might appear
            clean_line = re.sub(r'^[-•*]\s*', '', clean_line)
            # Normalize to interrogative form
            if clean_line and not clean_line.endswith('?'):
                clean_line += '?'
            # Skip if still too short after cleaning
            if len(clean_line) < 10:
                continue
            questions.append(self._make_question(len(questions) + 1, clean_line))

        # If we found few or no questions from the numbered list, try the
        # line-based parser; some responses don't use numbered format.
        if len(questions) < 3:
            alt_questions = self._parse_alternative_format(response)
            if len(alt_questions) > len(questions):
                questions = alt_questions

        # Final fallback if still no questions
        if not questions:
            questions = self._default_questions()

        return {
            "title": "Research Survey",
            "introduction": "Thank you for taking the time to participate in this survey. Your responses will help us better understand your experiences and perspectives. Please answer all questions honestly and thoroughly.",
            "questions": questions[:self._MAX_QUESTIONS],
            "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
        }

    def _parse_alternative_format(self, response: str) -> List[Dict]:
        """Line-based fallback parser for responses that are not numbered lists."""
        questions: List[Dict] = []
        for line in response.split('\n'):
            line = line.strip()
            # Skip empty or too-short lines
            if not line or len(line) < 10:
                continue
            # Skip lines that are just labels or echoed instructions
            skip_keywords = ['format:', 'requirements:', 'task:', 'topic:', 'audience:', 'here are', 'survey questions:', 'questions:']
            if any(keyword in line.lower() for keyword in skip_keywords):
                continue
            # Keep only lines that look like questions (contain '?' or a question word)
            has_question_mark = '?' in line
            starts_with_question_word = any(word in line.lower() for word in ['describe', 'explain', 'what', 'how', 'why', 'when', 'where', 'who', 'can you', 'would you', 'do you', 'have you'])
            if not (has_question_mark or starts_with_question_word):
                continue
            # Clean up the line (remove bullets, numbers, etc)
            clean_line = re.sub(r'^[-•*\d+\.\)]\s*', '', line).strip()
            # Append '?' only when the line has no other terminal punctuation
            if clean_line and not clean_line.endswith('?'):
                if not any(c in clean_line for c in [':', '!', '.']):
                    clean_line += '?'
            # Skip if too short after cleaning
            if len(clean_line) < 10:
                continue
            questions.append(self._make_question(len(questions) + 1, clean_line))

        # If still no questions found, fall back to the generic set
        if not questions:
            questions = self._default_questions()
        return questions

    def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
        """
        Refine a single survey question - optimized for Mistral/Mixtral.

        Args:
            question: The question to improve
            improvement_type: Type of improvement (clarity, neutrality, specificity);
                unknown values fall back to "clarity"

        Returns:
            Improved question text
        """
        improvement_guidance = {
            "clarity": "Makes the question clearer and easier for respondents to understand without ambiguity",
            "neutrality": "Removes any bias, leading language, or assumptions that could influence responses",
            "specificity": "Makes the question more specific and actionable while remaining open-ended"
        }
        guidance = improvement_guidance.get(improvement_type, improvement_guidance["clarity"])
        prompt = f"""Task: Improve a survey question
**Original Question:** "{question}"
**Improvement Type:** {improvement_type.title()}
**Your Goal:** Rewrite this question so that it {guidance}.
**Guidelines:**
- Keep the question focused on a single topic
- Use simple, clear language appropriate for the target audience
- Avoid assumptions or leading language
- Ensure the question can elicit meaningful responses
Provide ONLY the improved question text. Do not include explanations or alternative versions."""
        messages = [
            {"role": "system", "content": "You are an expert survey question designer with deep experience in qualitative research methodology."},
            {"role": "user", "content": prompt}
        ]
        return self.llm.generate(messages, max_tokens=150, temperature=0.5).strip()

    def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
        """
        Generate follow-up questions for deeper exploration - optimized for Mistral/Mixtral.

        Args:
            base_question: The main question
            num_follow_ups: Number of follow-up questions to generate

        Returns:
            List of follow-up question texts (generic elaboration prompts if
            parsing the LLM response yields nothing)
        """
        prompt = f"""Task: Generate probing follow-up questions
**Main Question:** {base_question}
**Your Task:** Create {num_follow_ups} thoughtful follow-up questions that probe deeper into the respondent's answer.
**Quality Criteria for Follow-ups:**
1. Each question should explore a different aspect, dimension, or implication of the main topic
2. Questions should encourage more detailed, nuanced responses
3. Follow a logical progression from the main question
4. Build on what a respondent might answer to the main question
5. Each should be specific but open-ended
**Format:** Number each question (1., 2., 3., etc.)
**Output {num_follow_ups} Follow-up Questions:**
1."""
        messages = [
            {"role": "system", "content": "You are an expert qualitative research interviewer skilled at designing probing questions that uncover deeper insights and nuances."},
            {"role": "user", "content": prompt}
        ]
        response = self.llm.generate(messages, max_tokens=500, temperature=0.7)

        # Try numbered list format first: capture text between numbered markers
        pattern = r'\d+[\.\)]\s+(.+?)(?=\d+[\.\)]|\Z)'
        matches = re.findall(pattern, response, re.DOTALL)
        if matches:
            follow_ups = [m.split('\n')[0].strip() for m in matches if m.strip()][:num_follow_ups]
            # Ensure all end with question mark
            follow_ups = [q if q.endswith('?') else q + '?' for q in follow_ups]
            if follow_ups:
                return follow_ups

        # Fallback: split by newlines and look for lines containing questions
        lines = [line.strip() for line in response.split("\n") if line.strip()]
        follow_ups = [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]
        return follow_ups if follow_ups else [f"Can you elaborate on {base_question.lower()}?" for _ in range(num_follow_ups)]
|