"""
Survey Generation Module - Generate AI-powered surveys from outlines
"""
import json
import os
import re
import sys
from typing import List, Dict, Optional

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(__file__))

from llm_backend import LLMBackend


class SurveyGenerator:
    """
    Generates professional surveys from user outlines using AI.
    Follows industry best practices for qualitative research.
    """

    def __init__(self, llm_backend: LLMBackend):
        self.llm = llm_backend

    def generate_survey(self, outline: str, survey_type: str = "qualitative",
                        num_questions: int = 10,
                        target_audience: str = "general") -> Dict:
        """
        Generate a complete survey from an outline.

        Args:
            outline: User's outline or topic description
            survey_type: Type of survey (qualitative, quantitative, mixed)
            num_questions: Target number of questions
            target_audience: Description of target respondents

        Returns:
            Dict containing survey metadata and questions

        Raises:
            Exception: If the LLM call or response parsing fails; the
                original error is preserved via exception chaining.
        """
        prompt = self._build_generation_prompt(outline, survey_type,
                                               num_questions, target_audience)

        messages = [
            {"role": "system", "content": self._get_system_prompt()},
            {"role": "user", "content": prompt}
        ]

        try:
            response = self.llm.generate(messages, max_tokens=2000, temperature=0.7)
            survey_data = self._parse_survey_response(response)

            # Generate better title based on outline
            survey_data["title"] = self._generate_title(outline, survey_type)

            # Add metadata
            survey_data["metadata"] = {
                "outline": outline,
                "survey_type": survey_type,
                "target_audience": target_audience,
                "generated_question_count": len(survey_data.get("questions", []))
            }

            return survey_data
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Survey generation failed: {str(e)}") from e

    def _generate_title(self, outline: str, survey_type: str) -> str:
        """Generate a survey title from the outline."""
        # Extract key topic from outline (first sentence, truncated to 60 chars)
        first_sentence = outline.split('.')[0].strip()
        if len(first_sentence) > 60:
            first_sentence = first_sentence[:60] + "..."

        # Capitalize first letter; fall back to a generic topic when empty
        topic = first_sentence[0].upper() + first_sentence[1:] if first_sentence else "Research"

        # Create title based on survey type
        if survey_type.lower() == "qualitative":
            return f"{topic} - Qualitative Survey"
        elif survey_type.lower() == "quantitative":
            return f"{topic} - Quantitative Survey"
        else:
            return f"{topic} Survey"

    def _get_system_prompt(self) -> str:
        """System prompt for survey generation - optimized for Mistral/Mixtral."""
        return """You are an expert survey designer specializing in qualitative research. Your role is to create clear, professionally-written, and contextually relevant survey questions that elicit detailed responses from respondents."""

    def _build_generation_prompt(self, outline, survey_type, num_questions,
                                 target_audience) -> str:
        """Build the user prompt for survey generation - optimized for Mistral/Mixtral."""
        return f"""You are creating a {survey_type.lower()} research survey.

**Research Focus:** {outline}

**Target Participants:** {target_audience}

**Your Task:** Generate exactly {num_questions} high-quality survey questions.

**Quality Requirements:**
- Each question must be directly relevant to the research focus
- Questions should be specific enough to guide responses but open enough to capture diverse perspectives
- For {survey_type.lower()} surveys: Use open-ended questions that encourage detailed, thoughtful responses
- Avoid leading questions, double questions, or jargon that may confuse respondents
- Ensure questions are appropriate for the target audience's knowledge and context
- Progress from general to specific topics when possible

**Format:** Output as a numbered list (1. Question text 2. Question text, etc.)

**Output {num_questions} Survey Questions:**
1."""

    def _parse_survey_response(self, response: str) -> Dict:
        """Parse LLM response into survey structure."""
        # Parse numbered list format (not JSON)
        return self._parse_numbered_list(response)

    @staticmethod
    def _fallback_questions() -> List[Dict]:
        """Return a fresh list of generic open-ended fallback questions.

        Built anew on each call so callers can safely mutate the result.
        """
        return [
            {"id": 1, "question_text": "What are your overall thoughts on this topic?",
             "question_type": "open_ended", "required": True},
            {"id": 2, "question_text": "Can you describe your experience in detail?",
             "question_type": "open_ended", "required": True},
            {"id": 3, "question_text": "What specific suggestions do you have for improvement?",
             "question_type": "open_ended", "required": True},
        ]

    @staticmethod
    def _classify_question(clean_line: str, extended: bool = True):
        """Infer a question type (and answer options) from the question text.

        Args:
            clean_line: Cleaned question text (already ends with '?').
            extended: When True, also detect yes/no and satisfaction
                (Likert) questions. The alternative-format parser passes
                False, matching its original rating-only detection.

        Returns:
            Tuple of (question_type, options); options is None for
            open-ended questions.
        """
        lower_line = clean_line.lower()

        # Check for rating/scale questions
        if any(word in lower_line for word in ('rate', 'scale', 'rating', 'score')):
            return "rating", ["1 - Poor", "2 - Fair", "3 - Good",
                              "4 - Very Good", "5 - Excellent"]

        if extended:
            # Check for yes/no questions
            if clean_line.endswith('?') and any(
                    word in lower_line for word in
                    ('do you', 'have you', 'would you', 'can you',
                     'should', 'is it', 'are you')):
                if 'how much' not in lower_line and 'how many' not in lower_line:
                    return "yes_no", ["Yes", "No"]
                # Matches the yes/no trigger but asks for a quantity:
                # stay open-ended (mirrors the original elif short-circuit,
                # which also skips the satisfaction check here).
                return "open_ended", None

            # Check for satisfaction questions
            if any(word in lower_line for word in
                   ('satisfy', 'satisfaction', 'satisfied')):
                return "likert_scale", ["Very Satisfied", "Satisfied", "Neutral",
                                        "Dissatisfied", "Very Dissatisfied"]

        return "open_ended", None

    def _parse_numbered_list(self, response: str) -> Dict:
        """Parse numbered list of questions into survey structure."""
        questions = []
        question_id = 1

        # Pattern to match numbered questions: "1. Question" or "1) Question"
        pattern = r'\d+[\.\)]\s+'
        parts = [p.strip() for p in re.split(pattern, response) if p.strip()]

        for part in parts:
            # Skip if too short
            if len(part) < 10:
                continue

            # Take only the first sentence/question if there are multiple.
            # Split by newlines, or by terminal punctuation followed by the
            # next numbered item or end-of-string.
            sentences = re.split(r'[\n]+|[?.!]\s+(?=\d+[\.\)]|\Z)', part)
            clean_line = sentences[0].strip()

            # Remove any leading hyphens or bullets that might appear
            clean_line = re.sub(r'^[-•*]\s*', '', clean_line)

            # Add question mark if missing
            if clean_line and not clean_line.endswith('?'):
                clean_line += '?'

            # Skip if still too short
            if len(clean_line) < 10:
                continue

            # Determine question type based on content
            question_type, options = self._classify_question(clean_line, extended=True)

            question = {
                "id": question_id,
                "question_text": clean_line,
                "question_type": question_type,
                "required": True
            }
            if options:
                question["options"] = options

            questions.append(question)
            question_id += 1

        # If we found few or no questions from numbered list, try alternative
        # parsing. This helps catch responses that don't use numbered format.
        if len(questions) < 3:
            alt_questions = self._parse_alternative_format(response)
            # Use alternative if it found more questions
            if len(alt_questions) > len(questions):
                questions = alt_questions

        # Final fallback if still no questions
        if len(questions) == 0:
            questions = self._fallback_questions()

        return {
            "title": "Research Survey",
            "introduction": "Thank you for taking the time to participate in this survey. Your responses will help us better understand your experiences and perspectives. Please answer all questions honestly and thoroughly.",
            "questions": questions[:20],  # Limit to 20 questions
            "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
        }

    def _parse_alternative_format(self, response: str) -> List[Dict]:
        """Try alternative parsing approaches if numbered list fails."""
        questions = []
        question_id = 1

        # Try splitting by lines and looking for question patterns
        lines = response.split('\n')

        for line in lines:
            line = line.strip()

            # Skip empty lines
            if not line or len(line) < 10:
                continue

            # Skip lines that are just labels or instructions
            skip_keywords = ['format:', 'requirements:', 'task:', 'topic:',
                             'audience:', 'here are', 'survey questions:', 'questions:']
            if any(keyword in line.lower() for keyword in skip_keywords):
                continue

            # Check if this looks like a question (has ?, or starts with question words)
            has_question_mark = '?' in line
            starts_with_question_word = any(
                word in line.lower() for word in
                ['describe', 'explain', 'what', 'how', 'why', 'when', 'where',
                 'who', 'can you', 'would you', 'do you', 'have you'])

            if has_question_mark or starts_with_question_word:
                # Clean up the line (remove bullets, numbers, etc).
                # NOTE: the previous pattern r'^[-•*\d+\.\)]\s*' was an
                # unquantified character class that stripped only ONE leading
                # character (turning "12. Q" into "2. Q"); this version strips
                # a full leading "12." / "12)" marker or a single bullet.
                clean_line = re.sub(r'^(?:\d+[\.\)]?|[-•*])\s*', '', line).strip()

                # Ensure it ends with question mark
                if clean_line and not clean_line.endswith('?'):
                    # Only add if it doesn't already end with punctuation
                    if not any(c in clean_line for c in [':', '!', '.']):
                        clean_line += '?'

                # Skip if too short after cleaning
                if len(clean_line) < 10:
                    continue

                # Determine question type based on content (rating-only here)
                question_type, options = self._classify_question(clean_line, extended=False)

                question = {
                    "id": question_id,
                    "question_text": clean_line,
                    "question_type": question_type,
                    "required": True
                }
                if options:
                    question["options"] = options

                questions.append(question)
                question_id += 1

        # If still no questions found, create fallback questions based on topic hints
        if len(questions) == 0:
            questions = self._fallback_questions()

        return questions

    def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
        """
        Refine a single survey question - optimized for Mistral/Mixtral

        Args:
            question: The question to improve
            improvement_type: Type of improvement (clarity, neutrality, specificity)

        Returns:
            Improved question text
        """
        improvement_guidance = {
            "clarity": "Makes the question clearer and easier for respondents to understand without ambiguity",
            "neutrality": "Removes any bias, leading language, or assumptions that could influence responses",
            "specificity": "Makes the question more specific and actionable while remaining open-ended"
        }

        # Unknown improvement types fall back to the clarity guidance
        guidance = improvement_guidance.get(improvement_type, improvement_guidance["clarity"])

        prompt = f"""Task: Improve a survey question

**Original Question:** "{question}"

**Improvement Type:** {improvement_type.title()}

**Your Goal:** Rewrite this question so that it {guidance}.

**Guidelines:**
- Keep the question focused on a single topic
- Use simple, clear language appropriate for the target audience
- Avoid assumptions or leading language
- Ensure the question can elicit meaningful responses

Provide ONLY the improved question text. Do not include explanations or alternative versions."""

        messages = [
            {"role": "system", "content": "You are an expert survey question designer with deep experience in qualitative research methodology."},
            {"role": "user", "content": prompt}
        ]

        return self.llm.generate(messages, max_tokens=150, temperature=0.5).strip()

    def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
        """
        Generate follow-up questions for deeper exploration - optimized for Mistral/Mixtral

        Args:
            base_question: The main question
            num_follow_ups: Number of follow-up questions to generate

        Returns:
            List of follow-up question texts
        """
        prompt = f"""Task: Generate probing follow-up questions

**Main Question:** {base_question}

**Your Task:** Create {num_follow_ups} thoughtful follow-up questions that probe deeper into the respondent's answer.

**Quality Criteria for Follow-ups:**
1. Each question should explore a different aspect, dimension, or implication of the main topic
2. Questions should encourage more detailed, nuanced responses
3. Follow a logical progression from the main question
4. Build on what a respondent might answer to the main question
5. Each should be specific but open-ended

**Format:** Number each question (1., 2., 3., etc.)

**Output {num_follow_ups} Follow-up Questions:**
1."""

        messages = [
            {"role": "system", "content": "You are an expert qualitative research interviewer skilled at designing probing questions that uncover deeper insights and nuances."},
            {"role": "user", "content": prompt}
        ]

        response = self.llm.generate(messages, max_tokens=500, temperature=0.7)

        # Try numbered list format first: capture text between numbered markers
        pattern = r'\d+[\.\)]\s+(.+?)(?=\d+[\.\)]|\Z)'
        matches = re.findall(pattern, response, re.DOTALL)

        if matches:
            # Keep only the first line of each match, capped at num_follow_ups
            follow_ups = [m.split('\n')[0].strip() for m in matches if m.strip()][:num_follow_ups]
            # Ensure all end with question mark
            follow_ups = [q if q.endswith('?') else q + '?' for q in follow_ups]
            if follow_ups:
                return follow_ups

        # Fallback: split by newlines and look for questions
        lines = [line.strip() for line in response.split("\n") if line.strip()]
        follow_ups = [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]

        # Last resort: repeat a generic elaboration prompt num_follow_ups times
        return follow_ups if follow_ups else [
            f"Can you elaborate on {base_question.lower()}?" for _ in range(num_follow_ups)
        ]