Spaces:
Sleeping
Sleeping
| """ | |
| Survey Generation Module - Generate AI-powered surveys from outlines | |
| """ | |
import json
import os
import re
import sys
from typing import Dict, List, Optional

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(__file__))

from llm_backend import LLMBackend
class SurveyGenerator:
    """
    Generates professional surveys from user outlines using AI.

    Follows industry best practices for qualitative research: open-ended,
    neutral questions that progress from general to specific. The LLM is
    asked for a numbered list, which is then parsed into structured
    question dicts; heuristics assign question types (rating, yes/no,
    Likert) and a fallback set of generic questions guards against
    unparseable responses.
    """

    def __init__(self, llm_backend: "LLMBackend"):
        """
        Args:
            llm_backend: Backend exposing
                ``generate(messages, max_tokens=..., temperature=...) -> str``.
        """
        self.llm = llm_backend

    def generate_survey(self,
                        outline: str,
                        survey_type: str = "qualitative",
                        num_questions: int = 10,
                        target_audience: str = "general") -> Dict:
        """
        Generate a complete survey from an outline.

        Args:
            outline: User's outline or topic description.
            survey_type: Type of survey (qualitative, quantitative, mixed).
            num_questions: Target number of questions.
            target_audience: Description of target respondents.

        Returns:
            Dict with ``title``, ``introduction``, ``questions``,
            ``closing`` and ``metadata`` keys.

        Raises:
            Exception: If the LLM call or parsing fails; the original
                error is chained as ``__cause__``.
        """
        prompt = self._build_generation_prompt(outline, survey_type, num_questions, target_audience)
        messages = [
            {"role": "system", "content": self._get_system_prompt()},
            {"role": "user", "content": prompt}
        ]
        try:
            response = self.llm.generate(messages, max_tokens=2000, temperature=0.7)
            survey_data = self._parse_survey_response(response)
            # Replace the parser's generic title with one derived from the outline.
            survey_data["title"] = self._generate_title(outline, survey_type)
            # Attach generation metadata for downstream display/auditing.
            survey_data["metadata"] = {
                "outline": outline,
                "survey_type": survey_type,
                "target_audience": target_audience,
                "generated_question_count": len(survey_data.get("questions", []))
            }
            return survey_data
        except Exception as e:
            # Chain the cause so the root error is preserved in tracebacks.
            raise Exception(f"Survey generation failed: {str(e)}") from e

    def _generate_title(self, outline: str, survey_type: str) -> str:
        """Generate a survey title from the first sentence of the outline."""
        # Use the first sentence, truncated to keep titles displayable.
        first_sentence = outline.split('.')[0].strip()
        if len(first_sentence) > 60:
            first_sentence = first_sentence[:60] + "..."
        # Capitalize the first letter; fall back to a generic topic.
        topic = first_sentence[0].upper() + first_sentence[1:] if first_sentence else "Research"
        # Suffix depends on the survey type.
        if survey_type.lower() == "qualitative":
            return f"{topic} - Qualitative Survey"
        elif survey_type.lower() == "quantitative":
            return f"{topic} - Quantitative Survey"
        else:
            return f"{topic} Survey"

    def _get_system_prompt(self) -> str:
        """System prompt for survey generation - optimized for Mistral/Mixtral."""
        return """You are an expert survey designer specializing in qualitative research. Your role is to create clear, professionally-written, and contextually relevant survey questions that elicit detailed responses from respondents."""

    def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
        """Build the user prompt for survey generation - optimized for Mistral/Mixtral."""
        # Ends with "1." to prime the model into numbered-list output.
        return f"""You are creating a {survey_type.lower()} research survey.
**Research Focus:** {outline}
**Target Participants:** {target_audience}
**Your Task:** Generate exactly {num_questions} high-quality survey questions.
**Quality Requirements:**
- Each question must be directly relevant to the research focus
- Questions should be specific enough to guide responses but open enough to capture diverse perspectives
- For {survey_type.lower()} surveys: Use open-ended questions that encourage detailed, thoughtful responses
- Avoid leading questions, double questions, or jargon that may confuse respondents
- Ensure questions are appropriate for the target audience's knowledge and context
- Progress from general to specific topics when possible
**Format:** Output as a numbered list (1. Question text 2. Question text, etc.)
**Output {num_questions} Survey Questions:**
1."""

    def _parse_survey_response(self, response: str) -> Dict:
        """Parse LLM response into survey structure (numbered list, not JSON)."""
        return self._parse_numbered_list(response)

    def _classify_question(self, clean_line: str):
        """Heuristically classify a question and choose answer options.

        Args:
            clean_line: The cleaned question text (typically ends with '?').

        Returns:
            Tuple ``(question_type, options)``; ``options`` is ``None`` for
            open-ended questions.
        """
        question_type = "open_ended"
        options = None
        lower_line = clean_line.lower()
        # Rating/scale wording wins first.
        if any(word in lower_line for word in ['rate', 'scale', 'rating', 'score']):
            question_type = "rating"
            options = ["1 - Poor", "2 - Fair", "3 - Good", "4 - Very Good", "5 - Excellent"]
        # Yes/no phrasing — but "how much/how many" stays open-ended.
        elif clean_line.endswith('?') and any(word in lower_line for word in ['do you', 'have you', 'would you', 'can you', 'should', 'is it', 'are you']):
            if 'how much' not in lower_line and 'how many' not in lower_line:
                question_type = "yes_no"
                options = ["Yes", "No"]
        # Satisfaction wording maps to a Likert scale.
        elif any(word in lower_line for word in ['satisfy', 'satisfaction', 'satisfied']):
            question_type = "likert_scale"
            options = ["Very Satisfied", "Satisfied", "Neutral", "Dissatisfied", "Very Dissatisfied"]
        return question_type, options

    def _fallback_questions(self) -> List[Dict]:
        """Generic open-ended questions used when parsing finds nothing usable."""
        return [
            {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
            {"id": 2, "question_text": "Can you describe your experience in detail?", "question_type": "open_ended", "required": True},
            {"id": 3, "question_text": "What specific suggestions do you have for improvement?", "question_type": "open_ended", "required": True}
        ]

    def _parse_numbered_list(self, response: str) -> Dict:
        """Parse a numbered list of questions into the survey structure."""
        # Split on numbered markers: "1. Question" or "1) Question".
        pattern = r'\d+[\.\)]\s+'
        parts = re.split(pattern, response)
        parts = [p.strip() for p in parts if p.strip()]
        questions = []
        question_id = 1
        for part in parts:
            # Skip fragments too short to be real questions.
            if len(part) < 10:
                continue
            # Keep only the first sentence/question if the part runs on.
            sentences = re.split(r'[\n]+|[?.!]\s+(?=\d+[\.\)]|\Z)', part)
            clean_line = sentences[0].strip()
            # Remove any leading hyphens or bullets that might appear.
            clean_line = re.sub(r'^[-•*]\s*', '', clean_line)
            # Add question mark if missing.
            if clean_line and not clean_line.endswith('?'):
                clean_line += '?'
            # Skip if still too short after cleaning.
            if len(clean_line) < 10:
                continue
            question_type, options = self._classify_question(clean_line)
            question = {
                "id": question_id,
                "question_text": clean_line,
                "question_type": question_type,
                "required": True
            }
            if options:
                question["options"] = options
            questions.append(question)
            question_id += 1
        # If the numbered list yielded few questions, try alternative parsing;
        # this catches responses that don't use numbered format.
        if len(questions) < 3:
            alt_questions = self._parse_alternative_format(response)
            if len(alt_questions) > len(questions):
                questions = alt_questions
        # Final fallback if still no questions.
        if len(questions) == 0:
            questions = self._fallback_questions()
        return {
            "title": "Research Survey",
            "introduction": "Thank you for taking the time to participate in this survey. Your responses will help us better understand your experiences and perspectives. Please answer all questions honestly and thoroughly.",
            "questions": questions[:20],  # Limit to 20 questions
            "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
        }

    def _parse_alternative_format(self, response: str) -> List[Dict]:
        """Line-by-line fallback parser used when the numbered list fails."""
        questions = []
        question_id = 1
        for line in response.split('\n'):
            line = line.strip()
            # Skip empty or too-short lines.
            if not line or len(line) < 10:
                continue
            # Skip lines that are just labels or prompt-echo instructions.
            skip_keywords = ['format:', 'requirements:', 'task:', 'topic:', 'audience:', 'here are', 'survey questions:', 'questions:']
            if any(keyword in line.lower() for keyword in skip_keywords):
                continue
            # A question either contains '?' or contains a question word.
            has_question_mark = '?' in line
            has_question_word = any(word in line.lower() for word in ['describe', 'explain', 'what', 'how', 'why', 'when', 'where', 'who', 'can you', 'would you', 'do you', 'have you'])
            if not (has_question_mark or has_question_word):
                continue
            # Strip a leading bullet OR a whole numeric prefix like "12." / "3)".
            # (The previous character-class regex only removed one character,
            # leaving residue such as "2. Question" from "12. Question".)
            clean_line = re.sub(r'^(?:[-•*]|\d+[.)])\s*', '', line).strip()
            # Append '?' only when the line has no terminal punctuation at all.
            if clean_line and not clean_line.endswith('?'):
                if not any(c in clean_line for c in [':', '!', '.']):
                    clean_line += '?'
            # Skip if too short after cleaning.
            if len(clean_line) < 10:
                continue
            question_type, options = self._classify_question(clean_line)
            question = {
                "id": question_id,
                "question_text": clean_line,
                "question_type": question_type,
                "required": True
            }
            if options:
                question["options"] = options
            questions.append(question)
            question_id += 1
        # If still no questions found, fall back to generic questions.
        if len(questions) == 0:
            questions = self._fallback_questions()
        return questions

    def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
        """
        Refine a single survey question - optimized for Mistral/Mixtral.

        Args:
            question: The question to improve.
            improvement_type: Type of improvement (clarity, neutrality,
                specificity); unknown values fall back to clarity.

        Returns:
            Improved question text (stripped of surrounding whitespace).
        """
        improvement_guidance = {
            "clarity": "Makes the question clearer and easier for respondents to understand without ambiguity",
            "neutrality": "Removes any bias, leading language, or assumptions that could influence responses",
            "specificity": "Makes the question more specific and actionable while remaining open-ended"
        }
        guidance = improvement_guidance.get(improvement_type, improvement_guidance["clarity"])
        prompt = f"""Task: Improve a survey question
**Original Question:** "{question}"
**Improvement Type:** {improvement_type.title()}
**Your Goal:** Rewrite this question so that it {guidance}.
**Guidelines:**
- Keep the question focused on a single topic
- Use simple, clear language appropriate for the target audience
- Avoid assumptions or leading language
- Ensure the question can elicit meaningful responses
Provide ONLY the improved question text. Do not include explanations or alternative versions."""
        messages = [
            {"role": "system", "content": "You are an expert survey question designer with deep experience in qualitative research methodology."},
            {"role": "user", "content": prompt}
        ]
        return self.llm.generate(messages, max_tokens=150, temperature=0.5).strip()

    def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
        """
        Generate follow-up questions for deeper exploration - optimized for
        Mistral/Mixtral.

        Args:
            base_question: The main question.
            num_follow_ups: Number of follow-up questions to generate.

        Returns:
            List of follow-up question texts (each ending with '?').
        """
        prompt = f"""Task: Generate probing follow-up questions
**Main Question:** {base_question}
**Your Task:** Create {num_follow_ups} thoughtful follow-up questions that probe deeper into the respondent's answer.
**Quality Criteria for Follow-ups:**
1. Each question should explore a different aspect, dimension, or implication of the main topic
2. Questions should encourage more detailed, nuanced responses
3. Follow a logical progression from the main question
4. Build on what a respondent might answer to the main question
5. Each should be specific but open-ended
**Format:** Number each question (1., 2., 3., etc.)
**Output {num_follow_ups} Follow-up Questions:**
1."""
        messages = [
            {"role": "system", "content": "You are an expert qualitative research interviewer skilled at designing probing questions that uncover deeper insights and nuances."},
            {"role": "user", "content": prompt}
        ]
        response = self.llm.generate(messages, max_tokens=500, temperature=0.7)
        # Prefer numbered-list extraction; capture up to the next number marker.
        pattern = r'\d+[\.\)]\s+(.+?)(?=\d+[\.\)]|\Z)'
        matches = re.findall(pattern, response, re.DOTALL)
        if matches:
            follow_ups = [m.split('\n')[0].strip() for m in matches if m.strip()][:num_follow_ups]
            # Ensure all end with a question mark.
            follow_ups = [q if q.endswith('?') else q + '?' for q in follow_ups]
            if follow_ups:
                return follow_ups
        # Fallback: split by newlines and keep lines containing '?'.
        lines = [line.strip() for line in response.split("\n") if line.strip()]
        follow_ups = [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]
        return follow_ups if follow_ups else [f"Can you elaborate on {base_question.lower()}?" for _ in range(num_follow_ups)]