# ProjectEcho / survey_generator.py
# Source: Hugging Face upload by jmisak ("Upload 5 files", commit 8056e83, verified)
"""
Survey Generation Module - Generate AI-powered surveys from outlines
"""
import json
import os
import re
import sys
from typing import Dict, List, Optional

# Add parent directory to path for imports (must run before the local import below)
sys.path.insert(0, os.path.dirname(__file__))
from llm_backend import LLMBackend
class SurveyGenerator:
    """
    Generates professional surveys from user outlines using AI.

    Follows industry best practices for qualitative research: questions are
    open-ended by default, with lightweight heuristics that upgrade them to
    rating, Likert-scale, or yes/no types when the wording calls for it.
    """

    # Hard cap on how many questions a generated survey may contain.
    MAX_QUESTIONS = 20

    # Minimum characters for a parsed line to count as a real question.
    MIN_QUESTION_LEN = 10

    def __init__(self, llm_backend: "LLMBackend"):
        """
        Args:
            llm_backend: Backend exposing generate(messages, max_tokens, temperature).
        """
        self.llm = llm_backend

    def generate_survey(self,
                        outline: str,
                        survey_type: str = "qualitative",
                        num_questions: int = 10,
                        target_audience: str = "general") -> Dict:
        """
        Generate a complete survey from an outline.

        Args:
            outline: User's outline or topic description
            survey_type: Type of survey (qualitative, quantitative, mixed)
            num_questions: Target number of questions
            target_audience: Description of target respondents

        Returns:
            Dict containing survey metadata and questions

        Raises:
            Exception: If the LLM call or response parsing fails; the original
                error is attached as __cause__.
        """
        prompt = self._build_generation_prompt(outline, survey_type, num_questions, target_audience)
        messages = [
            {"role": "system", "content": self._get_system_prompt()},
            {"role": "user", "content": prompt}
        ]
        try:
            response = self.llm.generate(messages, max_tokens=2000, temperature=0.7)
            survey_data = self._parse_survey_response(response)
            # Replace the generic parsed title with one derived from the outline.
            survey_data["title"] = self._generate_title(outline, survey_type)
            # Attach generation metadata for downstream consumers.
            survey_data["metadata"] = {
                "outline": outline,
                "survey_type": survey_type,
                "target_audience": target_audience,
                "generated_question_count": len(survey_data.get("questions", []))
            }
            return survey_data
        except Exception as e:
            # Chain the original exception so the root cause is not lost.
            raise Exception(f"Survey generation failed: {str(e)}") from e

    def _generate_title(self, outline: str, survey_type: str) -> str:
        """Derive a human-readable survey title from the outline's first sentence."""
        first_sentence = outline.split('.')[0].strip()
        if len(first_sentence) > 60:
            first_sentence = first_sentence[:60] + "..."
        # Capitalize the first letter; fall back to a generic topic for empty outlines.
        topic = first_sentence[0].upper() + first_sentence[1:] if first_sentence else "Research"
        kind = survey_type.lower()
        if kind == "qualitative":
            return f"{topic} - Qualitative Survey"
        if kind == "quantitative":
            return f"{topic} - Quantitative Survey"
        return f"{topic} Survey"

    def _get_system_prompt(self) -> str:
        """System prompt for survey generation - optimized for Mistral/Mixtral."""
        return """You are an expert survey designer specializing in qualitative research. Your role is to create clear, professionally-written, and contextually relevant survey questions that elicit detailed responses from respondents."""

    def _build_generation_prompt(self, outline: str, survey_type: str,
                                 num_questions: int, target_audience: str) -> str:
        """Build the user prompt for survey generation - optimized for Mistral/Mixtral."""
        return f"""You are creating a {survey_type.lower()} research survey.
**Research Focus:** {outline}
**Target Participants:** {target_audience}
**Your Task:** Generate exactly {num_questions} high-quality survey questions.
**Quality Requirements:**
- Each question must be directly relevant to the research focus
- Questions should be specific enough to guide responses but open enough to capture diverse perspectives
- For {survey_type.lower()} surveys: Use open-ended questions that encourage detailed, thoughtful responses
- Avoid leading questions, double questions, or jargon that may confuse respondents
- Ensure questions are appropriate for the target audience's knowledge and context
- Progress from general to specific topics when possible
**Format:** Output as a numbered list (1. Question text 2. Question text, etc.)
**Output {num_questions} Survey Questions:**
1."""

    def _parse_survey_response(self, response: str) -> Dict:
        """Parse the LLM response (numbered list, not JSON) into a survey structure."""
        return self._parse_numbered_list(response)

    def _classify_question(self, question_text: str) -> tuple:
        """
        Heuristically classify a question and choose answer options for it.

        Returns:
            (question_type, options) — options is None for open-ended questions.
        """
        lower_line = question_text.lower()
        # Rating/scale questions. Word-boundary match so that e.g. "generate"
        # or "separate" does not false-positive on the substring "rate".
        if re.search(r'\b(rate|rating|scale|score)\b', lower_line):
            return "rating", ["1 - Poor", "2 - Fair", "3 - Good", "4 - Very Good", "5 - Excellent"]
        # Satisfaction questions are checked BEFORE yes/no so that phrasings
        # such as "Are you satisfied ...?" get a Likert scale, not Yes/No.
        if any(word in lower_line for word in ['satisfy', 'satisfaction', 'satisfied']):
            return "likert_scale", ["Very Satisfied", "Satisfied", "Neutral", "Dissatisfied", "Very Dissatisfied"]
        # Yes/no questions. Word-boundary match so "are you" does not match
        # "are your" ("What are your thoughts ...?" stays open-ended).
        if question_text.endswith('?') and re.search(r'\b(do you|have you|would you|can you|should|is it|are you)\b', lower_line):
            if 'how much' not in lower_line and 'how many' not in lower_line:
                return "yes_no", ["Yes", "No"]
        return "open_ended", None

    def _make_question(self, question_id: int, text: str) -> Dict:
        """Build one question dict, attaching options when the type requires them."""
        question_type, options = self._classify_question(text)
        question = {
            "id": question_id,
            "question_text": text,
            "question_type": question_type,
            "required": True
        }
        if options:
            question["options"] = options
        return question

    @staticmethod
    def _fallback_questions() -> List[Dict]:
        """Generic open-ended questions used when nothing could be parsed."""
        return [
            {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
            {"id": 2, "question_text": "Can you describe your experience in detail?", "question_type": "open_ended", "required": True},
            {"id": 3, "question_text": "What specific suggestions do you have for improvement?", "question_type": "open_ended", "required": True}
        ]

    def _parse_numbered_list(self, response: str) -> Dict:
        """Parse a numbered list of questions into the full survey structure."""
        # Split on numbered markers: "1. Question" or "1) Question".
        parts = re.split(r'\d+[\.\)]\s+', response)
        parts = [p.strip() for p in parts if p.strip()]

        questions: List[Dict] = []
        for part in parts:
            # Too short to be a real question.
            if len(part) < self.MIN_QUESTION_LEN:
                continue
            # Keep only the first sentence/question of the segment.
            sentences = re.split(r'[\n]+|[?.!]\s+(?=\d+[\.\)]|\Z)', part)
            clean_line = sentences[0].strip()
            # Drop any leading hyphens or bullets that might appear.
            clean_line = re.sub(r'^[-•*]\s*', '', clean_line)
            # Normalize the ending to a question mark, stripping a trailing
            # period/exclamation first so we never emit "...?." artifacts.
            if clean_line and not clean_line.endswith('?'):
                clean_line = clean_line.rstrip('.!') + '?'
            if len(clean_line) < self.MIN_QUESTION_LEN:
                continue
            questions.append(self._make_question(len(questions) + 1, clean_line))

        # If the numbered-list parse found few or no questions, try the
        # line-oriented fallback parser and keep whichever found more.
        if len(questions) < 3:
            alt_questions = self._parse_alternative_format(response)
            if len(alt_questions) > len(questions):
                questions = alt_questions

        # Final fallback if still no questions.
        if not questions:
            questions = self._fallback_questions()

        return {
            "title": "Research Survey",
            "introduction": "Thank you for taking the time to participate in this survey. Your responses will help us better understand your experiences and perspectives. Please answer all questions honestly and thoroughly.",
            "questions": questions[:self.MAX_QUESTIONS],
            "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
        }

    def _parse_alternative_format(self, response: str) -> List[Dict]:
        """Fallback parser: scan individual lines for question-like text."""
        questions: List[Dict] = []
        # Label/instruction lines the model sometimes echoes back.
        skip_keywords = ['format:', 'requirements:', 'task:', 'topic:', 'audience:', 'here are', 'survey questions:', 'questions:']
        for line in response.split('\n'):
            line = line.strip()
            if not line or len(line) < self.MIN_QUESTION_LEN:
                continue
            if any(keyword in line.lower() for keyword in skip_keywords):
                continue
            # Question-like: contains "?" or a common interrogative/imperative word.
            has_question_mark = '?' in line
            has_question_word = any(word in line.lower() for word in ['describe', 'explain', 'what', 'how', 'why', 'when', 'where', 'who', 'can you', 'would you', 'do you', 'have you'])
            if not (has_question_mark or has_question_word):
                continue
            # Strip a leading bullet or "1." / "1)" numbering prefix.
            clean_line = re.sub(r'^(?:[-•*]|\d+[\.\)])\s*', '', line).strip()
            # Append a question mark unless the line already ENDS with other
            # terminal punctuation of its own.
            if clean_line and not clean_line.endswith('?') and clean_line[-1] not in ':!.':
                clean_line += '?'
            if len(clean_line) < self.MIN_QUESTION_LEN:
                continue
            questions.append(self._make_question(len(questions) + 1, clean_line))

        # If still nothing found, fall back to generic questions.
        return questions if questions else self._fallback_questions()

    def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
        """
        Refine a single survey question - optimized for Mistral/Mixtral.

        Args:
            question: The question to improve
            improvement_type: Type of improvement (clarity, neutrality, specificity);
                unknown values fall back to "clarity"

        Returns:
            Improved question text
        """
        improvement_guidance = {
            "clarity": "Makes the question clearer and easier for respondents to understand without ambiguity",
            "neutrality": "Removes any bias, leading language, or assumptions that could influence responses",
            "specificity": "Makes the question more specific and actionable while remaining open-ended"
        }
        guidance = improvement_guidance.get(improvement_type, improvement_guidance["clarity"])
        prompt = f"""Task: Improve a survey question
**Original Question:** "{question}"
**Improvement Type:** {improvement_type.title()}
**Your Goal:** Rewrite this question so that it {guidance}.
**Guidelines:**
- Keep the question focused on a single topic
- Use simple, clear language appropriate for the target audience
- Avoid assumptions or leading language
- Ensure the question can elicit meaningful responses
Provide ONLY the improved question text. Do not include explanations or alternative versions."""
        messages = [
            {"role": "system", "content": "You are an expert survey question designer with deep experience in qualitative research methodology."},
            {"role": "user", "content": prompt}
        ]
        return self.llm.generate(messages, max_tokens=150, temperature=0.5).strip()

    def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
        """
        Generate follow-up questions for deeper exploration - optimized for Mistral/Mixtral.

        Args:
            base_question: The main question
            num_follow_ups: Number of follow-up questions to generate

        Returns:
            List of follow-up question texts (generic elaboration prompts if
            nothing parseable came back from the model)
        """
        prompt = f"""Task: Generate probing follow-up questions
**Main Question:** {base_question}
**Your Task:** Create {num_follow_ups} thoughtful follow-up questions that probe deeper into the respondent's answer.
**Quality Criteria for Follow-ups:**
1. Each question should explore a different aspect, dimension, or implication of the main topic
2. Questions should encourage more detailed, nuanced responses
3. Follow a logical progression from the main question
4. Build on what a respondent might answer to the main question
5. Each should be specific but open-ended
**Format:** Number each question (1., 2., 3., etc.)
**Output {num_follow_ups} Follow-up Questions:**
1."""
        messages = [
            {"role": "system", "content": "You are an expert qualitative research interviewer skilled at designing probing questions that uncover deeper insights and nuances."},
            {"role": "user", "content": prompt}
        ]
        response = self.llm.generate(messages, max_tokens=500, temperature=0.7)

        # Try numbered list format first: capture up to the next number or EOF.
        matches = re.findall(r'\d+[\.\)]\s+(.+?)(?=\d+[\.\)]|\Z)', response, re.DOTALL)
        if matches:
            follow_ups = [m.split('\n')[0].strip() for m in matches if m.strip()][:num_follow_ups]
            # Ensure all end with a question mark.
            follow_ups = [q if q.endswith('?') else q + '?' for q in follow_ups]
            if follow_ups:
                return follow_ups

        # Fallback: split by newlines and keep lines containing a question mark.
        lines = [line.strip() for line in response.split("\n") if line.strip()]
        follow_ups = [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]
        return follow_ups if follow_ups else [f"Can you elaborate on {base_question.lower()}?" for _ in range(num_follow_ups)]