jmisak committed on
Commit
28613b6
·
verified ·
1 Parent(s): 7e46f1a

Upload survey_generator.py

Browse files
Files changed (1) hide show
  1. survey_generator.py +105 -15
survey_generator.py CHANGED
@@ -83,16 +83,25 @@ class SurveyGenerator:
83
 
84
  def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
85
  """Build the user prompt for survey generation"""
86
- # For causal LMs (Phi, Gemma, etc.) - more conversational
87
- return f"""You are designing a {survey_type} survey for: {target_audience}
88
 
89
- Topic: {outline}
90
 
91
- Generate {num_questions} specific, relevant survey questions about this topic. Make each question clear and professional.
92
 
93
- Write your questions in a numbered list format (1., 2., 3., etc.). Focus on the specific topic and audience mentioned above.
 
 
 
 
 
 
 
 
 
 
94
 
95
- Questions:
96
  1."""
97
 
98
  def _parse_survey_response(self, response: str) -> Dict:
@@ -102,16 +111,12 @@ Questions:
102
 
103
  def _parse_numbered_list(self, response: str) -> Dict:
104
  """Parse numbered list of questions into survey structure"""
105
- # First, try to split by numbered patterns (1., 2., etc.)
106
  import re
107
 
 
108
  # Pattern to match numbered questions: "1. Question" or "1) Question"
109
  pattern = r'\d+[\.\)]\s+'
110
-
111
- # Split by the pattern but keep what comes after each number
112
  parts = re.split(pattern, response)
113
-
114
- # Remove empty first element if exists
115
  parts = [p.strip() for p in parts if p.strip()]
116
 
117
  questions = []
@@ -123,12 +128,15 @@ Questions:
123
  continue
124
 
125
  # Take only the first sentence/question if there are multiple
126
- # Split by question mark or period
127
- sentences = re.split(r'[?.!]\s+(?=\d+[\.\)]|\Z)', part)
128
  clean_line = sentences[0].strip()
129
 
 
 
 
130
  # Add question mark if missing
131
- if not clean_line.endswith('?'):
132
  clean_line += '?'
133
 
134
  # Skip if still too short
@@ -170,7 +178,15 @@ Questions:
170
  questions.append(question)
171
  question_id += 1
172
 
173
- # If we didn't find any questions, create generic ones
 
 
 
 
 
 
 
 
174
  if len(questions) == 0:
175
  questions = [
176
  {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
@@ -185,6 +201,80 @@ Questions:
185
  "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
186
  }
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
189
  """
190
  Refine a single survey question.
 
83
 
84
  def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
85
  """Build the user prompt for survey generation"""
86
+ # For causal LMs (Phi, Gemma, etc.) - more direct and explicit
87
+ return f"""Task: Create a {survey_type} research survey
88
 
89
+ Research Topic: {outline}
90
 
91
+ Target Audience: {target_audience}
92
 
93
+ Create exactly {num_questions} survey questions.
94
+
95
+ Requirements:
96
+ - Each question must be clear, specific, and relevant to the topic
97
+ - Questions should be appropriate for the target audience
98
+ - Avoid yes/no questions in qualitative surveys
99
+ - Make questions open-ended to encourage detailed responses
100
+
101
+ Format: Use numbered list (1., 2., 3., etc.)
102
+
103
+ Here are the {num_questions} survey questions:
104
 
 
105
  1."""
106
 
107
  def _parse_survey_response(self, response: str) -> Dict:
 
111
 
112
  def _parse_numbered_list(self, response: str) -> Dict:
113
  """Parse numbered list of questions into survey structure"""
 
114
  import re
115
 
116
+ # First, try numbered list approach
117
  # Pattern to match numbered questions: "1. Question" or "1) Question"
118
  pattern = r'\d+[\.\)]\s+'
 
 
119
  parts = re.split(pattern, response)
 
 
120
  parts = [p.strip() for p in parts if p.strip()]
121
 
122
  questions = []
 
128
  continue
129
 
130
  # Take only the first sentence/question if there are multiple
131
+ # Split by question mark, period, or newline
132
+ sentences = re.split(r'[\n]+|[?.!]\s+(?=\d+[\.\)]|\Z)', part)
133
  clean_line = sentences[0].strip()
134
 
135
+ # Remove any leading hyphens or bullets that might appear
136
+ clean_line = re.sub(r'^[-•*]\s*', '', clean_line)
137
+
138
  # Add question mark if missing
139
+ if clean_line and not clean_line.endswith('?'):
140
  clean_line += '?'
141
 
142
  # Skip if still too short
 
178
  questions.append(question)
179
  question_id += 1
180
 
181
+ # If we found few or no questions from numbered list, try alternative parsing
182
+ # This helps catch responses that don't use numbered format
183
+ if len(questions) < 3:
184
+ alt_questions = self._parse_alternative_format(response)
185
+ # Use alternative if it found more questions
186
+ if len(alt_questions) > len(questions):
187
+ questions = alt_questions
188
+
189
+ # Final fallback if still no questions
190
  if len(questions) == 0:
191
  questions = [
192
  {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
 
201
  "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
202
  }
203
 
204
+ def _parse_alternative_format(self, response: str) -> List[Dict]:
205
+ """Try alternative parsing approaches if numbered list fails"""
206
+ import re
207
+
208
+ questions = []
209
+ question_id = 1
210
+
211
+ # Try splitting by lines and looking for question patterns
212
+ lines = response.split('\n')
213
+
214
+ for line in lines:
215
+ line = line.strip()
216
+
217
+ # Skip empty lines
218
+ if not line or len(line) < 10:
219
+ continue
220
+
221
+ # Skip lines that are just labels or instructions
222
+ skip_keywords = ['format:', 'requirements:', 'task:', 'topic:', 'audience:', 'here are', 'survey questions:', 'questions:']
223
+ if any(keyword in line.lower() for keyword in skip_keywords):
224
+ continue
225
+
226
+ # Check if this looks like a question (has ?, or starts with question words)
227
+ has_question_mark = '?' in line
228
+ starts_with_question_word = any(word in line.lower() for word in ['describe', 'explain', 'what', 'how', 'why', 'when', 'where', 'who', 'can you', 'would you', 'do you', 'have you'])
229
+
230
+ if has_question_mark or starts_with_question_word:
231
+ # Clean up the line (remove bullets, numbers, etc)
232
+ clean_line = re.sub(r'^[-•*\d+\.\)]\s*', '', line).strip()
233
+
234
+ # Ensure it ends with question mark
235
+ if clean_line and not clean_line.endswith('?'):
236
+ # Only add if it doesn't already end with punctuation
237
+ if not any(c in clean_line for c in [':', '!', '.']):
238
+ clean_line += '?'
239
+
240
+ # Skip if too short after cleaning
241
+ if len(clean_line) < 10:
242
+ continue
243
+
244
+ # Determine question type based on content
245
+ question_type = "open_ended"
246
+ options = None
247
+
248
+ lower_line = clean_line.lower()
249
+
250
+ # Check for rating/scale questions
251
+ if any(word in lower_line for word in ['rate', 'scale', 'rating', 'score']):
252
+ question_type = "rating"
253
+ options = ["1 - Poor", "2 - Fair", "3 - Good", "4 - Very Good", "5 - Excellent"]
254
+
255
+ question = {
256
+ "id": question_id,
257
+ "question_text": clean_line,
258
+ "question_type": question_type,
259
+ "required": True
260
+ }
261
+
262
+ if options:
263
+ question["options"] = options
264
+
265
+ questions.append(question)
266
+ question_id += 1
267
+
268
+ # If still no questions found, create fallback questions based on topic hints
269
+ if len(questions) == 0:
270
+ questions = [
271
+ {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
272
+ {"id": 2, "question_text": "Can you describe your experience in detail?", "question_type": "open_ended", "required": True},
273
+ {"id": 3, "question_text": "What specific suggestions do you have for improvement?", "question_type": "open_ended", "required": True}
274
+ ]
275
+
276
+ return questions
277
+
278
  def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
279
  """
280
  Refine a single survey question.