Spaces:

jmisak
/

ProjectEcho

Sleeping

App Files Files Community

jmisak committed on Nov 2, 2025

Commit

28613b6

verified ·

1 Parent(s): 7e46f1a

Upload survey_generator.py

Browse files

Files changed (1) hide show

survey_generator.py +105 -15

survey_generator.py CHANGED Viewed

@@ -83,16 +83,25 @@ class SurveyGenerator:
     def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
         """Build the user prompt for survey generation"""
-        # For causal LMs (Phi, Gemma, etc.) - more conversational
-        return f"""You are designing a {survey_type} survey for: {target_audience}
-Topic: {outline}
-Generate {num_questions} specific, relevant survey questions about this topic. Make each question clear and professional.
-Write your questions in a numbered list format (1., 2., 3., etc.). Focus on the specific topic and audience mentioned above.
-Questions:
 1."""
     def _parse_survey_response(self, response: str) -> Dict:
@@ -102,16 +111,12 @@ Questions:
     def _parse_numbered_list(self, response: str) -> Dict:
         """Parse numbered list of questions into survey structure"""
-        # First, try to split by numbered patterns (1., 2., etc.)
         import re
         # Pattern to match numbered questions: "1. Question" or "1) Question"
         pattern = r'\d+[\.\)]\s+'
-        # Split by the pattern but keep what comes after each number
         parts = re.split(pattern, response)
-        # Remove empty first element if exists
         parts = [p.strip() for p in parts if p.strip()]
         questions = []
@@ -123,12 +128,15 @@ Questions:
                 continue
             # Take only the first sentence/question if there are multiple
-            # Split by question mark or period
-            sentences = re.split(r'[?.!]\s+(?=\d+[\.\)]|\Z)', part)
             clean_line = sentences[0].strip()
             # Add question mark if missing
-            if not clean_line.endswith('?'):
                 clean_line += '?'
             # Skip if still too short
@@ -170,7 +178,15 @@ Questions:
             questions.append(question)
             question_id += 1
-        # If we didn't find any questions, create generic ones
         if len(questions) == 0:
             questions = [
                 {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
@@ -185,6 +201,80 @@ Questions:
             "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
         }
     def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
         """
         Refine a single survey question.

     def _build_generation_prompt(self, outline, survey_type, num_questions, target_audience) -> str:
         """Build the user prompt for survey generation"""
+        # For causal LMs (Phi, Gemma, etc.) - more direct and explicit
+        return f"""Task: Create a {survey_type} research survey
+Research Topic: {outline}
+Target Audience: {target_audience}
+Create exactly {num_questions} survey questions.
+Requirements:
+- Each question must be clear, specific, and relevant to the topic
+- Questions should be appropriate for the target audience
+- Avoid yes/no questions in qualitative surveys
+- Make questions open-ended to encourage detailed responses
+Format: Use numbered list (1., 2., 3., etc.)
+Here are the {num_questions} survey questions:
 1."""
     def _parse_survey_response(self, response: str) -> Dict:
     def _parse_numbered_list(self, response: str) -> Dict:
         """Parse numbered list of questions into survey structure"""
         import re
+        # First, try numbered list approach
         # Pattern to match numbered questions: "1. Question" or "1) Question"
         pattern = r'\d+[\.\)]\s+'
         parts = re.split(pattern, response)
         parts = [p.strip() for p in parts if p.strip()]
         questions = []
                 continue
             # Take only the first sentence/question if there are multiple
+            # Split by question mark, period, or newline
+            sentences = re.split(r'[\n]+|[?.!]\s+(?=\d+[\.\)]|\Z)', part)
             clean_line = sentences[0].strip()
+            # Remove any leading hyphens or bullets that might appear
+            clean_line = re.sub(r'^[-•*]\s*', '', clean_line)
             # Add question mark if missing
+            if clean_line and not clean_line.endswith('?'):
                 clean_line += '?'
             # Skip if still too short
             questions.append(question)
             question_id += 1
+        # If we found few or no questions from numbered list, try alternative parsing
+        # This helps catch responses that don't use numbered format
+        if len(questions) < 3:
+            alt_questions = self._parse_alternative_format(response)
+            # Use alternative if it found more questions
+            if len(alt_questions) > len(questions):
+                questions = alt_questions
+        # Final fallback if still no questions
         if len(questions) == 0:
             questions = [
                 {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
             "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
         }
+    def _parse_alternative_format(self, response: str) -> List[Dict]:
+        """Try alternative parsing approaches if numbered list fails"""
+        import re
+        questions = []
+        question_id = 1
+        # Try splitting by lines and looking for question patterns
+        lines = response.split('\n')
+        for line in lines:
+            line = line.strip()
+            # Skip empty lines
+            if not line or len(line) < 10:
+                continue
+            # Skip lines that are just labels or instructions
+            skip_keywords = ['format:', 'requirements:', 'task:', 'topic:', 'audience:', 'here are', 'survey questions:', 'questions:']
+            if any(keyword in line.lower() for keyword in skip_keywords):
+                continue
+            # Check if this looks like a question (has ?, or starts with question words)
+            has_question_mark = '?' in line
+            starts_with_question_word = any(word in line.lower() for word in ['describe', 'explain', 'what', 'how', 'why', 'when', 'where', 'who', 'can you', 'would you', 'do you', 'have you'])
+            if has_question_mark or starts_with_question_word:
+                # Clean up the line (remove bullets, numbers, etc)
+                clean_line = re.sub(r'^[-•*\d+\.\)]\s*', '', line).strip()
+                # Ensure it ends with question mark
+                if clean_line and not clean_line.endswith('?'):
+                    # Only add if it doesn't already end with punctuation
+                    if not any(c in clean_line for c in [':', '!', '.']):
+                        clean_line += '?'
+                # Skip if too short after cleaning
+                if len(clean_line) < 10:
+                    continue
+                # Determine question type based on content
+                question_type = "open_ended"
+                options = None
+                lower_line = clean_line.lower()
+                # Check for rating/scale questions
+                if any(word in lower_line for word in ['rate', 'scale', 'rating', 'score']):
+                    question_type = "rating"
+                    options = ["1 - Poor", "2 - Fair", "3 - Good", "4 - Very Good", "5 - Excellent"]
+                question = {
+                    "id": question_id,
+                    "question_text": clean_line,
+                    "question_type": question_type,
+                    "required": True
+                }
+                if options:
+                    question["options"] = options
+                questions.append(question)
+                question_id += 1
+        # If still no questions found, create fallback questions based on topic hints
+        if len(questions) == 0:
+            questions = [
+                {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
+                {"id": 2, "question_text": "Can you describe your experience in detail?", "question_type": "open_ended", "required": True},
+                {"id": 3, "question_text": "What specific suggestions do you have for improvement?", "question_type": "open_ended", "required": True}
+            ]
+        return questions
     def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
         """
         Refine a single survey question.