File size: 16,359 Bytes
196c707
 
 
 
8056e83
 
196c707
8056e83
 
 
 
196c707
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4858e1f
 
 
196c707
 
 
 
 
 
 
 
 
 
 
 
 
4858e1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196c707
db39ccf
 
196c707
 
db39ccf
 
fbc9719
db39ccf
fbc9719
db39ccf
fbc9719
db39ccf
28613b6
db39ccf
 
 
 
 
 
 
28613b6
db39ccf
28613b6
db39ccf
4858e1f
56fed0f
196c707
 
 
4858e1f
 
196c707
4858e1f
 
56fed0f
 
28613b6
56fed0f
 
 
 
fbc9719
 
 
 
56fed0f
 
 
4858e1f
 
56fed0f
28613b6
 
56fed0f
 
28613b6
 
 
56fed0f
28613b6
56fed0f
4858e1f
56fed0f
4858e1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28613b6
 
 
 
 
 
 
 
 
4858e1f
fbc9719
4858e1f
 
 
fbc9719
 
 
4858e1f
 
 
 
fbc9719
196c707
28613b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196c707
 
db39ccf
196c707
 
 
 
 
 
 
 
db39ccf
 
 
 
 
 
 
 
 
 
 
196c707
db39ccf
196c707
db39ccf
196c707
db39ccf
 
 
 
 
 
 
196c707
 
db39ccf
196c707
 
 
 
 
 
 
db39ccf
196c707
 
 
 
 
 
 
 
db39ccf
 
 
196c707
db39ccf
196c707
db39ccf
 
 
 
 
 
196c707
db39ccf
 
 
 
 
196c707
 
db39ccf
196c707
 
 
 
 
db39ccf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196c707
db39ccf
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
"""

Survey Generation Module - Generate AI-powered surveys from outlines

"""
import json
import os
import re
import sys
from typing import Dict, List, Optional

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(__file__))

from llm_backend import LLMBackend


class SurveyGenerator:
    """Generate professional surveys from user outlines using AI.

    Follows industry best practices for qualitative research: questions are
    open-ended by default, with lightweight text heuristics that promote
    them to rating, yes/no, or Likert-scale questions where the wording
    clearly calls for it.
    """

    def __init__(self, llm_backend: "LLMBackend"):
        # Backend used for every LLM completion this generator issues.
        self.llm = llm_backend

    def generate_survey(self,
                        outline: str,
                        survey_type: str = "qualitative",
                        num_questions: int = 10,
                        target_audience: str = "general") -> Dict:
        """Generate a complete survey from an outline.

        Args:
            outline: User's outline or topic description.
            survey_type: Type of survey (qualitative, quantitative, mixed).
            num_questions: Target number of questions.
            target_audience: Description of target respondents.

        Returns:
            Dict containing survey metadata and questions.

        Raises:
            Exception: If the LLM call or parsing fails. The original
                error is chained as ``__cause__`` for debuggability.
        """
        prompt = self._build_generation_prompt(outline, survey_type, num_questions, target_audience)

        messages = [
            {"role": "system", "content": self._get_system_prompt()},
            {"role": "user", "content": prompt}
        ]

        try:
            response = self.llm.generate(messages, max_tokens=2000, temperature=0.7)
            survey_data = self._parse_survey_response(response)

            # The parser returns a generic title; replace it with one
            # derived from the user's outline.
            survey_data["title"] = self._generate_title(outline, survey_type)

            # Attach generation metadata for downstream consumers.
            survey_data["metadata"] = {
                "outline": outline,
                "survey_type": survey_type,
                "target_audience": target_audience,
                "generated_question_count": len(survey_data.get("questions", []))
            }

            return survey_data

        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise Exception(f"Survey generation failed: {str(e)}") from e

    def _generate_title(self, outline: str, survey_type: str) -> str:
        """Generate a survey title from the outline."""
        # Use the outline's first sentence as the topic, truncated so
        # titles stay readable.
        first_sentence = outline.split('.')[0].strip()
        if len(first_sentence) > 60:
            first_sentence = first_sentence[:60] + "..."

        # Capitalize the first letter; fall back to a generic topic when
        # the outline is empty.
        topic = first_sentence[0].upper() + first_sentence[1:] if first_sentence else "Research"

        # Suffix depends on the survey methodology.
        if survey_type.lower() == "qualitative":
            return f"{topic} - Qualitative Survey"
        elif survey_type.lower() == "quantitative":
            return f"{topic} - Quantitative Survey"
        else:
            return f"{topic} Survey"

    def _get_system_prompt(self) -> str:
        """System prompt for survey generation - optimized for Mistral/Mixtral."""
        return """You are an expert survey designer specializing in qualitative research. Your role is to create clear, professionally-written, and contextually relevant survey questions that elicit detailed responses from respondents."""

    def _build_generation_prompt(self, outline: str, survey_type: str, num_questions: int, target_audience: str) -> str:
        """Build the user prompt for survey generation - optimized for Mistral/Mixtral.

        Ends with "1." to prime the model into emitting a numbered list,
        which is what _parse_numbered_list expects.
        """
        return f"""You are creating a {survey_type.lower()} research survey.



**Research Focus:** {outline}



**Target Participants:** {target_audience}



**Your Task:** Generate exactly {num_questions} high-quality survey questions.



**Quality Requirements:**

- Each question must be directly relevant to the research focus

- Questions should be specific enough to guide responses but open enough to capture diverse perspectives

- For {survey_type.lower()} surveys: Use open-ended questions that encourage detailed, thoughtful responses

- Avoid leading questions, double questions, or jargon that may confuse respondents

- Ensure questions are appropriate for the target audience's knowledge and context

- Progress from general to specific topics when possible



**Format:** Output as a numbered list (1. Question text 2. Question text, etc.)



**Output {num_questions} Survey Questions:**



1."""

    def _parse_survey_response(self, response: str) -> Dict:
        """Parse the LLM response into a survey structure.

        The generation prompt requests a numbered list (not JSON), so
        delegate to the numbered-list parser.
        """
        return self._parse_numbered_list(response)

    def _fallback_questions(self) -> List[Dict]:
        """Generic open-ended questions used when no questions could be parsed."""
        return [
            {"id": 1, "question_text": "What are your overall thoughts on this topic?", "question_type": "open_ended", "required": True},
            {"id": 2, "question_text": "Can you describe your experience in detail?", "question_type": "open_ended", "required": True},
            {"id": 3, "question_text": "What specific suggestions do you have for improvement?", "question_type": "open_ended", "required": True}
        ]

    def _classify_question(self, question_text: str) -> tuple:
        """Heuristically classify a question's type from its wording.

        Returns:
            (question_type, options) where options is None for open-ended
            questions. Order matters: rating beats yes/no beats Likert,
            mirroring the original elif chain ("how much"/"how many"
            questions stay open-ended and do NOT fall through to Likert).
        """
        lower_line = question_text.lower()

        # Rating/scale questions get a 1-5 scale.
        if any(word in lower_line for word in ['rate', 'scale', 'rating', 'score']):
            return "rating", ["1 - Poor", "2 - Fair", "3 - Good", "4 - Very Good", "5 - Excellent"]

        # Yes/no phrasing — but quantity questions remain open-ended.
        if question_text.endswith('?') and any(word in lower_line for word in ['do you', 'have you', 'would you', 'can you', 'should', 'is it', 'are you']):
            if 'how much' not in lower_line and 'how many' not in lower_line:
                return "yes_no", ["Yes", "No"]
            return "open_ended", None

        # Satisfaction questions get a Likert scale.
        if any(word in lower_line for word in ['satisfy', 'satisfaction', 'satisfied']):
            return "likert_scale", ["Very Satisfied", "Satisfied", "Neutral", "Dissatisfied", "Very Dissatisfied"]

        return "open_ended", None

    def _build_question(self, question_id: int, question_text: str) -> Dict:
        """Build a single question dict, inferring its type from the text."""
        question_type, options = self._classify_question(question_text)

        question = {
            "id": question_id,
            "question_text": question_text,
            "question_type": question_type,
            "required": True
        }

        if options:
            question["options"] = options

        return question

    def _parse_numbered_list(self, response: str) -> Dict:
        """Parse a numbered list of questions into a survey structure."""
        # Split on numbered-question markers: "1. Question" or "1) Question".
        pattern = r'\d+[\.\)]\s+'
        parts = re.split(pattern, response)
        parts = [p.strip() for p in parts if p.strip()]

        questions = []
        question_id = 1

        for part in parts:
            # Skip fragments too short to be real questions.
            if len(part) < 10:
                continue

            # Keep only the first sentence/question when a fragment runs on
            # (split by newline, or terminal punctuation before the next number).
            sentences = re.split(r'[\n]+|[?.!]\s+(?=\d+[\.\)]|\Z)', part)
            clean_line = sentences[0].strip()

            # Remove stray leading hyphens or bullets.
            clean_line = re.sub(r'^[-•*]\s*', '', clean_line)

            # Normalize: every parsed question ends with a question mark.
            if clean_line and not clean_line.endswith('?'):
                clean_line += '?'

            # Skip if still too short after cleaning.
            if len(clean_line) < 10:
                continue

            questions.append(self._build_question(question_id, clean_line))
            question_id += 1

        # If the numbered-list format yielded too few questions, try the
        # line-based fallback parser and keep whichever found more.
        if len(questions) < 3:
            alt_questions = self._parse_alternative_format(response)
            if len(alt_questions) > len(questions):
                questions = alt_questions

        # Final safety net: never return an empty survey.
        if len(questions) == 0:
            questions = self._fallback_questions()

        return {
            "title": "Research Survey",
            "introduction": "Thank you for taking the time to participate in this survey. Your responses will help us better understand your experiences and perspectives. Please answer all questions honestly and thoroughly.",
            "questions": questions[:20],  # Limit to 20 questions
            "closing": "Thank you for your valuable time and feedback! Your responses are greatly appreciated and will be used to improve our understanding of this topic."
        }

    def _parse_alternative_format(self, response: str) -> List[Dict]:
        """Parse free-form response lines when the numbered-list format fails."""
        questions = []
        question_id = 1

        for line in response.split('\n'):
            line = line.strip()

            # Skip empty or too-short lines.
            if not line or len(line) < 10:
                continue

            # Skip lines that are labels or echoed instructions, not questions.
            skip_keywords = ['format:', 'requirements:', 'task:', 'topic:', 'audience:', 'here are', 'survey questions:', 'questions:']
            if any(keyword in line.lower() for keyword in skip_keywords):
                continue

            # A candidate question either contains "?" or starts with an
            # interrogative/imperative cue word.
            has_question_mark = '?' in line
            starts_with_question_word = any(word in line.lower() for word in ['describe', 'explain', 'what', 'how', 'why', 'when', 'where', 'who', 'can you', 'would you', 'do you', 'have you'])

            if not (has_question_mark or starts_with_question_word):
                continue

            # Strip leading bullets or "12." / "12)" numbering. The previous
            # single-character class regex left "0. " behind on multi-digit
            # numbers; the alternation removes the whole prefix.
            clean_line = re.sub(r'^(?:\d+[\.\)]|[-•*])\s*', '', line).strip()

            # Append "?" only when the line carries no other terminal punctuation.
            if clean_line and not clean_line.endswith('?'):
                if not any(c in clean_line for c in [':', '!', '.']):
                    clean_line += '?'

            # Skip if too short after cleaning.
            if len(clean_line) < 10:
                continue

            # Shared classifier keeps question typing consistent with the
            # numbered-list parser (previously this path only detected ratings).
            questions.append(self._build_question(question_id, clean_line))
            question_id += 1

        # Safety net: always return at least the generic fallback questions.
        if len(questions) == 0:
            questions = self._fallback_questions()

        return questions

    def refine_question(self, question: str, improvement_type: str = "clarity") -> str:
        """Refine a single survey question - optimized for Mistral/Mixtral.

        Args:
            question: The question to improve.
            improvement_type: Type of improvement (clarity, neutrality,
                specificity). Unknown values fall back to "clarity".

        Returns:
            Improved question text (whitespace-stripped LLM output).
        """
        improvement_guidance = {
            "clarity": "Makes the question clearer and easier for respondents to understand without ambiguity",
            "neutrality": "Removes any bias, leading language, or assumptions that could influence responses",
            "specificity": "Makes the question more specific and actionable while remaining open-ended"
        }

        guidance = improvement_guidance.get(improvement_type, improvement_guidance["clarity"])

        prompt = f"""Task: Improve a survey question



**Original Question:** "{question}"



**Improvement Type:** {improvement_type.title()}



**Your Goal:** Rewrite this question so that it {guidance}.



**Guidelines:**

- Keep the question focused on a single topic

- Use simple, clear language appropriate for the target audience

- Avoid assumptions or leading language

- Ensure the question can elicit meaningful responses



Provide ONLY the improved question text. Do not include explanations or alternative versions."""

        messages = [
            {"role": "system", "content": "You are an expert survey question designer with deep experience in qualitative research methodology."},
            {"role": "user", "content": prompt}
        ]

        return self.llm.generate(messages, max_tokens=150, temperature=0.5).strip()

    def add_follow_up_questions(self, base_question: str, num_follow_ups: int = 3) -> List[str]:
        """Generate follow-up questions for deeper exploration - optimized for Mistral/Mixtral.

        Args:
            base_question: The main question.
            num_follow_ups: Number of follow-up questions to generate.

        Returns:
            List of follow-up question texts (each ending with "?").
        """
        prompt = f"""Task: Generate probing follow-up questions



**Main Question:** {base_question}



**Your Task:** Create {num_follow_ups} thoughtful follow-up questions that probe deeper into the respondent's answer.



**Quality Criteria for Follow-ups:**

1. Each question should explore a different aspect, dimension, or implication of the main topic

2. Questions should encourage more detailed, nuanced responses

3. Follow a logical progression from the main question

4. Build on what a respondent might answer to the main question

5. Each should be specific but open-ended



**Format:** Number each question (1., 2., 3., etc.)



**Output {num_follow_ups} Follow-up Questions:**



1."""

        messages = [
            {"role": "system", "content": "You are an expert qualitative research interviewer skilled at designing probing questions that uncover deeper insights and nuances."},
            {"role": "user", "content": prompt}
        ]

        response = self.llm.generate(messages, max_tokens=500, temperature=0.7)

        # Primary parse: numbered-list items, each truncated to its first line.
        pattern = r'\d+[\.\)]\s+(.+?)(?=\d+[\.\)]|\Z)'
        matches = re.findall(pattern, response, re.DOTALL)

        if matches:
            follow_ups = [m.split('\n')[0].strip() for m in matches if m.strip()][:num_follow_ups]
            # Ensure all end with a question mark.
            follow_ups = [q if q.endswith('?') else q + '?' for q in follow_ups]
            if follow_ups:
                return follow_ups

        # Fallback: take any line containing "?" after stripping numbering.
        lines = [line.strip() for line in response.split("\n") if line.strip()]
        follow_ups = [line.lstrip("0123456789.-) ") for line in lines if "?" in line][:num_follow_ups]

        # Last resort: generic elaboration prompts built from the base question.
        return follow_ups if follow_ups else [f"Can you elaborate on {base_question.lower()}?" for _ in range(num_follow_ups)]