Patricksturg committed on
Commit
d3fd0f4
·
verified ·
1 Parent(s): f1861eb

Upload 3 files

Browse files
Files changed (3) hide show
  1. dashboard.py +912 -0
  2. dashboard_backend.py +130 -0
  3. requirements.txt +4 -0
dashboard.py ADDED
@@ -0,0 +1,912 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Silicon Sampling Dashboard
4
+
5
+ Interactive web interface for generating synthetic survey responses.
6
+ Users can input custom questions and get silicon sample data without coding.
7
+
8
+ Usage:
9
+ streamlit run dashboard.py
10
+ """
11
+
12
+ import streamlit as st
13
+ import pandas as pd
14
+ from pathlib import Path
15
+ import json
16
+ from datetime import datetime
17
+
# --- App setup ---------------------------------------------------------------
st.set_page_config(
    page_title="COGbot Dashboard",
    page_icon="🤖",
    layout="wide"
)

# Seed st.session_state on the first run; later reruns keep existing values.
_SESSION_DEFAULTS = {
    'results': None,                   # generated responses (DataFrame) or None
    'processing': False,               # True while a generation run is in flight
    'mode': "Response Generation",     # mode the last run was started in
    'question_text': "",               # question stored for the analysis step
    'response_options_text': "",       # options stored in Question Testing mode
    'iteration_results': [],           # per-iteration stats for continuous improvement
    'current_iteration': 0,            # index of the current improvement iteration
}
for _key, _value in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _value

# Title and description
st.title("🤖 COGbot Dashboard")
st.markdown("""
Generate synthetic survey responses using LLM-based persona simulation.
""")

# Sidebar: LSE logo at the top (only when the image ships with the app),
# followed by the configuration controls.
# NOTE(review): divider assumed to belong with the logo — confirm layout.
logo_path = "LSE_logo.jpg"
if Path(logo_path).exists():
    st.sidebar.image(logo_path, width=180)
    st.sidebar.markdown("---")

st.sidebar.header("⚙️ Configuration")
55
+
# --- Data source selection ---------------------------------------------------
data_source = st.sidebar.radio(
    "Data Source",
    ["Default ESS UK (1,286 respondents)", "Upload CSV (not available yet)"]
)

# Backstory length option
backstory_length = st.sidebar.radio(
    "Backstory Length",
    ["Long (detailed)", "Short (concise)"],
    help="Choose between detailed backstories with full demographic info or concise versions"
)

# Load the chosen backstory dataset into df_backstories (None on any failure).
if "Upload CSV" in data_source:
    uploaded_file = st.sidebar.file_uploader(
        "Upload backstories CSV",
        type=['csv'],
        help="CSV must have 'backstory' column"
    )
    if uploaded_file:
        # FIX: a malformed upload used to crash the whole app; surface a
        # sidebar error instead.
        try:
            df_backstories = pd.read_csv(uploaded_file)
        except Exception:
            df_backstories = None
            st.sidebar.error("⚠️ Could not parse the uploaded file as CSV.")
    else:
        df_backstories = None
else:
    # Load default ESS data shipped alongside the app
    default_path = Path("ess_uk_with_backstories.csv")
    if default_path.exists():
        df_backstories = pd.read_csv(default_path)
    else:
        df_backstories = None
        st.sidebar.warning("⚠️ Default file not found: ess_uk_with_backstories.csv")

# FIX: the generation step relies on a 'backstory' column; reject data without
# it here rather than failing mid-run later.
if df_backstories is not None and 'backstory' not in df_backstories.columns:
    st.sidebar.error("⚠️ Loaded data has no 'backstory' column.")
    df_backstories = None

# Show data info
if df_backstories is not None:
    st.sidebar.success(f"✅ Loaded {len(df_backstories):,} respondents")

    # Sample size
    max_size = len(df_backstories)
    if max_size > 10:
        sample_size = st.sidebar.slider(
            "Sample Size",
            min_value=10,
            max_value=max_size,
            value=min(50, max_size),
            step=10,
            help="Start with small sample for testing"
        )
    else:
        # FIX: st.slider requires min_value < max_value; with ≤10 rows the old
        # slider raised. A tiny dataset is simply used in full.
        sample_size = max_size
else:
    sample_size = 0
104
+
# --- Model settings ----------------------------------------------------------
st.sidebar.subheader("Model Settings")

model_option = st.sidebar.selectbox(
    "Model",
    ["Claude (Anthropic)", "GPT-4 (OpenAI)"],
    help="API-based models. Provide your API key below."
)

# One password field whose label and help text follow the selected provider.
if "Claude" in model_option:
    _key_label = "Anthropic API Key"
    _key_help = "Get your key from https://console.anthropic.com/"
else:
    _key_label = "OpenAI API Key"
    _key_help = "Get your key from https://platform.openai.com/"
api_key = st.sidebar.text_input(_key_label, type="password", help=_key_help)

# Sampling temperature passed through to the model calls.
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.0,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Higher = more creative, Lower = more consistent"
)
136
+
# --- Main panel: question configuration --------------------------------------
st.header("📋 Step 1: Configure Question")

# Mode selection. NOTE: later membership tests like `"Question Testing" in
# mode` deliberately match both testing variants.
mode = st.radio(
    "Mode",
    ["Response Generation", "Question Testing", "Question Testing (Continuous Improvement)"],
    help="Response Generation: Get synthetic survey responses. Question Testing: Get feedback on question quality. Continuous Improvement: Iteratively improve question through multiple rounds of testing - manually control each iteration."
)

col1, col2 = st.columns([2, 1])

with col1:
    # A previous improvement round may have queued a rewritten question under
    # 'next_question'; surface it once, then blank the slot.
    default_question = st.session_state.get('next_question', '')
    if default_question:
        st.session_state.next_question = ''

    _question_help = (
        "The question your synthetic respondents will answer"
        if mode == "Response Generation"
        else "The draft question you want to test for clarity and quality"
    )
    question_text = st.text_area(
        "Survey Question",
        value=default_question,
        height=80,
        placeholder="Enter your survey question here...",
        help=_question_help
    )

    # Only the testing modes ask what concept the question should measure.
    if "Question Testing" in mode:
        # Persisted in session state so improvement iterations keep it.
        default_concept = st.session_state.get('concept_description', '')

        concept_description = st.text_area(
            "Concept Description",
            value=default_concept,
            height=100,
            placeholder="Describe what you are trying to measure with this question...",
            help="Describe in as much detail as you can what you are trying to measure with this question. This helps the LLM understand your intent and provide better feedback."
        )
    else:
        concept_description = ""

with col2:
    if mode == "Response Generation":
        response_format = st.selectbox(
            "Response Format",
            ["Scale (0-10)", "Scale (1-5)", "Multiple Choice", "Yes/No", "Open Text"]
        )
    else:  # both Question Testing modes collect free-text feedback
        response_format = "Open Text"
        st.info("📝 Question Testing uses open text responses to gather feedback on question quality.")
188
+
# --- Prompt configuration (depends on mode) ----------------------------------
# Both of these are read later when the full prompt is assembled, so they must
# exist whatever branch runs below.
mc_options = ""
response_options_text = ""

if "Question Testing" in mode:
    # Testing modes: ask synthetic respondents to critique the draft question.
    st.subheader("Response Options/Instructions")

    # A previous improvement round may have queued rewritten response options;
    # surface them once, then blank the slot.
    default_options = st.session_state.get('next_options', '')
    if default_options:
        st.session_state.next_options = ''

    response_options_text = st.text_area(
        "Response Options (if applicable)",
        value=default_options,
        height=100,
        placeholder="e.g., Scale from 0-10 where 0=Not at all, 10=Extremely, or Multiple choice options A, B, C, D",
        help="Include any response options or scales that are part of the question being tested"
    )

    # One-shot banner shown right after an improved question was loaded.
    if st.session_state.get('show_rerun_message', False):
        st.info(f"🔄 **Iteration {st.session_state.current_iteration + 1}:** Improved question loaded. Click 'Generate Responses' below to test the new version.")
        st.session_state.show_rerun_message = False

    # Fixed critique instructions sent to every synthetic respondent.
    instructions = """Please provide feedback on this survey question. Comment on:

1. Are there any parts of the question that are ambiguous or unclear?
2. Are there any parts that are difficult to understand?
3. Did you have any problems thinking about how to answer?
4. Are the response options (if provided) appropriate and complete?

Provide your feedback in 2-3 sentences, being specific about any issues you identify."""

    # Thematic coding always runs in testing mode.
    enable_thematic_coding = True
    st.info("🔍 Thematic analysis will automatically run to identify common issues in the question.")

else:
    # Response Generation: build answer-format instructions for the format
    # chosen in the right-hand column.
    if "Scale" in response_format:
        st.subheader("Scale Labels")

        if "0-10" in response_format:
            # 11-point scale: only the endpoints are labelled.
            col_low, col_high = st.columns(2)
            with col_low:
                low_label = st.text_input(
                    "0 means",
                    value="Not at all",
                    help="What does the lowest value mean?"
                )
            with col_high:
                high_label = st.text_input(
                    "10 means",
                    value="Extremely",
                    help="What does the highest value mean?"
                )
            instructions = f"Respond with a single integer from 0 to 10, where 0 means '{low_label}' and 10 means '{high_label}'. Only output the number."

        else:  # 1-5 scale: every point gets its own label
            label_1 = st.text_input("1 means", value="Strongly disagree")
            label_2 = st.text_input("2 means", value="Disagree")
            label_3 = st.text_input("3 means", value="Neither agree nor disagree")
            label_4 = st.text_input("4 means", value="Agree")
            label_5 = st.text_input("5 means", value="Strongly agree")

            instructions = f"""Respond with a single integer from 1 to 5 based on these labels:
1 = {label_1}
2 = {label_2}
3 = {label_3}
4 = {label_4}
5 = {label_5}

Only output the number."""
    else:
        # Non-scale formats share a simple one-line instruction.
        instructions = {
            "Multiple Choice": "Choose one option and respond with only the letter (A, B, C, or D).",
            "Yes/No": "Respond with only 'Yes' or 'No'.",
            "Open Text": "Provide a brief 1-2 sentence response based on your persona."
        }.get(response_format, "")

    # Multiple choice options (mc_options stays "" for every other format —
    # it was pre-initialized above).
    if response_format == "Multiple Choice":
        st.subheader("Response Options")
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            option_a = st.text_input("Option A", "Strongly agree")
        with col2:
            option_b = st.text_input("Option B", "Agree")
        with col3:
            option_c = st.text_input("Option C", "Disagree")
        with col4:
            option_d = st.text_input("Option D", "Strongly disagree")

        mc_options = f"\nA. {option_a}\nB. {option_b}\nC. {option_c}\nD. {option_d}"

    # Thematic coding is opt-in for open text responses.
    enable_thematic_coding = False
    if response_format == "Open Text":
        st.subheader("Thematic Coding")
        enable_thematic_coding = st.checkbox(
            "Perform automated thematic analysis after generating responses",
            value=False,
            help="Uses LLM to identify themes, counts, and percentages in open text responses. Runs automatically after response generation."
        )
304
+
# Preview full prompt (also builds full_question, which the generation step
# sends to the model).
with st.expander("🔍 Preview Full Prompt"):
    st.markdown("**System Prompt:**")
    st.code("""Adopt the following persona and answer only based on it.
Do not invent details beyond the provided attributes.

[Backstory will be inserted here for each respondent]""")

    st.markdown("**User Prompt:**")
    # FIX: use the substring test (as everywhere else in this file) so that
    # "Question Testing (Continuous Improvement)" also builds the testing-style
    # prompt — the old equality check silently dropped the "Question:" prefix
    # and the response options in that mode.
    if "Question Testing" in mode:
        # Include response options in the question display for testing
        full_question = f"Question: {question_text}\n"
        if response_options_text.strip():
            full_question += f"\nResponse Options: {response_options_text}\n"
        full_question += f"\n{instructions}"
    else:
        full_question = question_text + mc_options + "\n\n" + instructions
    st.code(full_question)
323
+
# --- Step 2: run generation / testing ----------------------------------------
# FIX: substring test so "Question Testing (Continuous Improvement)" gets the
# testing header too (the old equality check showed the generation header).
if "Question Testing" in mode:
    st.header("🧪 Step 2: Test Question")
    button_text = "🧪 Test Question with Synthetic Respondents"
else:
    st.header("🚀 Step 2: Generate Responses")
    button_text = "🎯 Generate Responses"

# Enabled only when data is loaded, a question was entered, and no run is
# already in flight.
can_generate = (
    df_backstories is not None
    and question_text.strip() != ""
    and not st.session_state.processing
)

if st.button(
    button_text,
    disabled=not can_generate,
    type="primary",
    use_container_width=True
):
    st.session_state.processing = True
    st.session_state.results = None
    st.session_state.mode = mode  # Store mode for results display
    st.session_state.question_text = question_text  # Store for thematic analysis
    # FIX: substring test so the Continuous Improvement mode also persists the
    # options/concept that the improvement step reads back from session state.
    if "Question Testing" in mode:
        st.session_state.response_options_text = response_options_text
        st.session_state.concept_description = concept_description

    # Prepare configuration
    config = {
        "question": full_question,
        "temperature": temperature,
        "sample_size": sample_size
    }

    # Validate API key before doing any work.
    if not api_key:
        st.error(f"⚠️ Please provide your {'Anthropic' if 'Claude' in model_option else 'OpenAI'} API key in the sidebar.")
        # FIX: the old code stopped with processing=True, which left the
        # button disabled on every subsequent rerun.
        st.session_state.processing = False
        st.stop()

    # Create sampler based on model selection
    if "Claude" in model_option:
        from dashboard_backend import AnthropicSampler
        config["anthropic_api_key"] = api_key
        sampler = AnthropicSampler(config)
    else:  # OpenAI
        from dashboard_backend import OpenAISampler
        config["openai_api_key"] = api_key
        sampler = OpenAISampler(config)

    # Progress bar
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Random but reproducible subset of respondents.
    df_sample = df_backstories.sample(n=sample_size, random_state=42).copy()

    # Apply backstory length preference
    if "Short" in backstory_length and 'backstory' in df_sample.columns:
        # Truncate to first 150 characters for short version
        df_sample['backstory'] = df_sample['backstory'].apply(
            lambda x: x[:150] + "..." if isinstance(x, str) and len(x) > 150 else x
        )

    # Process. FIX: st.rerun() works by raising a control-flow exception that
    # derives from Exception, so the old code's `except Exception` swallowed
    # the rerun and reported it as "❌ Error". Call it only after the
    # try/except has finished.
    succeeded = False
    try:
        results = sampler.generate_responses(
            df_sample,
            progress_callback=lambda i, total: (
                progress_bar.progress(i / total),
                status_text.text(f"Processing: {i}/{total} respondents ({100*i/total:.1f}%)")
            )
        )
        st.session_state.results = results
        succeeded = True
    except Exception as e:
        st.error(f"❌ Error: {str(e)}")
    finally:
        # Always release the in-flight flag so the button re-enables.
        st.session_state.processing = False

    if succeeded:
        st.success(f"✅ Generated {len(results)} responses!")
        st.rerun()
406
+
407
+ # Show results
408
+ if st.session_state.results is not None:
409
+ st.header("📊 Step 3: Results")
410
+
411
+ results_df = st.session_state.results
412
+
413
+ # Summary stats
414
+ col1, col2, col3 = st.columns(3)
415
+ with col1:
416
+ st.metric("Total Responses", len(results_df))
417
+ with col2:
418
+ valid_responses = results_df['response'].notna().sum()
419
+ st.metric("Valid Responses", valid_responses)
420
+ with col3:
421
+ completion_rate = 100 * valid_responses / len(results_df)
422
+ st.metric("Completion Rate", f"{completion_rate:.1f}%")
423
+
424
+ # Preview
425
+ st.subheader("Preview (First 10 rows)")
426
+ st.dataframe(results_df.head(10), use_container_width=True)
427
+
428
+ # Download
429
+ st.subheader("Download Results")
430
+
431
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
432
+ filename = f"silicon_sample_{timestamp}.csv"
433
+
434
+ csv = results_df.to_csv(index=False)
435
+ st.download_button(
436
+ label="📥 Download CSV",
437
+ data=csv,
438
+ file_name=filename,
439
+ mime="text/csv",
440
+ use_container_width=True
441
+ )
442
+
443
+ # Response distribution and statistics
444
+ if response_format in ["Scale (0-10)", "Scale (1-5)", "Yes/No", "Multiple Choice"]:
445
+ st.subheader(f"Response Distribution: {question_text}")
446
+ try:
447
+ # For numeric formats, convert to numbers
448
+ if response_format.startswith("Scale"):
449
+ numeric_responses = pd.to_numeric(results_df['response'], errors='coerce')
450
+ valid_responses = numeric_responses.dropna()
451
+ elif response_format == "Yes/No":
452
+ # For Yes/No, show frequency distribution
453
+ valid_responses = results_df['response'].dropna()
454
+ elif response_format == "Multiple Choice":
455
+ # For Multiple Choice, show frequency distribution
456
+ valid_responses = results_df['response'].dropna()
457
+
458
+ if len(valid_responses) > 0:
459
+ # Show statistics for numeric scales
460
+ if response_format.startswith("Scale"):
461
+ col1, col2, col3, col4, col5 = st.columns(5)
462
+
463
+ with col1:
464
+ st.metric("Mean", f"{valid_responses.mean():.2f}")
465
+ with col2:
466
+ st.metric("Median", f"{valid_responses.median():.2f}")
467
+ with col3:
468
+ st.metric("Std Dev", f"{valid_responses.std():.2f}")
469
+ with col4:
470
+ mode_val = valid_responses.mode()
471
+ mode_display = f"{mode_val.iloc[0]:.0f}" if len(mode_val) > 0 else "N/A"
472
+ st.metric("Mode", mode_display)
473
+ with col5:
474
+ st.metric("Valid N", f"{len(valid_responses)}")
475
+
476
+ # Distribution chart
477
+ st.bar_chart(pd.to_numeric(results_df['response'], errors='coerce').value_counts().sort_index())
478
+
479
+ # Show frequency counts for categorical
480
+ else:
481
+ value_counts = valid_responses.value_counts()
482
+
483
+ # Display as metrics
484
+ cols = st.columns(min(len(value_counts), 5))
485
+ for idx, (value, count) in enumerate(value_counts.items()):
486
+ if idx < 5: # Limit to 5 columns
487
+ with cols[idx]:
488
+ pct = 100 * count / len(valid_responses)
489
+ st.metric(f"{value}", f"{count} ({pct:.1f}%)")
490
+
491
+ # Also show total N
492
+ st.metric("Total Valid N", f"{len(valid_responses)}")
493
+
494
+ # Distribution chart
495
+ st.bar_chart(value_counts)
496
+ else:
497
+ st.info("No valid responses to analyze")
498
+ except Exception as e:
499
+ st.info(f"Could not generate statistics: {str(e)}")
500
+
501
+ # Thematic coding for open text responses
502
+ elif response_format == "Open Text" and enable_thematic_coding:
503
+ # Get the stored mode and question text
504
+ stored_mode = st.session_state.get('mode', 'Response Generation')
505
+ stored_question = st.session_state.get('question_text', question_text)
506
+
507
+ # Different heading based on mode
508
+ if stored_mode == "Question Testing":
509
+ st.subheader(f"Question Testing Results: {stored_question}")
510
+ else:
511
+ st.subheader(f"Thematic Analysis: {stored_question}")
512
+
513
+ # Get valid text responses
514
+ valid_responses = results_df['response'].dropna()
515
+ valid_responses = valid_responses[valid_responses.str.strip() != ""]
516
+
517
+ if len(valid_responses) > 0:
518
+ st.info(f"Analyzing {len(valid_responses)} open text responses...")
519
+
520
+ # Automatically run thematic coding
521
+ if True: # Changed from button to automatic
522
+ with st.spinner("Analyzing themes with LLM..."):
523
+ try:
524
+ # Prepare responses for analysis
525
+ responses_text = "\n\n".join([f"Response {i+1}: {resp}" for i, resp in enumerate(valid_responses)])
526
+
527
+ # Create thematic analysis prompt - different for Question Testing
528
+ if stored_mode == "Question Testing":
529
+ coding_prompt = f"""You are a survey methodology expert analyzing feedback from respondents who tested a draft survey question.
530
+
531
+ Question being tested: "{stored_question}"
532
+
533
+ Here is the feedback from respondents (total of {len(valid_responses)} responses):
534
+
535
+ {responses_text}
536
+
537
+ CRITICAL INSTRUCTIONS:
538
+ - DO NOT list individual responses
539
+ - DO NOT copy feedback verbatim
540
+ - DO NOT fabricate or hallucinate problems that aren't genuinely present in the feedback
541
+ - DO NOT feel pressured to find a specific number of issues
542
+ - ONLY report genuine problems, ambiguities, or concerns that respondents actually raised
543
+ - If the question and response scales are clear and well-designed, say so - it's perfectly acceptable to find zero issues
544
+ - DO group similar issues together and count how many respondents mentioned each
545
+
546
+ Task:
547
+ 1. Read ALL responses carefully and identify ONLY genuine recurring issues and concerns
548
+ 2. If respondents found the question clear and had no problems, state that the question appears well-designed
549
+ 3. Group similar problems together (e.g., all mentions of "unclear terminology" should be one issue)
550
+ 4. For each distinct issue that was genuinely raised, provide:
551
+ - Issue name (2-4 words, e.g., "Ambiguous wording", "Unclear scale", "Missing context")
552
+ - Brief description (1 sentence explaining the specific problem)
553
+ - Count of how many respondents mentioned this issue
554
+ - Percentage of total respondents
555
+
556
+ REQUIRED FORMAT (follow exactly):
557
+
558
+ ISSUE 1: [Name]
559
+ DESCRIPTION: [One sentence explaining the problem]
560
+ COUNT: [Number of respondents who mentioned this]
561
+ PERCENTAGE: [Percentage]
562
+
563
+ ISSUE 2: [Name]
564
+ DESCRIPTION: [One sentence explaining the problem]
565
+ COUNT: [Number]
566
+ PERCENTAGE: [Percentage]
567
+
568
+ [Continue for all distinct issues]
569
+
570
+ SUMMARY:
571
+ [If issues were identified: 2-3 sentence summary of the most critical problems requiring attention]
572
+ [If no significant issues were found: Statement confirming the question appears clear and well-designed based on respondent feedback]
573
+
574
+ Example of CORRECT output when issues are found:
575
+ ISSUE 1: Ambiguous term "partner"
576
+ DESCRIPTION: Respondents were unclear whether "partner" refers to romantic partner, business partner, or roommate
577
+ COUNT: 15
578
+ PERCENTAGE: 75%
579
+
580
+ ISSUE 2: Vague timeframe
581
+ DESCRIPTION: The phrase "these days" lacks specificity about the time period being asked about
582
+ COUNT: 8
583
+ PERCENTAGE: 40%
584
+
585
+ Example of CORRECT output when no issues are found:
586
+ SUMMARY:
587
+ Based on the respondent feedback, the question appears well-designed and clear. Respondents understood what was being asked, found the wording unambiguous, and had no difficulty formulating responses. No significant issues or concerns were raised that would require revision."""
588
+ else:
589
+ coding_prompt = f"""You are a qualitative researcher conducting thematic analysis on open-ended survey responses.
590
+
591
+ Question asked: "{stored_question}"
592
+
593
+ Here are all the responses:
594
+
595
+ {responses_text}
596
+
597
+ Task:
598
+ 1. Identify the main themes present in these responses (aim for 4-8 themes)
599
+ 2. For each theme, provide:
600
+ - Theme name (2-4 words)
601
+ - Brief description (1 sentence)
602
+ - Count of how many responses express this theme
603
+ - Percentage of total responses
604
+
605
+ Format your response as:
606
+ THEME: [Name]
607
+ DESCRIPTION: [Description]
608
+ COUNT: [Number]
609
+ PERCENTAGE: [Percentage]
610
+
611
+ [Repeat for each theme]"""
612
+
613
+ # Send to API for analysis
614
+ if "Claude" in model_option:
615
+ import anthropic
616
+ client = anthropic.Anthropic(api_key=api_key)
617
+ message = client.messages.create(
618
+ model="claude-3-5-sonnet-20241022",
619
+ max_tokens=2000,
620
+ temperature=0.3,
621
+ system="You are a qualitative research expert analyzing survey responses.",
622
+ messages=[{"role": "user", "content": coding_prompt}]
623
+ )
624
+ analysis_result = message.content[0].text.strip()
625
+ else: # OpenAI
626
+ from openai import OpenAI
627
+ client = OpenAI(api_key=api_key)
628
+ response = client.chat.completions.create(
629
+ model="gpt-4o",
630
+ max_tokens=2000,
631
+ temperature=0.3,
632
+ messages=[
633
+ {"role": "system", "content": "You are a qualitative research expert analyzing survey responses."},
634
+ {"role": "user", "content": coding_prompt}
635
+ ]
636
+ )
637
+ analysis_result = response.choices[0].message.content.strip()
638
+
639
+ # Display results
640
+ st.markdown("### Thematic Coding Results")
641
+ st.text_area("Analysis", analysis_result, height=400)
642
+
643
+ # For Question Testing mode, add problem summary and offer to suggest improved wording
644
+ if "Question Testing" in stored_mode:
645
+ # Parse the analysis to extract problem counts
646
+ import re
647
+
648
+ # Extract counts from the analysis (looks for patterns like "Count: X" or "X respondents" or "X mentions")
649
+ count_patterns = [
650
+ r'Count:\s*(\d+)',
651
+ r'(\d+)\s+respondents?',
652
+ r'(\d+)\s+mentions?',
653
+ r'(\d+)/\d+', # X/total format
654
+ r'\((\d+)\s+respondents?\)',
655
+ ]
656
+
657
+ problem_counts = []
658
+ for pattern in count_patterns:
659
+ matches = re.findall(pattern, analysis_result, re.IGNORECASE)
660
+ if matches:
661
+ problem_counts.extend([int(m) for m in matches])
662
+
663
+ # Calculate summary statistics
664
+ if problem_counts:
665
+ num_problems = len(problem_counts) # a) distinct problems
666
+ total_mentions = sum(problem_counts) # c) total problem mentions
667
+ n_respondents = len(valid_responses)
668
+ avg_problems_per_respondent = total_mentions / n_respondents if n_respondents > 0 else 0 # d) average
669
+
670
+ # Display problem summary
671
+ st.markdown("---")
672
+ st.markdown("### Problem Summary")
673
+
674
+ col1, col2, col3, col4 = st.columns(4)
675
+ with col1:
676
+ st.metric("Distinct Problems", num_problems)
677
+ with col2:
678
+ st.metric("Total Mentions", total_mentions)
679
+ with col3:
680
+ st.metric("Respondents", n_respondents)
681
+ with col4:
682
+ st.metric("Avg Problems/Respondent", f"{avg_problems_per_respondent:.2f}")
683
+
684
+ # Show breakdown
685
+ with st.expander("📊 Problem Breakdown"):
686
+ st.markdown("**Problems by frequency:**")
687
+ for i, count in enumerate(sorted(problem_counts, reverse=True), 1):
688
+ pct = (count / n_respondents * 100) if n_respondents > 0 else 0
689
+ st.write(f"Problem {i}: {count} mentions ({pct:.1f}% of respondents)")
690
+
691
+ st.markdown("---")
692
+ st.markdown("### Suggest Improved Question Wording")
693
+
694
+ if st.button("✨ Generate Improved Question", type="secondary"):
695
+ with st.spinner("Generating improved question wording..."):
696
+ try:
697
+ # Get response options and concept description if they exist
698
+ stored_options = st.session_state.get('response_options_text', '')
699
+ stored_concept = st.session_state.get('concept_description', '')
700
+
701
+ # Create improvement prompt
702
+ # Build the sections separately to avoid f-string backslash issue
703
+ options_section = f"\nOriginal Response Options: {stored_options}\n" if stored_options else ""
704
+ concept_section = f"\nConcept Being Measured: {stored_concept}\n" if stored_concept else ""
705
+ improved_options_section = "\n\nIMPROVED RESPONSE OPTIONS:\n[Your improved options]\n" if stored_options else ""
706
+
707
+ improvement_prompt = f"""You are a survey methodology expert. Based on the respondent feedback analysis below, suggest an improved version of the survey question that addresses the identified issues.
708
+
709
+ Original Question: "{stored_question}"{options_section}{concept_section}
710
+
711
+ Respondent Feedback Analysis:
712
+ {analysis_result}
713
+
714
+ Task:
715
+ 1. Identify the main problems with the current question based on respondent feedback
716
+ 2. Provide an improved version of the question that addresses these problems
717
+ 3. If response options were provided, suggest improved response options as well
718
+ 4. Explain specifically how the new version improves on the original based on the respondent feedback
719
+
720
+ Format your response as:
721
+
722
+ PROBLEMS IDENTIFIED:
723
+ [List the specific problems with the current question based on respondent feedback]
724
+
725
+ IMPROVED QUESTION:
726
+ [Your improved question text]{improved_options_section}
727
+
728
+ HOW THE NEW VERSION IMPROVES:
729
+ [Explain how each change addresses the problems identified in respondent feedback]"""
730
+
731
+ # Send to API for improvement
732
+ if "Claude" in model_option:
733
+ import anthropic
734
+ client = anthropic.Anthropic(api_key=api_key)
735
+ message = client.messages.create(
736
+ model="claude-3-5-sonnet-20241022",
737
+ max_tokens=1000,
738
+ temperature=0.3,
739
+ system="You are a survey methodology expert specializing in question wording and design.",
740
+ messages=[{"role": "user", "content": improvement_prompt}]
741
+ )
742
+ improvement_result = message.content[0].text.strip()
743
+ else: # OpenAI
744
+ from openai import OpenAI
745
+ client = OpenAI(api_key=api_key)
746
+ response = client.chat.completions.create(
747
+ model="gpt-4o",
748
+ max_tokens=1000,
749
+ temperature=0.3,
750
+ messages=[
751
+ {"role": "system", "content": "You are a survey methodology expert specializing in question wording and design."},
752
+ {"role": "user", "content": improvement_prompt}
753
+ ]
754
+ )
755
+ improvement_result = response.choices[0].message.content.strip()
756
+
757
+ # Display improved version
758
+ st.markdown("### Improved Question Suggestion")
759
+ st.text_area("Suggested Improvements", improvement_result, height=300)
760
+
761
+ # Store improvement result for potential re-run
762
+ st.session_state['last_improvement'] = improvement_result
763
+
764
+ # Store current iteration results for comparison
765
+ if problem_counts:
766
+ iteration_data = {
767
+ 'iteration': st.session_state.current_iteration,
768
+ 'question': stored_question,
769
+ 'response_options': stored_options,
770
+ 'num_problems': num_problems,
771
+ 'total_mentions': total_mentions,
772
+ 'n_respondents': n_respondents,
773
+ 'avg_problems': avg_problems_per_respondent,
774
+ 'problem_counts': problem_counts
775
+ }
776
+ # Only add if this iteration isn't already stored
777
+ if not any(d['iteration'] == st.session_state.current_iteration for d in st.session_state.iteration_results):
778
+ st.session_state.iteration_results.append(iteration_data)
779
+
780
+ st.info("💡 Review the suggested improvements and adapt them as needed for your research context.")
781
+
782
+ # Add re-run button for Question Testing mode
783
+ if "Question Testing" in stored_mode:
784
+ st.markdown("---")
785
+ if st.button("🔄 Re-run COGbot on Improved Question", type="primary"):
786
+ # Get improvement result from session state (more reliable than local variable)
787
+ stored_improvement = st.session_state.get('last_improvement', improvement_result)
788
+
789
+ if not stored_improvement:
790
+ st.error("No improvement suggestion found. Please click 'Generate Improved Question' first.")
791
+ else:
792
+ # Extract improved question from the result
793
+ import re
794
+
795
+ # Try multiple patterns to extract improved question
796
+ patterns = [
797
+ r'IMPROVED QUESTION:\s*\n+([^\n].*?)(?:\n\n+IMPROVED RESPONSE OPTIONS:|\n\n+HOW THE NEW VERSION IMPROVES:|$)',
798
+ r'IMPROVED QUESTION:\s*\n+([^\n][^\n]+)', # Just get first line after
799
+ r'improved question[:\s]+([^\n]+)', # More flexible
800
+ ]
801
+
802
+ new_question = None
803
+ for pattern in patterns:
804
+ match = re.search(pattern, stored_improvement, re.DOTALL | re.IGNORECASE)
805
+ if match:
806
+ new_question = match.group(1).strip()
807
+ # Remove any leading quotes or markers
808
+ new_question = new_question.strip('"\'')
809
+ if len(new_question) > 10: # Valid question should be longer than 10 chars
810
+ break
811
+
812
+ if new_question:
813
+ # Try to extract improved response options if present
814
+ options_match = re.search(r'IMPROVED RESPONSE OPTIONS:\s*\n+(.*?)(?:\n\n+HOW THE NEW VERSION IMPROVES:|$)',
815
+ stored_improvement, re.DOTALL | re.IGNORECASE)
816
+ new_options = options_match.group(1).strip() if options_match else stored_options
817
+
818
+ # Debug: Show what was extracted
819
+ st.info(f"✅ Extracted question: {new_question[:100]}...")
820
+
821
+ # Store the new question and options for next run FIRST
822
+ st.session_state.next_question = new_question
823
+ st.session_state.next_options = new_options
824
+
825
+ # Increment iteration counter
826
+ st.session_state.current_iteration += 1
827
+
828
+ # Clear old results to force regeneration
829
+ st.session_state.results = None
830
+ if 'last_improvement' in st.session_state:
831
+ del st.session_state['last_improvement']
832
+
833
+ # Set flag to show message after rerun
834
+ st.session_state.show_rerun_message = True
835
+
836
+ # Immediately rerun
837
+ st.rerun()
838
+ else:
839
+ st.error("❌ Could not extract improved question from the output.")
840
+ st.warning("💡 Please manually copy the improved question and paste it into the question box above.")
841
+
842
+ except Exception as e:
843
+ st.error(f"Error generating improved question: {str(e)}")
844
+
845
+ except Exception as e:
846
+ st.error(f"Error during thematic analysis: {str(e)}")
847
+ else:
848
+ st.info("No valid open text responses to analyze")
849
+
850
+ # Display iteration comparison table for Question Testing mode
851
+ if len(st.session_state.iteration_results) > 0 and "Question Testing" in st.session_state.get('mode', ''):
852
+ st.markdown("---")
853
+ st.markdown("## 📊 Iteration Comparison")
854
+ st.markdown(f"**Total iterations completed:** {len(st.session_state.iteration_results)}")
855
+
856
+ # Create comparison table
857
+ comparison_data = []
858
+ for iteration in st.session_state.iteration_results:
859
+ comparison_data.append({
860
+ "Iteration": iteration['iteration'] + 1,
861
+ "Question": iteration['question'][:100] + "..." if len(iteration['question']) > 100 else iteration['question'],
862
+ "# Problems": iteration['num_problems'],
863
+ "Total Mentions": iteration['total_mentions'],
864
+ "Respondents": iteration['n_respondents'],
865
+ "Avg Problems/Resp": f"{iteration['avg_problems']:.2f}"
866
+ })
867
+
868
+ if comparison_data:
869
+ import pandas as pd
870
+ df_comparison = pd.DataFrame(comparison_data)
871
+ st.dataframe(df_comparison, use_container_width=True)
872
+
873
+ # Show detailed stats for each iteration
874
+ with st.expander("🔍 View Detailed Stats for Each Iteration"):
875
+ for iteration in st.session_state.iteration_results:
876
+ st.markdown(f"### Iteration {iteration['iteration'] + 1}")
877
+ st.markdown(f"**Question:** {iteration['question']}")
878
+ if iteration['response_options']:
879
+ st.markdown(f"**Response Options:** {iteration['response_options']}")
880
+
881
+ col1, col2, col3, col4 = st.columns(4)
882
+ with col1:
883
+ st.metric("Distinct Problems", iteration['num_problems'])
884
+ with col2:
885
+ st.metric("Total Mentions", iteration['total_mentions'])
886
+ with col3:
887
+ st.metric("Respondents", iteration['n_respondents'])
888
+ with col4:
889
+ st.metric("Avg Problems/Respondent", f"{iteration['avg_problems']:.2f}")
890
+
891
+ st.markdown("**Problem Breakdown:**")
892
+ for i, count in enumerate(sorted(iteration['problem_counts'], reverse=True), 1):
893
+ pct = (count / iteration['n_respondents'] * 100) if iteration['n_respondents'] > 0 else 0
894
+ st.write(f"Problem {i}: {count} mentions ({pct:.1f}% of respondents)")
895
+ st.markdown("---")
896
+
897
+ # Add reset button
898
+ if st.button("🔄 Reset Iterations", help="Clear all iteration history and start fresh"):
899
+ st.session_state.iteration_results = []
900
+ st.session_state.current_iteration = 0
901
+ st.session_state.next_question = ''
902
+ st.session_state.next_options = ''
903
+ st.success("✅ Iteration history cleared!")
904
+ st.rerun()
905
+
906
+ # Footer
907
+ st.sidebar.markdown("---")
908
+ st.sidebar.markdown("""
909
+ **Need Help?**
910
+ - [Documentation](WINSTON_README.md)
911
+ - [GitHub](https://github.com/PatrickSturgis/Silicon_samples)
912
+ """)
dashboard_backend.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Dashboard Backend - API Only (HF Spaces)
4
+
5
+ Handles LLM querying via Anthropic and OpenAI APIs.
6
+ """
7
+
8
+ import pandas as pd
9
+ from typing import Callable, Optional
10
+
11
+
12
class AnthropicSampler:
    """Anthropic Claude API sampler for validation studies.

    Expected config keys:
        anthropic_api_key (str): API key for the Anthropic client.
        temperature (float): Sampling temperature, default 0.7.
        question (str): Survey question posed to each persona.
    """

    def __init__(self, config: dict):
        self.config = config
        self.api_key = config.get('anthropic_api_key')
        self.temperature = config.get('temperature', 0.7)
        self.question = config.get('question', '')

    def generate_responses(
        self,
        df: pd.DataFrame,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> pd.DataFrame:
        """Generate one Claude response per row of *df* using its backstory.

        Args:
            df: Must contain a 'backstory' column; each backstory is used as
                the persona system prompt for one API call.
            progress_callback: Optional ``fn(done, total)`` invoked after
                every row — including empty and errored rows — so progress
                always reaches ``total``.

        Returns:
            A copy of *df* with an added 'response' column. Empty/NaN
            backstories yield "[EMPTY]"; per-row API failures yield
            "[ERROR: ...]" so one failure does not abort the whole run.

        Raises:
            ValueError: If the 'backstory' column is missing.
        """
        if 'backstory' not in df.columns:
            raise ValueError("DataFrame must have 'backstory' column")

        results = df.copy()
        results['response'] = ""
        total = len(df)

        # Client is created lazily on the first row that actually needs an
        # API call, so runs with only empty backstories never require the
        # anthropic package or a valid key.
        client = None

        for i, (idx, row) in enumerate(df.iterrows()):
            backstory = row['backstory']

            if pd.isna(backstory) or str(backstory).strip() == "":
                results.loc[idx, 'response'] = "[EMPTY]"
            else:
                try:
                    if client is None:
                        import anthropic
                        client = anthropic.Anthropic(api_key=self.api_key)
                    message = client.messages.create(
                        model="claude-3-5-sonnet-20241022",
                        max_tokens=100,
                        temperature=self.temperature,
                        system=(
                            "Adopt the following persona and answer only based on it. "
                            "Do not invent details beyond the provided attributes.\n\n"
                            f"{backstory}"
                        ),
                        messages=[
                            {"role": "user", "content": self.question}
                        ]
                    )
                    results.loc[idx, 'response'] = message.content[0].text.strip()
                except Exception as e:
                    # Best-effort policy: record the failure inline and keep going.
                    results.loc[idx, 'response'] = f"[ERROR: {str(e)[:50]}]"

            # Bug fix: the original `continue` on empty backstories skipped
            # this call, so the progress bar never completed for such rows.
            if progress_callback:
                progress_callback(i + 1, total)

        return results
69
+
70
+
71
class OpenAISampler:
    """OpenAI ChatGPT API sampler for validation studies.

    Expected config keys:
        openai_api_key (str): API key for the OpenAI client.
        temperature (float): Sampling temperature, default 0.7.
        question (str): Survey question posed to each persona.
    """

    def __init__(self, config: dict):
        self.config = config
        self.api_key = config.get('openai_api_key')
        self.temperature = config.get('temperature', 0.7)
        self.question = config.get('question', '')

    def generate_responses(
        self,
        df: pd.DataFrame,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> pd.DataFrame:
        """Generate one ChatGPT response per row of *df* using its backstory.

        Args:
            df: Must contain a 'backstory' column; each backstory is used as
                the persona system prompt for one API call.
            progress_callback: Optional ``fn(done, total)`` invoked after
                every row — including empty and errored rows — so progress
                always reaches ``total``.

        Returns:
            A copy of *df* with an added 'response' column. Empty/NaN
            backstories yield "[EMPTY]"; per-row API failures yield
            "[ERROR: ...]" so one failure does not abort the whole run.

        Raises:
            ValueError: If the 'backstory' column is missing.
        """
        if 'backstory' not in df.columns:
            raise ValueError("DataFrame must have 'backstory' column")

        results = df.copy()
        results['response'] = ""
        total = len(df)

        # Client is created lazily on the first row that actually needs an
        # API call, so runs with only empty backstories never require the
        # openai package or a valid key.
        client = None

        for i, (idx, row) in enumerate(df.iterrows()):
            backstory = row['backstory']

            if pd.isna(backstory) or str(backstory).strip() == "":
                results.loc[idx, 'response'] = "[EMPTY]"
            else:
                try:
                    if client is None:
                        from openai import OpenAI
                        client = OpenAI(api_key=self.api_key)
                    response = client.chat.completions.create(
                        model="gpt-4o",
                        max_tokens=100,
                        temperature=self.temperature,
                        messages=[
                            {
                                "role": "system",
                                "content": (
                                    "Adopt the following persona and answer only based on it. "
                                    "Do not invent details beyond the provided attributes.\n\n"
                                    f"{backstory}"
                                )
                            },
                            {"role": "user", "content": self.question}
                        ]
                    )
                    results.loc[idx, 'response'] = response.choices[0].message.content.strip()
                except Exception as e:
                    # Best-effort policy: record the failure inline and keep going.
                    results.loc[idx, 'response'] = f"[ERROR: {str(e)[:50]}]"

            # Bug fix: the original `continue` on empty backstories skipped
            # this call, so the progress bar never completed for such rows.
            if progress_callback:
                progress_callback(i + 1, total)

        return results
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit>=1.30.0
2
+ pandas>=2.0.0
3
+ anthropic>=0.25.0
4
+ openai>=1.0.0