Upload 3 files
Browse files- dashboard.py +912 -0
- dashboard_backend.py +130 -0
- requirements.txt +4 -0
dashboard.py
ADDED
|
@@ -0,0 +1,912 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Silicon Sampling Dashboard
|
| 4 |
+
|
| 5 |
+
Interactive web interface for generating synthetic survey responses.
|
| 6 |
+
Users can input custom questions and get silicon sample data without coding.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
streamlit run dashboard.py
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import streamlit as st
|
| 13 |
+
import pandas as pd
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
import json
|
| 16 |
+
from datetime import datetime
|
| 17 |
+
|
# Page configuration (st.set_page_config must be the first Streamlit call).
st.set_page_config(
    page_title="COGbot Dashboard",
    page_icon="🤖",
    layout="wide"
)

# Seed session state with defaults on first run; values already set by a
# previous rerun are left untouched.
for _state_key, _state_default in (
    ('results', None),                    # generated responses DataFrame
    ('processing', False),                # True while a generation run is in flight
    ('mode', "Response Generation"),      # mode active when results were produced
    ('question_text', ""),                # question used for the last run
    ('response_options_text', ""),        # options text for Question Testing
    ('iteration_results', []),            # history for continuous improvement
    ('current_iteration', 0),             # iteration counter for re-runs
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Title and description
st.title("🤖 COGbot Dashboard")
st.markdown("""
Generate synthetic survey responses using LLM-based persona simulation.
""")
| 46 |
+
|
# Sidebar: show the LSE logo when the image file ships alongside the app,
# then the configuration controls.
logo_path = "LSE_logo.jpg"
if Path(logo_path).exists():
    st.sidebar.image(logo_path, width=180)
    st.sidebar.markdown("---")

st.sidebar.header("⚙️ Configuration")

# Where persona backstories come from.
data_source = st.sidebar.radio(
    "Data Source",
    ["Default ESS UK (1,286 respondents)", "Upload CSV (not available yet)"],
)

# How much of each backstory is fed to the model.
backstory_length = st.sidebar.radio(
    "Backstory Length",
    ["Long (detailed)", "Short (concise)"],
    help="Choose between detailed backstories with full demographic info or concise versions",
)
| 68 |
+
|
# Load the backstories DataFrame from the selected source.
# Sets df_backstories to a DataFrame on success, or None when nothing usable
# is available (downstream code checks for None before enabling generation).
if "Upload CSV" in data_source:
    uploaded_file = st.sidebar.file_uploader(
        "Upload backstories CSV",
        type=['csv'],
        help="CSV must have 'backstory' column"
    )
    if uploaded_file:
        df_backstories = pd.read_csv(uploaded_file)
        # FIX: enforce the contract promised in the help text — without a
        # 'backstory' column the sampler would fail much later with an
        # opaque KeyError.
        if 'backstory' not in df_backstories.columns:
            df_backstories = None
            st.sidebar.error("⚠️ Uploaded CSV must contain a 'backstory' column.")
    else:
        df_backstories = None
else:
    # Load default ESS data shipped next to the app.
    default_path = Path("ess_uk_with_backstories.csv")
    if default_path.exists():
        df_backstories = pd.read_csv(default_path)
    else:
        df_backstories = None
        st.sidebar.warning("⚠️ Default file not found: ess_uk_with_backstories.csv")
| 87 |
+
|
# Show data info and pick the sample size (only when data loaded successfully).
if df_backstories is not None:
    st.sidebar.success(f"✅ Loaded {len(df_backstories):,} respondents")

    # Sample size
    max_size = len(df_backstories)
    sample_size = st.sidebar.slider(
        "Sample Size",
        # FIX: clamp the lower bound so a dataset with fewer than 10 rows
        # cannot make min_value exceed max_value (Streamlit raises a
        # StreamlitAPIException in that case).
        min_value=min(10, max_size),
        max_value=max_size,
        value=min(50, max_size),
        step=10,
        help="Start with small sample for testing"
    )
else:
    # No data loaded: generation stays disabled via can_generate below.
    sample_size = 0
| 104 |
+
|
# Model settings: provider, API key, and sampling temperature.
st.sidebar.subheader("Model Settings")

model_option = st.sidebar.selectbox(
    "Model",
    ["Claude (Anthropic)", "GPT-4 (OpenAI)"],
    help="API-based models. Provide your API key below."
)

# One password input whose label/help match the chosen provider.
if "Claude" in model_option:
    _key_label = "Anthropic API Key"
    _key_help = "Get your key from https://console.anthropic.com/"
else:
    _key_label = "OpenAI API Key"
    _key_help = "Get your key from https://platform.openai.com/"
api_key = st.sidebar.text_input(_key_label, type="password", help=_key_help)

temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.0,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Higher = more creative, Lower = more consistent"
)
| 136 |
+
|
# Main panel - Question configuration
st.header("📋 Step 1: Configure Question")

# Mode selection: Response Generation vs Question Testing.
# NOTE(review): downstream code mixes `mode == "Question Testing"` with
# `"Question Testing" in mode`; the third option also contains that substring,
# so the two checks disagree for the Continuous Improvement mode.
mode = st.radio(
    "Mode",
    ["Response Generation", "Question Testing", "Question Testing (Continuous Improvement)"],
    help="Response Generation: Get synthetic survey responses. Question Testing: Get feedback on question quality. Continuous Improvement: Iteratively improve question through multiple rounds of testing - manually control each iteration."
)

col1, col2 = st.columns([2, 1])

with col1:
    # Use next_question from session state if available (from re-run);
    # this is a one-shot handoff from the improvement loop to this widget.
    default_question = st.session_state.get('next_question', '')
    if default_question:
        # Clear next_question after using it so it is consumed only once.
        st.session_state.next_question = ''

    question_text = st.text_area(
        "Survey Question",
        value=default_question,
        height=80,
        placeholder="Enter your survey question here...",
        help="The question your synthetic respondents will answer" if mode == "Response Generation" else "The draft question you want to test for clarity and quality"
    )

    # Add concept description for Question Testing mode (both variants).
    if "Question Testing" in mode:
        # Use stored concept description if available (persists across re-runs).
        default_concept = st.session_state.get('concept_description', '')

        concept_description = st.text_area(
            "Concept Description",
            value=default_concept,
            height=100,
            placeholder="Describe what you are trying to measure with this question...",
            help="Describe in as much detail as you can what you are trying to measure with this question. This helps the LLM understand your intent and provide better feedback."
        )
    else:
        # Not used in Response Generation mode.
        concept_description = ""

with col2:
    if mode == "Response Generation":
        response_format = st.selectbox(
            "Response Format",
            ["Scale (0-10)", "Scale (1-5)", "Multiple Choice", "Yes/No", "Open Text"]
        )
    else:  # Question Testing mode: feedback is always collected as free text
        response_format = "Open Text"
        st.info("📝 Question Testing uses open text responses to gather feedback on question quality.")
| 188 |
+
|
# Configure prompt based on mode.
# Initialize variables that are referenced later in the preview regardless
# of which branch runs below.
mc_options = ""
response_options_text = ""

if "Question Testing" in mode:
    # Question Testing mode: build a critique prompt asking personas to
    # comment on the question's clarity rather than answer it.
    st.subheader("Response Options/Instructions")

    # Use next_options from session state if available (from re-run);
    # one-shot handoff, mirroring next_question above.
    default_options = st.session_state.get('next_options', '')
    if default_options:
        # Clear next_options after using it.
        st.session_state.next_options = ''

    response_options_text = st.text_area(
        "Response Options (if applicable)",
        value=default_options,
        height=100,
        placeholder="e.g., Scale from 0-10 where 0=Not at all, 10=Extremely, or Multiple choice options A, B, C, D",
        help="Include any response options or scales that are part of the question being tested"
    )

    # Show message if this is a re-run iteration of the improvement loop.
    if st.session_state.get('show_rerun_message', False):
        st.info(f"🔄 **Iteration {st.session_state.current_iteration + 1}:** Improved question loaded. Click 'Generate Responses' below to test the new version.")
        st.session_state.show_rerun_message = False

    # Build the testing prompt sent to every synthetic respondent.
    instructions = f"""Please provide feedback on this survey question. Comment on:

1. Are there any parts of the question that are ambiguous or unclear?
2. Are there any parts that are difficult to understand?
3. Did you have any problems thinking about how to answer?
4. Are the response options (if provided) appropriate and complete?

Provide your feedback in 2-3 sentences, being specific about any issues you identify."""

    # Automatically enable thematic coding for Question Testing — the
    # feedback is only useful once aggregated into themes.
    enable_thematic_coding = True
    st.info("🔍 Thematic analysis will automatically run to identify common issues in the question.")

else:
    # Response Generation mode: build answer instructions for the chosen format.
    # Scale anchor labels (if scale selected).
    if "Scale" in response_format:
        st.subheader("Scale Labels")

        if "0-10" in response_format:
            # 10-point scale: label just the endpoints.
            col_low, col_high = st.columns(2)
            with col_low:
                low_label = st.text_input(
                    "0 means",
                    value="Not at all",
                    help="What does the lowest value mean?"
                )
            with col_high:
                high_label = st.text_input(
                    "10 means",
                    value="Extremely",
                    help="What does the highest value mean?"
                )
            instructions = f"Respond with a single integer from 0 to 10, where 0 means '{low_label}' and 10 means '{high_label}'. Only output the number."

        else:  # 1-5 scale: label all 5 points
            label_1 = st.text_input("1 means", value="Strongly disagree")
            label_2 = st.text_input("2 means", value="Disagree")
            label_3 = st.text_input("3 means", value="Neither agree nor disagree")
            label_4 = st.text_input("4 means", value="Agree")
            label_5 = st.text_input("5 means", value="Strongly agree")

            instructions = f"""Respond with a single integer from 1 to 5 based on these labels:
1 = {label_1}
2 = {label_2}
3 = {label_3}
4 = {label_4}
5 = {label_5}

Only output the number."""
    else:
        # Non-scale formats: fixed per-format answer instructions.
        format_instructions = {
            "Multiple Choice": "Choose one option and respond with only the letter (A, B, C, or D).",
            "Yes/No": "Respond with only 'Yes' or 'No'.",
            "Open Text": "Provide a brief 1-2 sentence response based on your persona."
        }
        instructions = format_instructions.get(response_format, "")

    # Multiple choice options (if selected) — always exactly four, A-D.
    if response_format == "Multiple Choice":
        st.subheader("Response Options")
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            option_a = st.text_input("Option A", "Strongly agree")
        with col2:
            option_b = st.text_input("Option B", "Agree")
        with col3:
            option_c = st.text_input("Option C", "Disagree")
        with col4:
            option_d = st.text_input("Option D", "Strongly disagree")

        mc_options = f"\nA. {option_a}\nB. {option_b}\nC. {option_c}\nD. {option_d}"
    else:
        mc_options = ""

    # Thematic coding is opt-in here (it is forced on in Question Testing).
    enable_thematic_coding = False
    if response_format == "Open Text":
        st.subheader("Thematic Coding")
        enable_thematic_coding = st.checkbox(
            "Perform automated thematic analysis after generating responses",
            value=False,
            help="Uses LLM to identify themes, counts, and percentages in open text responses. Runs automatically after response generation."
        )
| 304 |
+
|
| 305 |
+
# Preview full prompt
|
| 306 |
+
with st.expander("🔍 Preview Full Prompt"):
|
| 307 |
+
st.markdown("**System Prompt:**")
|
| 308 |
+
st.code("""Adopt the following persona and answer only based on it.
|
| 309 |
+
Do not invent details beyond the provided attributes.
|
| 310 |
+
|
| 311 |
+
[Backstory will be inserted here for each respondent]""")
|
| 312 |
+
|
| 313 |
+
st.markdown("**User Prompt:**")
|
| 314 |
+
if mode == "Question Testing":
|
| 315 |
+
# Include response options in the question display for testing
|
| 316 |
+
full_question = f"Question: {question_text}\n"
|
| 317 |
+
if response_options_text.strip():
|
| 318 |
+
full_question += f"\nResponse Options: {response_options_text}\n"
|
| 319 |
+
full_question += f"\n{instructions}"
|
| 320 |
+
else:
|
| 321 |
+
full_question = question_text + mc_options + "\n\n" + instructions
|
| 322 |
+
st.code(full_question)
|
| 323 |
+
|
# Step 2: generate/test button. Header and label depend on mode.
# FIX: substring check so the Continuous Improvement variant gets the
# "Test Question" framing, consistent with the rest of the file.
if "Question Testing" in mode:
    st.header("🧪 Step 2: Test Question")
    button_text = "🧪 Test Question with Synthetic Respondents"
else:
    st.header("🚀 Step 2: Generate Responses")
    button_text = "🎯 Generate Responses"

# Generation needs loaded backstories, a non-empty question, and no run in flight.
can_generate = (
    df_backstories is not None
    and question_text.strip() != ""
    and not st.session_state.processing
)

if st.button(
    button_text,
    disabled=not can_generate,
    type="primary",
    use_container_width=True
):
    # FIX: validate the API key BEFORE flipping the processing flag.
    # Previously st.stop() ran after processing=True, leaving the flag set
    # and the button permanently disabled (can_generate requires it False).
    if not api_key:
        st.error(f"⚠️ Please provide your {'Anthropic' if 'Claude' in model_option else 'OpenAI'} API key in the sidebar.")
        st.stop()

    st.session_state.processing = True
    st.session_state.results = None
    st.session_state.mode = mode  # Store mode for results display
    st.session_state.question_text = question_text  # Store for thematic analysis
    # FIX: `in` instead of `==` so the Continuous Improvement variant also
    # persists these; otherwise later reruns read stale session values.
    if "Question Testing" in mode:
        st.session_state.response_options_text = response_options_text  # Store for improved version
        st.session_state.concept_description = concept_description  # Store concept description

    # Prepare configuration handed to the backend sampler.
    config = {
        "question": full_question,
        "temperature": temperature,
        "sample_size": sample_size
    }

    # Create sampler based on model selection. Imports are deliberately
    # deferred so the dashboard loads even if one backend is unavailable.
    if "Claude" in model_option:
        from dashboard_backend import AnthropicSampler
        config["anthropic_api_key"] = api_key
        sampler = AnthropicSampler(config)
    else:  # OpenAI
        from dashboard_backend import OpenAISampler
        config["openai_api_key"] = api_key
        sampler = OpenAISampler(config)

    # Progress bar widgets updated from the callback below.
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Sample backstories; fixed seed keeps the sample stable across reruns.
    df_sample = df_backstories.sample(n=sample_size, random_state=42).copy()

    # Apply backstory length preference.
    if "Short" in backstory_length and 'backstory' in df_sample.columns:
        # Truncate to first 150 characters for the short version.
        df_sample['backstory'] = df_sample['backstory'].apply(
            lambda x: x[:150] + "..." if isinstance(x, str) and len(x) > 150 else x
        )

    # Run generation; the processing flag is cleared on both paths.
    try:
        results = sampler.generate_responses(
            df_sample,
            progress_callback=lambda i, total: (
                progress_bar.progress(i / total),
                status_text.text(f"Processing: {i}/{total} respondents ({100*i/total:.1f}%)")
            )
        )

        st.session_state.results = results
        st.session_state.processing = False
        st.success(f"✅ Generated {len(results)} responses!")
        st.rerun()

    except Exception as e:
        st.error(f"❌ Error: {str(e)}")
        st.session_state.processing = False
| 406 |
+
|
# Show results (persisted in session state across reruns).
if st.session_state.results is not None:
    st.header("📊 Step 3: Results")

    results_df = st.session_state.results

    # Summary stats
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Total Responses", len(results_df))
    with col2:
        valid_responses = results_df['response'].notna().sum()
        st.metric("Valid Responses", valid_responses)
    with col3:
        # FIX: guard against an empty results frame (previously raised
        # ZeroDivisionError when no rows came back).
        total_rows = len(results_df)
        completion_rate = 100 * valid_responses / total_rows if total_rows else 0.0
        st.metric("Completion Rate", f"{completion_rate:.1f}%")

    # Preview
    st.subheader("Preview (First 10 rows)")
    st.dataframe(results_df.head(10), use_container_width=True)

    # Download — timestamped filename so repeated runs don't collide.
    st.subheader("Download Results")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"silicon_sample_{timestamp}.csv"

    csv = results_df.to_csv(index=False)
    st.download_button(
        label="📥 Download CSV",
        data=csv,
        file_name=filename,
        mime="text/csv",
        use_container_width=True
    )
| 442 |
+
|
    # Response distribution and statistics (still inside the results block).
    if response_format in ["Scale (0-10)", "Scale (1-5)", "Yes/No", "Multiple Choice"]:
        st.subheader(f"Response Distribution: {question_text}")
        try:
            # For numeric formats, convert to numbers; non-parsable entries
            # become NaN and are dropped from the statistics.
            if response_format.startswith("Scale"):
                numeric_responses = pd.to_numeric(results_df['response'], errors='coerce')
                valid_responses = numeric_responses.dropna()
            elif response_format == "Yes/No":
                # For Yes/No, show frequency distribution of the raw strings.
                valid_responses = results_df['response'].dropna()
            elif response_format == "Multiple Choice":
                # For Multiple Choice, show frequency distribution of letters.
                valid_responses = results_df['response'].dropna()

            if len(valid_responses) > 0:
                # Show summary statistics for numeric scales.
                if response_format.startswith("Scale"):
                    col1, col2, col3, col4, col5 = st.columns(5)

                    with col1:
                        st.metric("Mean", f"{valid_responses.mean():.2f}")
                    with col2:
                        st.metric("Median", f"{valid_responses.median():.2f}")
                    with col3:
                        st.metric("Std Dev", f"{valid_responses.std():.2f}")
                    with col4:
                        # mode() can return several values (ties) or none;
                        # display the first or fall back to "N/A".
                        mode_val = valid_responses.mode()
                        mode_display = f"{mode_val.iloc[0]:.0f}" if len(mode_val) > 0 else "N/A"
                        st.metric("Mode", mode_display)
                    with col5:
                        st.metric("Valid N", f"{len(valid_responses)}")

                    # Distribution chart, sorted by scale value.
                    st.bar_chart(pd.to_numeric(results_df['response'], errors='coerce').value_counts().sort_index())

                # Show frequency counts for categorical formats.
                else:
                    value_counts = valid_responses.value_counts()

                    # Display as metrics, one column per answer value.
                    cols = st.columns(min(len(value_counts), 5))
                    for idx, (value, count) in enumerate(value_counts.items()):
                        if idx < 5:  # Limit to 5 columns; extra values still appear in the chart
                            with cols[idx]:
                                pct = 100 * count / len(valid_responses)
                                st.metric(f"{value}", f"{count} ({pct:.1f}%)")

                    # Also show total N
                    st.metric("Total Valid N", f"{len(valid_responses)}")

                    # Distribution chart
                    st.bar_chart(value_counts)
            else:
                st.info("No valid responses to analyze")
        except Exception as e:
            # Best-effort display: statistics failures must not hide the data.
            st.info(f"Could not generate statistics: {str(e)}")
| 500 |
+
|
| 501 |
+
# Thematic coding for open text responses
|
| 502 |
+
elif response_format == "Open Text" and enable_thematic_coding:
|
| 503 |
+
# Get the stored mode and question text
|
| 504 |
+
stored_mode = st.session_state.get('mode', 'Response Generation')
|
| 505 |
+
stored_question = st.session_state.get('question_text', question_text)
|
| 506 |
+
|
| 507 |
+
# Different heading based on mode
|
| 508 |
+
if stored_mode == "Question Testing":
|
| 509 |
+
st.subheader(f"Question Testing Results: {stored_question}")
|
| 510 |
+
else:
|
| 511 |
+
st.subheader(f"Thematic Analysis: {stored_question}")
|
| 512 |
+
|
| 513 |
+
# Get valid text responses
|
| 514 |
+
valid_responses = results_df['response'].dropna()
|
| 515 |
+
valid_responses = valid_responses[valid_responses.str.strip() != ""]
|
| 516 |
+
|
| 517 |
+
if len(valid_responses) > 0:
|
| 518 |
+
st.info(f"Analyzing {len(valid_responses)} open text responses...")
|
| 519 |
+
|
| 520 |
+
# Automatically run thematic coding
|
| 521 |
+
if True: # Changed from button to automatic
|
| 522 |
+
with st.spinner("Analyzing themes with LLM..."):
|
| 523 |
+
try:
|
| 524 |
+
# Prepare responses for analysis
|
| 525 |
+
responses_text = "\n\n".join([f"Response {i+1}: {resp}" for i, resp in enumerate(valid_responses)])
|
| 526 |
+
|
| 527 |
+
# Create thematic analysis prompt - different for Question Testing
|
| 528 |
+
if stored_mode == "Question Testing":
|
| 529 |
+
coding_prompt = f"""You are a survey methodology expert analyzing feedback from respondents who tested a draft survey question.
|
| 530 |
+
|
| 531 |
+
Question being tested: "{stored_question}"
|
| 532 |
+
|
| 533 |
+
Here is the feedback from respondents (total of {len(valid_responses)} responses):
|
| 534 |
+
|
| 535 |
+
{responses_text}
|
| 536 |
+
|
| 537 |
+
CRITICAL INSTRUCTIONS:
|
| 538 |
+
- DO NOT list individual responses
|
| 539 |
+
- DO NOT copy feedback verbatim
|
| 540 |
+
- DO NOT fabricate or hallucinate problems that aren't genuinely present in the feedback
|
| 541 |
+
- DO NOT feel pressured to find a specific number of issues
|
| 542 |
+
- ONLY report genuine problems, ambiguities, or concerns that respondents actually raised
|
| 543 |
+
- If the question and response scales are clear and well-designed, say so - it's perfectly acceptable to find zero issues
|
| 544 |
+
- DO group similar issues together and count how many respondents mentioned each
|
| 545 |
+
|
| 546 |
+
Task:
|
| 547 |
+
1. Read ALL responses carefully and identify ONLY genuine recurring issues and concerns
|
| 548 |
+
2. If respondents found the question clear and had no problems, state that the question appears well-designed
|
| 549 |
+
3. Group similar problems together (e.g., all mentions of "unclear terminology" should be one issue)
|
| 550 |
+
4. For each distinct issue that was genuinely raised, provide:
|
| 551 |
+
- Issue name (2-4 words, e.g., "Ambiguous wording", "Unclear scale", "Missing context")
|
| 552 |
+
- Brief description (1 sentence explaining the specific problem)
|
| 553 |
+
- Count of how many respondents mentioned this issue
|
| 554 |
+
- Percentage of total respondents
|
| 555 |
+
|
| 556 |
+
REQUIRED FORMAT (follow exactly):
|
| 557 |
+
|
| 558 |
+
ISSUE 1: [Name]
|
| 559 |
+
DESCRIPTION: [One sentence explaining the problem]
|
| 560 |
+
COUNT: [Number of respondents who mentioned this]
|
| 561 |
+
PERCENTAGE: [Percentage]
|
| 562 |
+
|
| 563 |
+
ISSUE 2: [Name]
|
| 564 |
+
DESCRIPTION: [One sentence explaining the problem]
|
| 565 |
+
COUNT: [Number]
|
| 566 |
+
PERCENTAGE: [Percentage]
|
| 567 |
+
|
| 568 |
+
[Continue for all distinct issues]
|
| 569 |
+
|
| 570 |
+
SUMMARY:
|
| 571 |
+
[If issues were identified: 2-3 sentence summary of the most critical problems requiring attention]
|
| 572 |
+
[If no significant issues were found: Statement confirming the question appears clear and well-designed based on respondent feedback]
|
| 573 |
+
|
| 574 |
+
Example of CORRECT output when issues are found:
|
| 575 |
+
ISSUE 1: Ambiguous term "partner"
|
| 576 |
+
DESCRIPTION: Respondents were unclear whether "partner" refers to romantic partner, business partner, or roommate
|
| 577 |
+
COUNT: 15
|
| 578 |
+
PERCENTAGE: 75%
|
| 579 |
+
|
| 580 |
+
ISSUE 2: Vague timeframe
|
| 581 |
+
DESCRIPTION: The phrase "these days" lacks specificity about the time period being asked about
|
| 582 |
+
COUNT: 8
|
| 583 |
+
PERCENTAGE: 40%
|
| 584 |
+
|
| 585 |
+
Example of CORRECT output when no issues are found:
|
| 586 |
+
SUMMARY:
|
| 587 |
+
Based on the respondent feedback, the question appears well-designed and clear. Respondents understood what was being asked, found the wording unambiguous, and had no difficulty formulating responses. No significant issues or concerns were raised that would require revision."""
|
| 588 |
+
else:
|
| 589 |
+
coding_prompt = f"""You are a qualitative researcher conducting thematic analysis on open-ended survey responses.
|
| 590 |
+
|
| 591 |
+
Question asked: "{stored_question}"
|
| 592 |
+
|
| 593 |
+
Here are all the responses:
|
| 594 |
+
|
| 595 |
+
{responses_text}
|
| 596 |
+
|
| 597 |
+
Task:
|
| 598 |
+
1. Identify the main themes present in these responses (aim for 4-8 themes)
|
| 599 |
+
2. For each theme, provide:
|
| 600 |
+
- Theme name (2-4 words)
|
| 601 |
+
- Brief description (1 sentence)
|
| 602 |
+
- Count of how many responses express this theme
|
| 603 |
+
- Percentage of total responses
|
| 604 |
+
|
| 605 |
+
Format your response as:
|
| 606 |
+
THEME: [Name]
|
| 607 |
+
DESCRIPTION: [Description]
|
| 608 |
+
COUNT: [Number]
|
| 609 |
+
PERCENTAGE: [Percentage]
|
| 610 |
+
|
| 611 |
+
[Repeat for each theme]"""
|
| 612 |
+
|
| 613 |
+
# Send to API for analysis
|
| 614 |
+
if "Claude" in model_option:
|
| 615 |
+
import anthropic
|
| 616 |
+
client = anthropic.Anthropic(api_key=api_key)
|
| 617 |
+
message = client.messages.create(
|
| 618 |
+
model="claude-3-5-sonnet-20241022",
|
| 619 |
+
max_tokens=2000,
|
| 620 |
+
temperature=0.3,
|
| 621 |
+
system="You are a qualitative research expert analyzing survey responses.",
|
| 622 |
+
messages=[{"role": "user", "content": coding_prompt}]
|
| 623 |
+
)
|
| 624 |
+
analysis_result = message.content[0].text.strip()
|
| 625 |
+
else: # OpenAI
|
| 626 |
+
from openai import OpenAI
|
| 627 |
+
client = OpenAI(api_key=api_key)
|
| 628 |
+
response = client.chat.completions.create(
|
| 629 |
+
model="gpt-4o",
|
| 630 |
+
max_tokens=2000,
|
| 631 |
+
temperature=0.3,
|
| 632 |
+
messages=[
|
| 633 |
+
{"role": "system", "content": "You are a qualitative research expert analyzing survey responses."},
|
| 634 |
+
{"role": "user", "content": coding_prompt}
|
| 635 |
+
]
|
| 636 |
+
)
|
| 637 |
+
analysis_result = response.choices[0].message.content.strip()
|
| 638 |
+
|
| 639 |
+
# Display results
|
| 640 |
+
st.markdown("### Thematic Coding Results")
|
| 641 |
+
st.text_area("Analysis", analysis_result, height=400)
|
| 642 |
+
|
| 643 |
+
# For Question Testing mode, add problem summary and offer to suggest improved wording
|
| 644 |
+
if "Question Testing" in stored_mode:
|
| 645 |
+
# Parse the analysis to extract problem counts
|
| 646 |
+
import re
|
| 647 |
+
|
| 648 |
+
# Extract counts from the analysis (looks for patterns like "Count: X" or "X respondents" or "X mentions")
|
| 649 |
+
count_patterns = [
|
| 650 |
+
r'Count:\s*(\d+)',
|
| 651 |
+
r'(\d+)\s+respondents?',
|
| 652 |
+
r'(\d+)\s+mentions?',
|
| 653 |
+
r'(\d+)/\d+', # X/total format
|
| 654 |
+
r'\((\d+)\s+respondents?\)',
|
| 655 |
+
]
|
| 656 |
+
|
| 657 |
+
problem_counts = []
|
| 658 |
+
for pattern in count_patterns:
|
| 659 |
+
matches = re.findall(pattern, analysis_result, re.IGNORECASE)
|
| 660 |
+
if matches:
|
| 661 |
+
problem_counts.extend([int(m) for m in matches])
|
| 662 |
+
|
| 663 |
+
# Calculate summary statistics
|
| 664 |
+
if problem_counts:
|
| 665 |
+
num_problems = len(problem_counts) # a) distinct problems
|
| 666 |
+
total_mentions = sum(problem_counts) # c) total problem mentions
|
| 667 |
+
n_respondents = len(valid_responses)
|
| 668 |
+
avg_problems_per_respondent = total_mentions / n_respondents if n_respondents > 0 else 0 # d) average
|
| 669 |
+
|
| 670 |
+
# Display problem summary
|
| 671 |
+
st.markdown("---")
|
| 672 |
+
st.markdown("### Problem Summary")
|
| 673 |
+
|
| 674 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 675 |
+
with col1:
|
| 676 |
+
st.metric("Distinct Problems", num_problems)
|
| 677 |
+
with col2:
|
| 678 |
+
st.metric("Total Mentions", total_mentions)
|
| 679 |
+
with col3:
|
| 680 |
+
st.metric("Respondents", n_respondents)
|
| 681 |
+
with col4:
|
| 682 |
+
st.metric("Avg Problems/Respondent", f"{avg_problems_per_respondent:.2f}")
|
| 683 |
+
|
| 684 |
+
# Show breakdown
|
| 685 |
+
with st.expander("📊 Problem Breakdown"):
|
| 686 |
+
st.markdown("**Problems by frequency:**")
|
| 687 |
+
for i, count in enumerate(sorted(problem_counts, reverse=True), 1):
|
| 688 |
+
pct = (count / n_respondents * 100) if n_respondents > 0 else 0
|
| 689 |
+
st.write(f"Problem {i}: {count} mentions ({pct:.1f}% of respondents)")
|
| 690 |
+
|
| 691 |
+
st.markdown("---")
|
| 692 |
+
st.markdown("### Suggest Improved Question Wording")
|
| 693 |
+
|
| 694 |
+
if st.button("✨ Generate Improved Question", type="secondary"):
|
| 695 |
+
with st.spinner("Generating improved question wording..."):
|
| 696 |
+
try:
|
| 697 |
+
# Get response options and concept description if they exist
|
| 698 |
+
stored_options = st.session_state.get('response_options_text', '')
|
| 699 |
+
stored_concept = st.session_state.get('concept_description', '')
|
| 700 |
+
|
| 701 |
+
# Create improvement prompt
|
| 702 |
+
# Build the sections separately to avoid f-string backslash issue
|
| 703 |
+
options_section = f"\nOriginal Response Options: {stored_options}\n" if stored_options else ""
|
| 704 |
+
concept_section = f"\nConcept Being Measured: {stored_concept}\n" if stored_concept else ""
|
| 705 |
+
improved_options_section = "\n\nIMPROVED RESPONSE OPTIONS:\n[Your improved options]\n" if stored_options else ""
|
| 706 |
+
|
| 707 |
+
improvement_prompt = f"""You are a survey methodology expert. Based on the respondent feedback analysis below, suggest an improved version of the survey question that addresses the identified issues.
|
| 708 |
+
|
| 709 |
+
Original Question: "{stored_question}"{options_section}{concept_section}
|
| 710 |
+
|
| 711 |
+
Respondent Feedback Analysis:
|
| 712 |
+
{analysis_result}
|
| 713 |
+
|
| 714 |
+
Task:
|
| 715 |
+
1. Identify the main problems with the current question based on respondent feedback
|
| 716 |
+
2. Provide an improved version of the question that addresses these problems
|
| 717 |
+
3. If response options were provided, suggest improved response options as well
|
| 718 |
+
4. Explain specifically how the new version improves on the original based on the respondent feedback
|
| 719 |
+
|
| 720 |
+
Format your response as:
|
| 721 |
+
|
| 722 |
+
PROBLEMS IDENTIFIED:
|
| 723 |
+
[List the specific problems with the current question based on respondent feedback]
|
| 724 |
+
|
| 725 |
+
IMPROVED QUESTION:
|
| 726 |
+
[Your improved question text]{improved_options_section}
|
| 727 |
+
|
| 728 |
+
HOW THE NEW VERSION IMPROVES:
|
| 729 |
+
[Explain how each change addresses the problems identified in respondent feedback]"""
|
| 730 |
+
|
| 731 |
+
# Send to API for improvement
|
| 732 |
+
if "Claude" in model_option:
|
| 733 |
+
import anthropic
|
| 734 |
+
client = anthropic.Anthropic(api_key=api_key)
|
| 735 |
+
message = client.messages.create(
|
| 736 |
+
model="claude-3-5-sonnet-20241022",
|
| 737 |
+
max_tokens=1000,
|
| 738 |
+
temperature=0.3,
|
| 739 |
+
system="You are a survey methodology expert specializing in question wording and design.",
|
| 740 |
+
messages=[{"role": "user", "content": improvement_prompt}]
|
| 741 |
+
)
|
| 742 |
+
improvement_result = message.content[0].text.strip()
|
| 743 |
+
else: # OpenAI
|
| 744 |
+
from openai import OpenAI
|
| 745 |
+
client = OpenAI(api_key=api_key)
|
| 746 |
+
response = client.chat.completions.create(
|
| 747 |
+
model="gpt-4o",
|
| 748 |
+
max_tokens=1000,
|
| 749 |
+
temperature=0.3,
|
| 750 |
+
messages=[
|
| 751 |
+
{"role": "system", "content": "You are a survey methodology expert specializing in question wording and design."},
|
| 752 |
+
{"role": "user", "content": improvement_prompt}
|
| 753 |
+
]
|
| 754 |
+
)
|
| 755 |
+
improvement_result = response.choices[0].message.content.strip()
|
| 756 |
+
|
| 757 |
+
# Display improved version
|
| 758 |
+
st.markdown("### Improved Question Suggestion")
|
| 759 |
+
st.text_area("Suggested Improvements", improvement_result, height=300)
|
| 760 |
+
|
| 761 |
+
# Store improvement result for potential re-run
|
| 762 |
+
st.session_state['last_improvement'] = improvement_result
|
| 763 |
+
|
| 764 |
+
# Store current iteration results for comparison
|
| 765 |
+
if problem_counts:
|
| 766 |
+
iteration_data = {
|
| 767 |
+
'iteration': st.session_state.current_iteration,
|
| 768 |
+
'question': stored_question,
|
| 769 |
+
'response_options': stored_options,
|
| 770 |
+
'num_problems': num_problems,
|
| 771 |
+
'total_mentions': total_mentions,
|
| 772 |
+
'n_respondents': n_respondents,
|
| 773 |
+
'avg_problems': avg_problems_per_respondent,
|
| 774 |
+
'problem_counts': problem_counts
|
| 775 |
+
}
|
| 776 |
+
# Only add if this iteration isn't already stored
|
| 777 |
+
if not any(d['iteration'] == st.session_state.current_iteration for d in st.session_state.iteration_results):
|
| 778 |
+
st.session_state.iteration_results.append(iteration_data)
|
| 779 |
+
|
| 780 |
+
st.info("💡 Review the suggested improvements and adapt them as needed for your research context.")
|
| 781 |
+
|
| 782 |
+
# Add re-run button for Question Testing mode
|
| 783 |
+
if "Question Testing" in stored_mode:
|
| 784 |
+
st.markdown("---")
|
| 785 |
+
if st.button("🔄 Re-run COGbot on Improved Question", type="primary"):
|
| 786 |
+
# Get improvement result from session state (more reliable than local variable)
|
| 787 |
+
stored_improvement = st.session_state.get('last_improvement', improvement_result)
|
| 788 |
+
|
| 789 |
+
if not stored_improvement:
|
| 790 |
+
st.error("No improvement suggestion found. Please click 'Generate Improved Question' first.")
|
| 791 |
+
else:
|
| 792 |
+
# Extract improved question from the result
|
| 793 |
+
import re
|
| 794 |
+
|
| 795 |
+
# Try multiple patterns to extract improved question
|
| 796 |
+
patterns = [
|
| 797 |
+
r'IMPROVED QUESTION:\s*\n+([^\n].*?)(?:\n\n+IMPROVED RESPONSE OPTIONS:|\n\n+HOW THE NEW VERSION IMPROVES:|$)',
|
| 798 |
+
r'IMPROVED QUESTION:\s*\n+([^\n][^\n]+)', # Just get first line after
|
| 799 |
+
r'improved question[:\s]+([^\n]+)', # More flexible
|
| 800 |
+
]
|
| 801 |
+
|
| 802 |
+
new_question = None
|
| 803 |
+
for pattern in patterns:
|
| 804 |
+
match = re.search(pattern, stored_improvement, re.DOTALL | re.IGNORECASE)
|
| 805 |
+
if match:
|
| 806 |
+
new_question = match.group(1).strip()
|
| 807 |
+
# Remove any leading quotes or markers
|
| 808 |
+
new_question = new_question.strip('"\'')
|
| 809 |
+
if len(new_question) > 10: # Valid question should be longer than 10 chars
|
| 810 |
+
break
|
| 811 |
+
|
| 812 |
+
if new_question:
|
| 813 |
+
# Try to extract improved response options if present
|
| 814 |
+
options_match = re.search(r'IMPROVED RESPONSE OPTIONS:\s*\n+(.*?)(?:\n\n+HOW THE NEW VERSION IMPROVES:|$)',
|
| 815 |
+
stored_improvement, re.DOTALL | re.IGNORECASE)
|
| 816 |
+
new_options = options_match.group(1).strip() if options_match else stored_options
|
| 817 |
+
|
| 818 |
+
# Debug: Show what was extracted
|
| 819 |
+
st.info(f"✅ Extracted question: {new_question[:100]}...")
|
| 820 |
+
|
| 821 |
+
# Store the new question and options for next run FIRST
|
| 822 |
+
st.session_state.next_question = new_question
|
| 823 |
+
st.session_state.next_options = new_options
|
| 824 |
+
|
| 825 |
+
# Increment iteration counter
|
| 826 |
+
st.session_state.current_iteration += 1
|
| 827 |
+
|
| 828 |
+
# Clear old results to force regeneration
|
| 829 |
+
st.session_state.results = None
|
| 830 |
+
if 'last_improvement' in st.session_state:
|
| 831 |
+
del st.session_state['last_improvement']
|
| 832 |
+
|
| 833 |
+
# Set flag to show message after rerun
|
| 834 |
+
st.session_state.show_rerun_message = True
|
| 835 |
+
|
| 836 |
+
# Immediately rerun
|
| 837 |
+
st.rerun()
|
| 838 |
+
else:
|
| 839 |
+
st.error("❌ Could not extract improved question from the output.")
|
| 840 |
+
st.warning("💡 Please manually copy the improved question and paste it into the question box above.")
|
| 841 |
+
|
| 842 |
+
except Exception as e:
|
| 843 |
+
st.error(f"Error generating improved question: {str(e)}")
|
| 844 |
+
|
| 845 |
+
except Exception as e:
|
| 846 |
+
st.error(f"Error during thematic analysis: {str(e)}")
|
| 847 |
+
else:
|
| 848 |
+
st.info("No valid open text responses to analyze")
|
| 849 |
+
|
| 850 |
+
# Display iteration comparison table for Question Testing mode
|
| 851 |
+
if len(st.session_state.iteration_results) > 0 and "Question Testing" in st.session_state.get('mode', ''):
|
| 852 |
+
st.markdown("---")
|
| 853 |
+
st.markdown("## 📊 Iteration Comparison")
|
| 854 |
+
st.markdown(f"**Total iterations completed:** {len(st.session_state.iteration_results)}")
|
| 855 |
+
|
| 856 |
+
# Create comparison table
|
| 857 |
+
comparison_data = []
|
| 858 |
+
for iteration in st.session_state.iteration_results:
|
| 859 |
+
comparison_data.append({
|
| 860 |
+
"Iteration": iteration['iteration'] + 1,
|
| 861 |
+
"Question": iteration['question'][:100] + "..." if len(iteration['question']) > 100 else iteration['question'],
|
| 862 |
+
"# Problems": iteration['num_problems'],
|
| 863 |
+
"Total Mentions": iteration['total_mentions'],
|
| 864 |
+
"Respondents": iteration['n_respondents'],
|
| 865 |
+
"Avg Problems/Resp": f"{iteration['avg_problems']:.2f}"
|
| 866 |
+
})
|
| 867 |
+
|
| 868 |
+
if comparison_data:
|
| 869 |
+
import pandas as pd
|
| 870 |
+
df_comparison = pd.DataFrame(comparison_data)
|
| 871 |
+
st.dataframe(df_comparison, use_container_width=True)
|
| 872 |
+
|
| 873 |
+
# Show detailed stats for each iteration
|
| 874 |
+
with st.expander("🔍 View Detailed Stats for Each Iteration"):
|
| 875 |
+
for iteration in st.session_state.iteration_results:
|
| 876 |
+
st.markdown(f"### Iteration {iteration['iteration'] + 1}")
|
| 877 |
+
st.markdown(f"**Question:** {iteration['question']}")
|
| 878 |
+
if iteration['response_options']:
|
| 879 |
+
st.markdown(f"**Response Options:** {iteration['response_options']}")
|
| 880 |
+
|
| 881 |
+
col1, col2, col3, col4 = st.columns(4)
|
| 882 |
+
with col1:
|
| 883 |
+
st.metric("Distinct Problems", iteration['num_problems'])
|
| 884 |
+
with col2:
|
| 885 |
+
st.metric("Total Mentions", iteration['total_mentions'])
|
| 886 |
+
with col3:
|
| 887 |
+
st.metric("Respondents", iteration['n_respondents'])
|
| 888 |
+
with col4:
|
| 889 |
+
st.metric("Avg Problems/Respondent", f"{iteration['avg_problems']:.2f}")
|
| 890 |
+
|
| 891 |
+
st.markdown("**Problem Breakdown:**")
|
| 892 |
+
for i, count in enumerate(sorted(iteration['problem_counts'], reverse=True), 1):
|
| 893 |
+
pct = (count / iteration['n_respondents'] * 100) if iteration['n_respondents'] > 0 else 0
|
| 894 |
+
st.write(f"Problem {i}: {count} mentions ({pct:.1f}% of respondents)")
|
| 895 |
+
st.markdown("---")
|
| 896 |
+
|
| 897 |
+
# Add reset button
|
| 898 |
+
if st.button("🔄 Reset Iterations", help="Clear all iteration history and start fresh"):
|
| 899 |
+
st.session_state.iteration_results = []
|
| 900 |
+
st.session_state.current_iteration = 0
|
| 901 |
+
st.session_state.next_question = ''
|
| 902 |
+
st.session_state.next_options = ''
|
| 903 |
+
st.success("✅ Iteration history cleared!")
|
| 904 |
+
st.rerun()
|
| 905 |
+
|
| 906 |
+
# Footer
|
| 907 |
+
st.sidebar.markdown("---")
|
| 908 |
+
st.sidebar.markdown("""
|
| 909 |
+
**Need Help?**
|
| 910 |
+
- [Documentation](WINSTON_README.md)
|
| 911 |
+
- [GitHub](https://github.com/PatrickSturgis/Silicon_samples)
|
| 912 |
+
""")
|
dashboard_backend.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Dashboard Backend - API Only (HF Spaces)
|
| 4 |
+
|
| 5 |
+
Handles LLM querying via Anthropic and OpenAI APIs.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from typing import Callable, Optional
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class AnthropicSampler:
    """
    Anthropic Claude API sampler for validation studies.

    Generates one persona-conditioned response per DataFrame row by sending
    the row's 'backstory' as the system prompt and the configured survey
    question as the user message.

    Config keys:
        anthropic_api_key (str): API key for the Anthropic client.
        temperature (float): Sampling temperature (default 0.7).
        question (str): Survey question posed to every persona.
        model (str): Claude model name
            (default "claude-3-5-sonnet-20241022", the previous hard-coded value).
        max_tokens (int): Per-response token cap (default 100, as before).
    """

    def __init__(self, config: dict):
        self.config = config
        self.api_key = config.get('anthropic_api_key')
        self.temperature = config.get('temperature', 0.7)
        self.question = config.get('question', '')
        # Backward-compatible generalization: model and token budget were
        # previously hard-coded inside generate_responses; existing callers
        # get identical behavior via these defaults.
        self.model = config.get('model', "claude-3-5-sonnet-20241022")
        self.max_tokens = config.get('max_tokens', 100)

    def generate_responses(
        self,
        df: pd.DataFrame,
        progress_callback=None
    ) -> pd.DataFrame:
        """Generate responses using Claude API.

        Args:
            df: Must contain a 'backstory' column; one API call per row.
            progress_callback: Optional callable(done, total) invoked once
                per row, including skipped (empty) and errored rows.

        Returns:
            Copy of ``df`` with a new 'response' column. Blank backstories
            yield "[EMPTY]"; API failures yield "[ERROR: ...]" so a single
            bad row never aborts the whole run.

        Raises:
            ValueError: If ``df`` lacks a 'backstory' column.
        """
        # Validate input BEFORE importing the optional dependency so a
        # schema error surfaces even when 'anthropic' is not installed.
        if 'backstory' not in df.columns:
            raise ValueError("DataFrame must have 'backstory' column")

        import anthropic

        client = anthropic.Anthropic(api_key=self.api_key)

        results = df.copy()
        results['response'] = ""
        total = len(df)

        for i, (idx, row) in enumerate(df.iterrows()):
            backstory = row['backstory']

            if pd.isna(backstory) or str(backstory).strip() == "":
                results.loc[idx, 'response'] = "[EMPTY]"
            else:
                try:
                    message = client.messages.create(
                        model=self.model,
                        max_tokens=self.max_tokens,
                        temperature=self.temperature,
                        system=(
                            "Adopt the following persona and answer only based on it. "
                            "Do not invent details beyond the provided attributes.\n\n"
                            f"{backstory}"
                        ),
                        messages=[
                            {"role": "user", "content": self.question}
                        ]
                    )
                    results.loc[idx, 'response'] = message.content[0].text.strip()
                except Exception as e:
                    # Truncate the error so the cell stays readable in tables.
                    results.loc[idx, 'response'] = f"[ERROR: {str(e)[:50]}]"

            # Report progress for every row. (Previously the empty-backstory
            # branch used `continue`, which skipped this call and left the
            # progress count short of `total`.)
            if progress_callback:
                progress_callback(i + 1, total)

        return results
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class OpenAISampler:
    """
    OpenAI ChatGPT API sampler for validation studies.

    Generates one persona-conditioned response per DataFrame row by sending
    the row's 'backstory' as the system message and the configured survey
    question as the user message.

    Config keys:
        openai_api_key (str): API key for the OpenAI client.
        temperature (float): Sampling temperature (default 0.7).
        question (str): Survey question posed to every persona.
        model (str): Chat model name (default "gpt-4o", the previous
            hard-coded value).
        max_tokens (int): Per-response token cap (default 100, as before).
    """

    def __init__(self, config: dict):
        self.config = config
        self.api_key = config.get('openai_api_key')
        self.temperature = config.get('temperature', 0.7)
        self.question = config.get('question', '')
        # Backward-compatible generalization: model and token budget were
        # previously hard-coded inside generate_responses; existing callers
        # get identical behavior via these defaults.
        self.model = config.get('model', "gpt-4o")
        self.max_tokens = config.get('max_tokens', 100)

    def generate_responses(
        self,
        df: pd.DataFrame,
        progress_callback=None
    ) -> pd.DataFrame:
        """Generate responses using ChatGPT API.

        Args:
            df: Must contain a 'backstory' column; one API call per row.
            progress_callback: Optional callable(done, total) invoked once
                per row, including skipped (empty) and errored rows.

        Returns:
            Copy of ``df`` with a new 'response' column. Blank backstories
            yield "[EMPTY]"; API failures yield "[ERROR: ...]" so a single
            bad row never aborts the whole run.

        Raises:
            ValueError: If ``df`` lacks a 'backstory' column.
        """
        # Validate input BEFORE importing the optional dependency so a
        # schema error surfaces even when 'openai' is not installed.
        if 'backstory' not in df.columns:
            raise ValueError("DataFrame must have 'backstory' column")

        from openai import OpenAI

        client = OpenAI(api_key=self.api_key)

        results = df.copy()
        results['response'] = ""
        total = len(df)

        for i, (idx, row) in enumerate(df.iterrows()):
            backstory = row['backstory']

            if pd.isna(backstory) or str(backstory).strip() == "":
                results.loc[idx, 'response'] = "[EMPTY]"
            else:
                try:
                    response = client.chat.completions.create(
                        model=self.model,
                        max_tokens=self.max_tokens,
                        temperature=self.temperature,
                        messages=[
                            {
                                "role": "system",
                                "content": (
                                    "Adopt the following persona and answer only based on it. "
                                    "Do not invent details beyond the provided attributes.\n\n"
                                    f"{backstory}"
                                )
                            },
                            {"role": "user", "content": self.question}
                        ]
                    )
                    results.loc[idx, 'response'] = response.choices[0].message.content.strip()
                except Exception as e:
                    # Truncate the error so the cell stays readable in tables.
                    results.loc[idx, 'response'] = f"[ERROR: {str(e)[:50]}]"

            # Report progress for every row. (Previously the empty-backstory
            # branch used `continue`, which skipped this call and left the
            # progress count short of `total`.)
            if progress_callback:
                progress_callback(i + 1, total)

        return results
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit>=1.30.0
|
| 2 |
+
pandas>=2.0.0
|
| 3 |
+
anthropic>=0.25.0
|
| 4 |
+
openai>=1.0.0
|