Patricksturg committed on
Commit
d3fd0f4
·
verified ·
1 Parent(s): f1861eb

Upload 3 files

Browse files
Files changed (3) hide show
  1. dashboard.py +912 -0
  2. dashboard_backend.py +130 -0
  3. requirements.txt +4 -0
dashboard.py ADDED
@@ -0,0 +1,912 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Silicon Sampling Dashboard
4
+
5
+ Interactive web interface for generating synthetic survey responses.
6
+ Users can input custom questions and get silicon sample data without coding.
7
+
8
+ Usage:
9
+ streamlit run dashboard.py
10
+ """
11
+
12
+ import streamlit as st
13
+ import pandas as pd
14
+ from pathlib import Path
15
+ import json
16
+ from datetime import datetime
17
+
# --- App setup ---------------------------------------------------------------
st.set_page_config(
    page_title="COGbot Dashboard",
    page_icon="🤖",
    layout="wide"
)

# Seed st.session_state on the first run; later reruns keep existing values.
_SESSION_DEFAULTS = {
    'results': None,                   # generated responses (DataFrame) or None
    'processing': False,               # True while a generation run is in flight
    'mode': "Response Generation",     # mode the last run was started in
    'question_text': "",               # question stored for the analysis step
    'response_options_text': "",       # options stored in Question Testing mode
    'iteration_results': [],           # per-iteration stats for continuous improvement
    'current_iteration': 0,            # index of the current improvement iteration
}
for _key, _value in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _value

# Title and description
st.title("🤖 COGbot Dashboard")
st.markdown("""
Generate synthetic survey responses using LLM-based persona simulation.
""")

# Sidebar: LSE logo at the top (only when the image ships with the app),
# followed by the configuration controls.
# NOTE(review): divider assumed to belong with the logo — confirm layout.
logo_path = "LSE_logo.jpg"
if Path(logo_path).exists():
    st.sidebar.image(logo_path, width=180)
    st.sidebar.markdown("---")

st.sidebar.header("⚙️ Configuration")
55
+
# --- Data source selection ---------------------------------------------------
data_source = st.sidebar.radio(
    "Data Source",
    ["Default ESS UK (1,286 respondents)", "Upload CSV (not available yet)"]
)

# Backstory length option
backstory_length = st.sidebar.radio(
    "Backstory Length",
    ["Long (detailed)", "Short (concise)"],
    help="Choose between detailed backstories with full demographic info or concise versions"
)

# Load the chosen backstory dataset into df_backstories (None on any failure).
if "Upload CSV" in data_source:
    uploaded_file = st.sidebar.file_uploader(
        "Upload backstories CSV",
        type=['csv'],
        help="CSV must have 'backstory' column"
    )
    if uploaded_file:
        # FIX: a malformed upload used to crash the whole app; surface a
        # sidebar error instead.
        try:
            df_backstories = pd.read_csv(uploaded_file)
        except Exception:
            df_backstories = None
            st.sidebar.error("⚠️ Could not parse the uploaded file as CSV.")
    else:
        df_backstories = None
else:
    # Load default ESS data shipped alongside the app
    default_path = Path("ess_uk_with_backstories.csv")
    if default_path.exists():
        df_backstories = pd.read_csv(default_path)
    else:
        df_backstories = None
        st.sidebar.warning("⚠️ Default file not found: ess_uk_with_backstories.csv")

# FIX: the generation step relies on a 'backstory' column; reject data without
# it here rather than failing mid-run later.
if df_backstories is not None and 'backstory' not in df_backstories.columns:
    st.sidebar.error("⚠️ Loaded data has no 'backstory' column.")
    df_backstories = None

# Show data info
if df_backstories is not None:
    st.sidebar.success(f"✅ Loaded {len(df_backstories):,} respondents")

    # Sample size
    max_size = len(df_backstories)
    if max_size > 10:
        sample_size = st.sidebar.slider(
            "Sample Size",
            min_value=10,
            max_value=max_size,
            value=min(50, max_size),
            step=10,
            help="Start with small sample for testing"
        )
    else:
        # FIX: st.slider requires min_value < max_value; with ≤10 rows the old
        # slider raised. A tiny dataset is simply used in full.
        sample_size = max_size
else:
    sample_size = 0
104
+
# --- Model settings ----------------------------------------------------------
st.sidebar.subheader("Model Settings")

model_option = st.sidebar.selectbox(
    "Model",
    ["Claude (Anthropic)", "GPT-4 (OpenAI)"],
    help="API-based models. Provide your API key below."
)

# One password field whose label and help text follow the selected provider.
if "Claude" in model_option:
    _key_label = "Anthropic API Key"
    _key_help = "Get your key from https://console.anthropic.com/"
else:
    _key_label = "OpenAI API Key"
    _key_help = "Get your key from https://platform.openai.com/"
api_key = st.sidebar.text_input(_key_label, type="password", help=_key_help)

# Sampling temperature passed through to the model calls.
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.0,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Higher = more creative, Lower = more consistent"
)
136
+
# --- Main panel: question configuration --------------------------------------
st.header("📋 Step 1: Configure Question")

# Mode selection. NOTE: later membership tests like `"Question Testing" in
# mode` deliberately match both testing variants.
mode = st.radio(
    "Mode",
    ["Response Generation", "Question Testing", "Question Testing (Continuous Improvement)"],
    help="Response Generation: Get synthetic survey responses. Question Testing: Get feedback on question quality. Continuous Improvement: Iteratively improve question through multiple rounds of testing - manually control each iteration."
)

col1, col2 = st.columns([2, 1])

with col1:
    # A previous improvement round may have queued a rewritten question under
    # 'next_question'; surface it once, then blank the slot.
    default_question = st.session_state.get('next_question', '')
    if default_question:
        st.session_state.next_question = ''

    _question_help = (
        "The question your synthetic respondents will answer"
        if mode == "Response Generation"
        else "The draft question you want to test for clarity and quality"
    )
    question_text = st.text_area(
        "Survey Question",
        value=default_question,
        height=80,
        placeholder="Enter your survey question here...",
        help=_question_help
    )

    # Only the testing modes ask what concept the question should measure.
    if "Question Testing" in mode:
        # Persisted in session state so improvement iterations keep it.
        default_concept = st.session_state.get('concept_description', '')

        concept_description = st.text_area(
            "Concept Description",
            value=default_concept,
            height=100,
            placeholder="Describe what you are trying to measure with this question...",
            help="Describe in as much detail as you can what you are trying to measure with this question. This helps the LLM understand your intent and provide better feedback."
        )
    else:
        concept_description = ""

with col2:
    if mode == "Response Generation":
        response_format = st.selectbox(
            "Response Format",
            ["Scale (0-10)", "Scale (1-5)", "Multiple Choice", "Yes/No", "Open Text"]
        )
    else:  # both Question Testing modes collect free-text feedback
        response_format = "Open Text"
        st.info("📝 Question Testing uses open text responses to gather feedback on question quality.")
188
+
# --- Prompt configuration (depends on mode) ----------------------------------
# Both of these are read later when the full prompt is assembled, so they must
# exist whatever branch runs below.
mc_options = ""
response_options_text = ""

if "Question Testing" in mode:
    # Testing modes: ask synthetic respondents to critique the draft question.
    st.subheader("Response Options/Instructions")

    # A previous improvement round may have queued rewritten response options;
    # surface them once, then blank the slot.
    default_options = st.session_state.get('next_options', '')
    if default_options:
        st.session_state.next_options = ''

    response_options_text = st.text_area(
        "Response Options (if applicable)",
        value=default_options,
        height=100,
        placeholder="e.g., Scale from 0-10 where 0=Not at all, 10=Extremely, or Multiple choice options A, B, C, D",
        help="Include any response options or scales that are part of the question being tested"
    )

    # One-shot banner shown right after an improved question was loaded.
    if st.session_state.get('show_rerun_message', False):
        st.info(f"🔄 **Iteration {st.session_state.current_iteration + 1}:** Improved question loaded. Click 'Generate Responses' below to test the new version.")
        st.session_state.show_rerun_message = False

    # Fixed critique instructions sent to every synthetic respondent.
    instructions = """Please provide feedback on this survey question. Comment on:

1. Are there any parts of the question that are ambiguous or unclear?
2. Are there any parts that are difficult to understand?
3. Did you have any problems thinking about how to answer?
4. Are the response options (if provided) appropriate and complete?

Provide your feedback in 2-3 sentences, being specific about any issues you identify."""

    # Thematic coding always runs in testing mode.
    enable_thematic_coding = True
    st.info("🔍 Thematic analysis will automatically run to identify common issues in the question.")

else:
    # Response Generation: build answer-format instructions for the format
    # chosen in the right-hand column.
    if "Scale" in response_format:
        st.subheader("Scale Labels")

        if "0-10" in response_format:
            # 11-point scale: only the endpoints are labelled.
            col_low, col_high = st.columns(2)
            with col_low:
                low_label = st.text_input(
                    "0 means",
                    value="Not at all",
                    help="What does the lowest value mean?"
                )
            with col_high:
                high_label = st.text_input(
                    "10 means",
                    value="Extremely",
                    help="What does the highest value mean?"
                )
            instructions = f"Respond with a single integer from 0 to 10, where 0 means '{low_label}' and 10 means '{high_label}'. Only output the number."

        else:  # 1-5 scale: every point gets its own label
            label_1 = st.text_input("1 means", value="Strongly disagree")
            label_2 = st.text_input("2 means", value="Disagree")
            label_3 = st.text_input("3 means", value="Neither agree nor disagree")
            label_4 = st.text_input("4 means", value="Agree")
            label_5 = st.text_input("5 means", value="Strongly agree")

            instructions = f"""Respond with a single integer from 1 to 5 based on these labels:
1 = {label_1}
2 = {label_2}
3 = {label_3}
4 = {label_4}
5 = {label_5}

Only output the number."""
    else:
        # Non-scale formats share a simple one-line instruction.
        instructions = {
            "Multiple Choice": "Choose one option and respond with only the letter (A, B, C, or D).",
            "Yes/No": "Respond with only 'Yes' or 'No'.",
            "Open Text": "Provide a brief 1-2 sentence response based on your persona."
        }.get(response_format, "")

    # Multiple choice options (mc_options stays "" for every other format —
    # it was pre-initialized above).
    if response_format == "Multiple Choice":
        st.subheader("Response Options")
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            option_a = st.text_input("Option A", "Strongly agree")
        with col2:
            option_b = st.text_input("Option B", "Agree")
        with col3:
            option_c = st.text_input("Option C", "Disagree")
        with col4:
            option_d = st.text_input("Option D", "Strongly disagree")

        mc_options = f"\nA. {option_a}\nB. {option_b}\nC. {option_c}\nD. {option_d}"

    # Thematic coding is opt-in for open text responses.
    enable_thematic_coding = False
    if response_format == "Open Text":
        st.subheader("Thematic Coding")
        enable_thematic_coding = st.checkbox(
            "Perform automated thematic analysis after generating responses",
            value=False,
            help="Uses LLM to identify themes, counts, and percentages in open text responses. Runs automatically after response generation."
        )
304
+
# Preview full prompt (also builds full_question, which the generation step
# sends to the model).
with st.expander("🔍 Preview Full Prompt"):
    st.markdown("**System Prompt:**")
    st.code("""Adopt the following persona and answer only based on it.
Do not invent details beyond the provided attributes.

[Backstory will be inserted here for each respondent]""")

    st.markdown("**User Prompt:**")
    # FIX: use the substring test (as everywhere else in this file) so that
    # "Question Testing (Continuous Improvement)" also builds the testing-style
    # prompt — the old equality check silently dropped the "Question:" prefix
    # and the response options in that mode.
    if "Question Testing" in mode:
        # Include response options in the question display for testing
        full_question = f"Question: {question_text}\n"
        if response_options_text.strip():
            full_question += f"\nResponse Options: {response_options_text}\n"
        full_question += f"\n{instructions}"
    else:
        full_question = question_text + mc_options + "\n\n" + instructions
    st.code(full_question)
323
+
# --- Step 2: run generation / testing ----------------------------------------
# FIX: substring test so "Question Testing (Continuous Improvement)" gets the
# testing header too (the old equality check showed the generation header).
if "Question Testing" in mode:
    st.header("🧪 Step 2: Test Question")
    button_text = "🧪 Test Question with Synthetic Respondents"
else:
    st.header("🚀 Step 2: Generate Responses")
    button_text = "🎯 Generate Responses"

# Enabled only when data is loaded, a question was entered, and no run is
# already in flight.
can_generate = (
    df_backstories is not None
    and question_text.strip() != ""
    and not st.session_state.processing
)

if st.button(
    button_text,
    disabled=not can_generate,
    type="primary",
    use_container_width=True
):
    st.session_state.processing = True
    st.session_state.results = None
    st.session_state.mode = mode  # Store mode for results display
    st.session_state.question_text = question_text  # Store for thematic analysis
    # FIX: substring test so the Continuous Improvement mode also persists the
    # options/concept that the improvement step reads back from session state.
    if "Question Testing" in mode:
        st.session_state.response_options_text = response_options_text
        st.session_state.concept_description = concept_description

    # Prepare configuration
    config = {
        "question": full_question,
        "temperature": temperature,
        "sample_size": sample_size
    }

    # Validate API key before doing any work.
    if not api_key:
        st.error(f"⚠️ Please provide your {'Anthropic' if 'Claude' in model_option else 'OpenAI'} API key in the sidebar.")
        # FIX: the old code stopped with processing=True, which left the
        # button disabled on every subsequent rerun.
        st.session_state.processing = False
        st.stop()

    # Create sampler based on model selection
    if "Claude" in model_option:
        from dashboard_backend import AnthropicSampler
        config["anthropic_api_key"] = api_key
        sampler = AnthropicSampler(config)
    else:  # OpenAI
        from dashboard_backend import OpenAISampler
        config["openai_api_key"] = api_key
        sampler = OpenAISampler(config)

    # Progress bar
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Random but reproducible subset of respondents.
    df_sample = df_backstories.sample(n=sample_size, random_state=42).copy()

    # Apply backstory length preference
    if "Short" in backstory_length and 'backstory' in df_sample.columns:
        # Truncate to first 150 characters for short version
        df_sample['backstory'] = df_sample['backstory'].apply(
            lambda x: x[:150] + "..." if isinstance(x, str) and len(x) > 150 else x
        )

    # Process. FIX: st.rerun() works by raising a control-flow exception that
    # derives from Exception, so the old code's `except Exception` swallowed
    # the rerun and reported it as "❌ Error". Call it only after the
    # try/except has finished.
    succeeded = False
    try:
        results = sampler.generate_responses(
            df_sample,
            progress_callback=lambda i, total: (
                progress_bar.progress(i / total),
                status_text.text(f"Processing: {i}/{total} respondents ({100*i/total:.1f}%)")
            )
        )
        st.session_state.results = results
        succeeded = True
    except Exception as e:
        st.error(f"❌ Error: {str(e)}")
    finally:
        # Always release the in-flight flag so the button re-enables.
        st.session_state.processing = False

    if succeeded:
        st.success(f"✅ Generated {len(results)} responses!")
        st.rerun()
406
+
407
+ # Show results
408
+ if st.session_state.results is not None:
409
+ st.header("📊 Step 3: Results")
410
+
411
+ results_df = st.session_state.results
412
+
413
+ # Summary stats
414
+ col1, col2, col3 = st.columns(3)
415
+ with col1:
416
+ st.metric("Total Responses", len(results_df))
417
+ with col2:
418
+ valid_responses = results_df['response'].notna().sum()
419
+ st.metric("Valid Responses", valid_responses)
420
+ with col3:
421
+ completion_rate = 100 * valid_responses / len(results_df)
422
+ st.metric("Completion Rate", f"{completion_rate:.1f}%")
423
+
424
+ # Preview
425
+ st.subheader("Preview (First 10 rows)")
426
+ st.dataframe(results_df.head(10), use_container_width=True)
427
+
428
+ # Download
429
+ st.subheader("Download Results")
430
+
431
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
432
+ filename = f"silicon_sample_{timestamp}.csv"
433
+
434
+ csv = results_df.to_csv(index=False)
435
+ st.download_button(
436
+ label="📥 Download CSV",
437
+ data=csv,
438
+ file_name=filename,
439
+ mime="text/csv",
440
+ use_container_width=True
441
+ )
442
+
443
+ # Response distribution and statistics
444
+ if response_format in ["Scale (0-10)", "Scale (1-5)", "Yes/No", "Multiple Choice"]:
445
+ st.subheader(f"Response Distribution: {question_text}")
446
+ try:
447
+ # For numeric formats, convert to numbers
448
+ if response_format.startswith("Scale"):
449
+ numeric_responses = pd.to_numeric(results_df['response'], errors='coerce')
450
+ valid_responses = numeric_responses.dropna()
451
+ elif response_format == "Yes/No":
452
+ # For Yes/No, show frequency distribution
453
+ valid_responses = results_df['response'].dropna()
454
+ elif response_format == "Multiple Choice":
455
+ # For Multiple Choice, show frequency distribution
456
+ valid_responses = results_df['response'].dropna()
457
+
458
+ if len(valid_responses) > 0:
459
+ # Show statistics for numeric scales
460
+ if response_format.startswith("Scale"):
461
+ col1, col2, col3, col4, col5 = st.columns(5)
462
+
463
+ with col1:
464
+ st.metric("Mean", f"{valid_responses.mean():.2f}")
465
+ with col2:
466
+ st.metric("Median", f"{valid_responses.median():.2f}")
467
+ with col3:
468
+ st.metric("Std Dev", f"{valid_responses.std():.2f}")
469
+ with col4:
470
+ mode_val = valid_responses.mode()
471
+ mode_display = f"{mode_val.iloc[0]:.0f}" if len(mode_val) > 0 else "N/A"
472
+ st.metric("Mode", mode_display)
473
+ with col5:
474
+ st.metric("Valid N", f"{len(valid_responses)}")
475
+
476
+ # Distribution chart
477
+ st.bar_chart(pd.to_numeric(results_df['response'], errors='coerce').value_counts().sort_index())
478
+
479
+ # Show frequency counts for categorical
480
+ else:
481
+ value_counts = valid_responses.value_counts()
482
+
483
+ # Display as metrics
484
+ cols = st.columns(min(len(value_counts), 5))
485
+ for idx, (value, count) in enumerate(value_counts.items()):
486
+ if idx < 5: # Limit to 5 columns
487
+ with cols[idx]:
488
+ pct = 100 * count / len(valid_responses)
489
+ st.metric(f"{value}", f"{count} ({pct:.1f}%)")
490
+
491
+ # Also show total N
492
+ st.metric("Total Valid N", f"{len(valid_responses)}")
493
+
494
+ # Distribution chart
495
+ st.bar_chart(value_counts)
496
+ else:
497
+ st.info("No valid responses to analyze")
498
+ except Exception as e:
499
+ st.info(f"Could not generate statistics: {str(e)}")
500
+
501
+ # Thematic coding for open text responses
502
+ elif response_format == "Open Text" and enable_thematic_coding:
503
+ # Get the stored mode and question text
504
+ stored_mode = st.session_state.get('mode', 'Response Generation')
505
+ stored_question = st.session_state.get('question_text', question_text)
506
+
507
+ # Different heading based on mode
508
+ if stored_mode == "Question Testing":
509
+ st.subheader(f"Question Testing Results: {stored_question}")
510
+ else:
511
+ st.subheader(f"Thematic Analysis: {stored_question}")
512
+
513
+ # Get valid text responses
514
+ valid_responses = results_df['response'].dropna()
515
+ valid_responses = valid_responses[valid_responses.str.strip() != ""]
516
+
517
+ if len(valid_responses) > 0:
518
+ st.info(f"Analyzing {len(valid_responses)} open text responses...")
519
+
520
+ # Automatically run thematic coding
521
+ if True: # Changed from button to automatic
522
+ with st.spinner("Analyzing themes with LLM..."):
523
+ try:
524
+ # Prepare responses for analysis
525
+ responses_text = "\n\n".join([f"Response {i+1}: {resp}" for i, resp in enumerate(valid_responses)])
526
+
527
+ # Create thematic analysis prompt - different for Question Testing
528
+ if stored_mode == "Question Testing":
529
+ coding_prompt = f"""You are a survey methodology expert analyzing feedback from respondents who tested a draft survey question.
530
+
531
+ Question being tested: "{stored_question}"
532
+
533
+ Here is the feedback from respondents (total of {len(valid_responses)} responses):
534
+
535
+ {responses_text}
536
+
537
+ CRITICAL INSTRUCTIONS:
538
+ - DO NOT list individual responses
539
+ - DO NOT copy feedback verbatim
540
+ - DO NOT fabricate or hallucinate problems that aren't genuinely present in the feedback
541
+ - DO NOT feel pressured to find a specific number of issues
542
+ - ONLY report genuine problems, ambiguities, or concerns that respondents actually raised
543
+ - If the question and response scales are clear and well-designed, say so - it's perfectly acceptable to find zero issues
544
+ - DO group similar issues together and count how many respondents mentioned each
545
+
546
+ Task:
547
+ 1. Read ALL responses carefully and identify ONLY genuine recurring issues and concerns
548
+ 2. If respondents found the question clear and had no problems, state that the question appears well-designed
549
+ 3. Group similar problems together (e.g., all mentions of "unclear terminology" should be one issue)
550
+ 4. For each distinct issue that was genuinely raised, provide:
551
+ - Issue name (2-4 words, e.g., "Ambiguous wording", "Unclear scale", "Missing context")
552
+ - Brief description (1 sentence explaining the specific problem)
553
+ - Count of how many respondents mentioned this issue
554
+ - Percentage of total respondents
555
+
556
+ REQUIRED FORMAT (follow exactly):
557
+
558
+ ISSUE 1: [Name]
559
+ DESCRIPTION: [One sentence explaining the problem]
560
+ COUNT: [Number of respondents who mentioned this]
561
+ PERCENTAGE: [Percentage]
562
+
563
+ ISSUE 2: [Name]
564
+ DESCRIPTION: [One sentence explaining the problem]
565
+ COUNT: [Number]
566
+ PERCENTAGE: [Percentage]
567
+
568
+ [Continue for all distinct issues]
569
+
570
+ SUMMARY:
571
+ [If issues were identified: 2-3 sentence summary of the most critical problems requiring attention]
572
+ [If no significant issues were found: Statement confirming the question appears clear and well-designed based on respondent feedback]
573
+
574
+ Example of CORRECT output when issues are found:
575
+ ISSUE 1: Ambiguous term "partner"
576
+ DESCRIPTION: Respondents were unclear whether "partner" refers to romantic partner, business partner, or roommate
577
+ COUNT: 15
578
+ PERCENTAGE: 75%
579
+
580
+ ISSUE 2: Vague timeframe
581
+ DESCRIPTION: The phrase "these days" lacks specificity about the time period being asked about
582
+ COUNT: 8
583
+ PERCENTAGE: 40%
584
+
585
+ Example of CORRECT output when no issues are found:
586
+ SUMMARY:
587
+ Based on the respondent feedback, the question appears well-designed and clear. Respondents understood what was being asked, found the wording unambiguous, and had no difficulty formulating responses. No significant issues or concerns were raised that would require revision."""
588
+ else:
589
+ coding_prompt = f"""You are a qualitative researcher conducting thematic analysis on open-ended survey responses.
590
+
591
+ Question asked: "{stored_question}"
592
+
593
+ Here are all the responses:
594
+
595
+ {responses_text}
596
+
597
+ Task:
598
+ 1. Identify the main themes present in these responses (aim for 4-8 themes)
599
+ 2. For each theme, provide:
600
+ - Theme name (2-4 words)
601
+ - Brief description (1 sentence)
602
+ - Count of how many responses express this theme
603
+ - Percentage of total responses
604
+
605
+ Format your response as:
606
+ THEME: [Name]
607
+ DESCRIPTION: [Description]
608
+ COUNT: [Number]
609
+ PERCENTAGE: [Percentage]
610
+
611
+ [Repeat for each theme]"""
612
+
613
+ # Send to API for analysis
614
+ if "Claude" in model_option:
615
+ import anthropic
616
+ client = anthropic.Anthropic(api_key=api_key)
617
+ message = client.messages.create(
618
+ model="claude-3-5-sonnet-20241022",
619
+ max_tokens=2000,
620
+ temperature=0.3,
621
+ system="You are a qualitative research expert analyzing survey responses.",
622
+ messages=[{"role": "user", "content": coding_prompt}]
623
+ )
624
+ analysis_result = message.content[0].text.strip()
625
+ else: # OpenAI
626
+ from openai import OpenAI
627
+ client = OpenAI(api_key=api_key)
628
+ response = client.chat.completions.create(
629
+ model="gpt-4o",
630
+ max_tokens=2000,
631
+ temperature=0.3,
632
+ messages=[
633
+ {"role": "system", "content": "You are a qualitative research expert analyzing survey responses."},
634
+ {"role": "user", "content": coding_prompt}
635
+ ]
636
+ )
637
+ analysis_result = response.choices[0].message.content.strip()
638
+
639
+ # Display results
640
+ st.markdown("### Thematic Coding Results")
641
+ st.text_area("Analysis", analysis_result, height=400)
642
+
643
+ # For Question Testing mode, add problem summary and offer to suggest improved wording
644
+ if "Question Testing" in stored_mode:
645
+ # Parse the analysis to extract problem counts
646
+ import re
647
+
648
+ # Extract counts from the analysis (looks for patterns like "Count: X" or "X respondents" or "X mentions")
649
+ count_patterns = [
650
+ r'Count:\s*(\d+)',
651
+ r'(\d+)\s+respondents?',
652
+ r'(\d+)\s+mentions?',
653
+ r'(\d+)/\d+', # X/total format
654
+ r'\((\d+)\s+respondents?\)',
655
+ ]
656
+
657
+ problem_counts = []
658
+ for pattern in count_patterns:
659
+ matches = re.findall(pattern, analysis_result, re.IGNORECASE)
660
+ if matches:
661
+ problem_counts.extend([int(m) for m in matches])
662
+
663
+ # Calculate summary statistics
664
+ if problem_counts:
665
+ num_problems = len(problem_counts) # a) distinct problems
666
+ total_mentions = sum(problem_counts) # c) total problem mentions
667
+ n_respondents = len(valid_responses)
668
+ avg_problems_per_respondent = total_mentions / n_respondents if n_respondents > 0 else 0 # d) average
669
+
670
+ # Display problem summary
671
+ st.markdown("---")
672
+ st.markdown("### Problem Summary")
673
+
674
+ col1, col2, col3, col4 = st.columns(4)
675
+ with col1:
676
+ st.metric("Distinct Problems", num_problems)
677
+ with col2:
678
+ st.metric("Total Mentions", total_mentions)
679
+ with col3:
680
+ st.metric("Respondents", n_respondents)
681
+ with col4:
682
+ st.metric("Avg Problems/Respondent", f"{avg_problems_per_respondent:.2f}")
683
+
684
+ # Show breakdown
685
+ with st.expander("📊 Problem Breakdown"):
686
+ st.markdown("**Problems by frequency:**")
687
+ for i, count in enumerate(sorted(problem_counts, reverse=True), 1):
688
+ pct = (count / n_respondents * 100) if n_respondents > 0 else 0
689
+ st.write(f"Problem {i}: {count} mentions ({pct:.1f}% of respondents)")
690
+
691
+ st.markdown("---")
692
+ st.markdown("### Suggest Improved Question Wording")
693
+
694
+ if st.button("✨ Generate Improved Question", type="secondary"):
695
+ with st.spinner("Generating improved question wording..."):
696
+ try:
697
+ # Get response options and concept description if they exist
698
+ stored_options = st.session_state.get('response_options_text', '')
699
+ stored_concept = st.session_state.get('concept_description', '')
700
+
701
+ # Create improvement prompt
702
+ # Build the sections separately to avoid f-string backslash issue
703
+ options_section = f"\nOriginal Response Options: {stored_options}\n" if stored_options else ""
704
+ concept_section = f"\nConcept Being Measured: {stored_concept}\n" if stored_concept else ""
705
+ improved_options_section = "\n\nIMPROVED RESPONSE OPTIONS:\n[Your improved options]\n" if stored_options else ""
706
+
707
+ improvement_prompt = f"""You are a survey methodology expert. Based on the respondent feedback analysis below, suggest an improved version of the survey question that addresses the identified issues.
708
+
709
+ Original Question: "{stored_question}"{options_section}{concept_section}
710
+
711
+ Respondent Feedback Analysis:
712
+ {analysis_result}
713
+
714
+ Task:
715
+ 1. Identify the main problems with the current question based on respondent feedback
716
+ 2. Provide an improved version of the question that addresses these problems
717
+ 3. If response options were provided, suggest improved response options as well
718
+ 4. Explain specifically how the new version improves on the original based on the respondent feedback
719
+
720
+ Format your response as:
721
+
722
+ PROBLEMS IDENTIFIED:
723
+ [List the specific problems with the current question based on respondent feedback]
724
+
725
+ IMPROVED QUESTION:
726
+ [Your improved question text]{improved_options_section}
727
+
728
+ HOW THE NEW VERSION IMPROVES:
729
+ [Explain how each change addresses the problems identified in respondent feedback]"""
730
+
731
+ # Send to API for improvement
732
+ if "Claude" in model_option:
733
+ import anthropic
734
+ client = anthropic.Anthropic(api_key=api_key)
735
+ message = client.messages.create(
736
+ model="claude-3-5-sonnet-20241022",
737
+ max_tokens=1000,
738
+ temperature=0.3,
739
+ system="You are a survey methodology expert specializing in question wording and design.",
740
+ messages=[{"role": "user", "content": improvement_prompt}]
741
+ )
742
+ improvement_result = message.content[0].text.strip()
743
+ else: # OpenAI
744
+ from openai import OpenAI
745
+ client = OpenAI(api_key=api_key)
746
+ response = client.chat.completions.create(
747
+ model="gpt-4o",
748
+ max_tokens=1000,
749
+ temperature=0.3,
750
+ messages=[
751
+ {"role": "system", "content": "You are a survey methodology expert specializing in question wording and design."},
752
+ {"role": "user", "content": improvement_prompt}
753
+ ]
754
+ )
755
+ improvement_result = response.choices[0].message.content.strip()
756
+
757
+ # Display improved version
758
+ st.markdown("### Improved Question Suggestion")
759
+ st.text_area("Suggested Improvements", improvement_result, height=300)
760
+
761
+ # Store improvement result for potential re-run
762
+ st.session_state['last_improvement'] = improvement_result
763
+
764
+ # Store current iteration results for comparison
765
+ if problem_counts:
766
+ iteration_data = {
767
+ 'iteration': st.session_state.current_iteration,
768
+ 'question': stored_question,
769
+ 'response_options': stored_options,
770
+ 'num_problems': num_problems,
771
+ 'total_mentions': total_mentions,
772
+ 'n_respondents': n_respondents,
773
+ 'avg_problems': avg_problems_per_respondent,
774
+ 'problem_counts': problem_counts
775
+ }
776
+ # Only add if this iteration isn't already stored
777
+ if not any(d['iteration'] == st.session_state.current_iteration for d in st.session_state.iteration_results):
778
+ st.session_state.iteration_results.append(iteration_data)
779
+
780
+ st.info("💡 Review the suggested improvements and adapt them as needed for your research context.")
781
+
782
+ # Add re-run button for Question Testing mode
783
+ if "Question Testing" in stored_mode:
784
+ st.markdown("---")
785
+ if st.button("🔄 Re-run COGbot on Improved Question", type="primary"):
786
+ # Get improvement result from session state (more reliable than local variable)
787
+ stored_improvement = st.session_state.get('last_improvement', improvement_result)
788
+
789
+ if not stored_improvement:
790
+ st.error("No improvement suggestion found. Please click 'Generate Improved Question' first.")
791
+ else:
792
+ # Extract improved question from the result
793
+ import re
794
+
795
+ # Try multiple patterns to extract improved question
796
+ patterns = [
797
+ r'IMPROVED QUESTION:\s*\n+([^\n].*?)(?:\n\n+IMPROVED RESPONSE OPTIONS:|\n\n+HOW THE NEW VERSION IMPROVES:|$)',
798
+ r'IMPROVED QUESTION:\s*\n+([^\n][^\n]+)', # Just get first line after
799
+ r'improved question[:\s]+([^\n]+)', # More flexible
800
+ ]
801
+
802
+ new_question = None
803
+ for pattern in patterns:
804
+ match = re.search(pattern, stored_improvement, re.DOTALL | re.IGNORECASE)
805
+ if match:
806
+ new_question = match.group(1).strip()
807
+ # Remove any leading quotes or markers
808
+ new_question = new_question.strip('"\'')
809
+ if len(new_question) > 10: # Valid question should be longer than 10 chars
810
+ break
811
+
812
+ if new_question:
813
+ # Try to extract improved response options if present
814
+ options_match = re.search(r'IMPROVED RESPONSE OPTIONS:\s*\n+(.*?)(?:\n\n+HOW THE NEW VERSION IMPROVES:|$)',
815
+ stored_improvement, re.DOTALL | re.IGNORECASE)
816
+ new_options = options_match.group(1).strip() if options_match else stored_options
817
+
818
+ # Debug: Show what was extracted
819
+ st.info(f"✅ Extracted question: {new_question[:100]}...")
820
+
821
+ # Store the new question and options for next run FIRST
822
+ st.session_state.next_question = new_question
823
+ st.session_state.next_options = new_options
824
+
825
+ # Increment iteration counter
826
+ st.session_state.current_iteration += 1
827
+
828
+ # Clear old results to force regeneration
829
+ st.session_state.results = None
830
+ if 'last_improvement' in st.session_state:
831
+ del st.session_state['last_improvement']
832
+
833
+ # Set flag to show message after rerun
834
+ st.session_state.show_rerun_message = True
835
+
836
+ # Immediately rerun
837
+ st.rerun()
838
+ else:
839
+ st.error("❌ Could not extract improved question from the output.")
840
+ st.warning("💡 Please manually copy the improved question and paste it into the question box above.")
841
+
842
+ except Exception as e:
843
+ st.error(f"Error generating improved question: {str(e)}")
844
+
845
+ except Exception as e:
846
+ st.error(f"Error during thematic analysis: {str(e)}")
847
+ else:
848
+ st.info("No valid open text responses to analyze")
849
+
850
+ # Display iteration comparison table for Question Testing mode
851
+ if len(st.session_state.iteration_results) > 0 and "Question Testing" in st.session_state.get('mode', ''):
852
+ st.markdown("---")
853
+ st.markdown("## 📊 Iteration Comparison")
854
+ st.markdown(f"**Total iterations completed:** {len(st.session_state.iteration_results)}")
855
+
856
+ # Create comparison table
857
+ comparison_data = []
858
+ for iteration in st.session_state.iteration_results:
859
+ comparison_data.append({
860
+ "Iteration": iteration['iteration'] + 1,
861
+ "Question": iteration['question'][:100] + "..." if len(iteration['question']) > 100 else iteration['question'],
862
+ "# Problems": iteration['num_problems'],
863
+ "Total Mentions": iteration['total_mentions'],
864
+ "Respondents": iteration['n_respondents'],
865
+ "Avg Problems/Resp": f"{iteration['avg_problems']:.2f}"
866
+ })
867
+
868
+ if comparison_data:
869
+ import pandas as pd
870
+ df_comparison = pd.DataFrame(comparison_data)
871
+ st.dataframe(df_comparison, use_container_width=True)
872
+
873
+ # Show detailed stats for each iteration
874
+ with st.expander("🔍 View Detailed Stats for Each Iteration"):
875
+ for iteration in st.session_state.iteration_results:
876
+ st.markdown(f"### Iteration {iteration['iteration'] + 1}")
877
+ st.markdown(f"**Question:** {iteration['question']}")
878
+ if iteration['response_options']:
879
+ st.markdown(f"**Response Options:** {iteration['response_options']}")
880
+
881
+ col1, col2, col3, col4 = st.columns(4)
882
+ with col1:
883
+ st.metric("Distinct Problems", iteration['num_problems'])
884
+ with col2:
885
+ st.metric("Total Mentions", iteration['total_mentions'])
886
+ with col3:
887
+ st.metric("Respondents", iteration['n_respondents'])
888
+ with col4:
889
+ st.metric("Avg Problems/Respondent", f"{iteration['avg_problems']:.2f}")
890
+
891
+ st.markdown("**Problem Breakdown:**")
892
+ for i, count in enumerate(sorted(iteration['problem_counts'], reverse=True), 1):
893
+ pct = (count / iteration['n_respondents'] * 100) if iteration['n_respondents'] > 0 else 0
894
+ st.write(f"Problem {i}: {count} mentions ({pct:.1f}% of respondents)")
895
+ st.markdown("---")
896
+
897
+ # Add reset button
898
+ if st.button("🔄 Reset Iterations", help="Clear all iteration history and start fresh"):
899
+ st.session_state.iteration_results = []
900
+ st.session_state.current_iteration = 0
901
+ st.session_state.next_question = ''
902
+ st.session_state.next_options = ''
903
+ st.success("✅ Iteration history cleared!")
904
+ st.rerun()
905
+
906
+ # Footer
907
+ st.sidebar.markdown("---")
908
+ st.sidebar.markdown("""
909
+ **Need Help?**
910
+ - [Documentation](WINSTON_README.md)
911
+ - [GitHub](https://github.com/PatrickSturgis/Silicon_samples)
912
+ """)
dashboard_backend.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Dashboard Backend - API Only (HF Spaces)
4
+
5
+ Handles LLM querying via Anthropic and OpenAI APIs.
6
+ """
7
+
8
+ import pandas as pd
9
+ from typing import Callable, Optional
10
+
11
+
12
class AnthropicSampler:
    """Anthropic Claude API sampler for validation studies.

    Expected config keys:
        anthropic_api_key (str): API key for the Anthropic client.
        temperature (float): Sampling temperature, default 0.7.
        question (str): Survey question posed to each persona.
    """

    def __init__(self, config: dict):
        self.config = config
        self.api_key = config.get('anthropic_api_key')
        self.temperature = config.get('temperature', 0.7)
        self.question = config.get('question', '')

    def generate_responses(
        self,
        df: pd.DataFrame,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> pd.DataFrame:
        """Generate one Claude response per row of *df* using its backstory.

        Args:
            df: Must contain a 'backstory' column; each backstory is used as
                the persona system prompt for one API call.
            progress_callback: Optional ``fn(done, total)`` invoked after
                every row — including empty and errored rows — so progress
                always reaches ``total``.

        Returns:
            A copy of *df* with an added 'response' column. Empty/NaN
            backstories yield "[EMPTY]"; per-row API failures yield
            "[ERROR: ...]" so one failure does not abort the whole run.

        Raises:
            ValueError: If the 'backstory' column is missing.
        """
        if 'backstory' not in df.columns:
            raise ValueError("DataFrame must have 'backstory' column")

        results = df.copy()
        results['response'] = ""
        total = len(df)

        # Client is created lazily on the first row that actually needs an
        # API call, so runs with only empty backstories never require the
        # anthropic package or a valid key.
        client = None

        for i, (idx, row) in enumerate(df.iterrows()):
            backstory = row['backstory']

            if pd.isna(backstory) or str(backstory).strip() == "":
                results.loc[idx, 'response'] = "[EMPTY]"
            else:
                try:
                    if client is None:
                        import anthropic
                        client = anthropic.Anthropic(api_key=self.api_key)
                    message = client.messages.create(
                        model="claude-3-5-sonnet-20241022",
                        max_tokens=100,
                        temperature=self.temperature,
                        system=(
                            "Adopt the following persona and answer only based on it. "
                            "Do not invent details beyond the provided attributes.\n\n"
                            f"{backstory}"
                        ),
                        messages=[
                            {"role": "user", "content": self.question}
                        ]
                    )
                    results.loc[idx, 'response'] = message.content[0].text.strip()
                except Exception as e:
                    # Best-effort policy: record the failure inline and keep going.
                    results.loc[idx, 'response'] = f"[ERROR: {str(e)[:50]}]"

            # Bug fix: the original `continue` on empty backstories skipped
            # this call, so the progress bar never completed for such rows.
            if progress_callback:
                progress_callback(i + 1, total)

        return results
69
+
70
+
71
class OpenAISampler:
    """OpenAI ChatGPT API sampler for validation studies.

    Expected config keys:
        openai_api_key (str): API key for the OpenAI client.
        temperature (float): Sampling temperature, default 0.7.
        question (str): Survey question posed to each persona.
    """

    def __init__(self, config: dict):
        self.config = config
        self.api_key = config.get('openai_api_key')
        self.temperature = config.get('temperature', 0.7)
        self.question = config.get('question', '')

    def generate_responses(
        self,
        df: pd.DataFrame,
        progress_callback: Optional[Callable[[int, int], None]] = None
    ) -> pd.DataFrame:
        """Generate one ChatGPT response per row of *df* using its backstory.

        Args:
            df: Must contain a 'backstory' column; each backstory is used as
                the persona system prompt for one API call.
            progress_callback: Optional ``fn(done, total)`` invoked after
                every row — including empty and errored rows — so progress
                always reaches ``total``.

        Returns:
            A copy of *df* with an added 'response' column. Empty/NaN
            backstories yield "[EMPTY]"; per-row API failures yield
            "[ERROR: ...]" so one failure does not abort the whole run.

        Raises:
            ValueError: If the 'backstory' column is missing.
        """
        if 'backstory' not in df.columns:
            raise ValueError("DataFrame must have 'backstory' column")

        results = df.copy()
        results['response'] = ""
        total = len(df)

        # Client is created lazily on the first row that actually needs an
        # API call, so runs with only empty backstories never require the
        # openai package or a valid key.
        client = None

        for i, (idx, row) in enumerate(df.iterrows()):
            backstory = row['backstory']

            if pd.isna(backstory) or str(backstory).strip() == "":
                results.loc[idx, 'response'] = "[EMPTY]"
            else:
                try:
                    if client is None:
                        from openai import OpenAI
                        client = OpenAI(api_key=self.api_key)
                    response = client.chat.completions.create(
                        model="gpt-4o",
                        max_tokens=100,
                        temperature=self.temperature,
                        messages=[
                            {
                                "role": "system",
                                "content": (
                                    "Adopt the following persona and answer only based on it. "
                                    "Do not invent details beyond the provided attributes.\n\n"
                                    f"{backstory}"
                                )
                            },
                            {"role": "user", "content": self.question}
                        ]
                    )
                    results.loc[idx, 'response'] = response.choices[0].message.content.strip()
                except Exception as e:
                    # Best-effort policy: record the failure inline and keep going.
                    results.loc[idx, 'response'] = f"[ERROR: {str(e)[:50]}]"

            # Bug fix: the original `continue` on empty backstories skipped
            # this call, so the progress bar never completed for such rows.
            if progress_callback:
                progress_callback(i + 1, total)

        return results
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit>=1.30.0
2
+ pandas>=2.0.0
3
+ anthropic>=0.25.0
4
+ openai>=1.0.0