Spaces:
Running
Running
| """ | |
| Streamlit app - CatLLM Survey Response Summarizer | |
| Based on the classifier app but focused on text/PDF summarization | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| import tempfile | |
| import os | |
| import time | |
| import sys | |
| from datetime import datetime | |
# Import catllm; the app keeps running (with a warning) when it is unavailable
# so the UI can still render. CATLLM_AVAILABLE records the outcome.
try:
    import catllm
    CATLLM_AVAILABLE = True
except ImportError as e:
    print(f"Warning: Could not import catllm: {e}")
    CATLLM_AVAILABLE = False

# Upload size cap in megabytes.
# NOTE(review): not referenced anywhere in this view of the file — confirm it
# is actually enforced (e.g. by the file uploader) or remove it.
MAX_FILE_SIZE_MB = 100
def count_pdf_pages(pdf_path):
    """Return the page count of the PDF at *pdf_path*, or 1 on any failure.

    Falls back to 1 when PyMuPDF is not installed or the file cannot be
    opened, so callers can always use the result for time estimates.
    """
    try:
        import fitz  # PyMuPDF
        document = fitz.open(pdf_path)
        try:
            return len(document)
        finally:
            document.close()
    except Exception:
        # Best-effort: any failure (missing dependency, bad file) means "1 page".
        return 1
# Free models - display name -> actual API model name.
# The ":provider" suffix on some names routes the call through a specific
# HuggingFace inference provider (see HF_ROUTED_MODELS below).
FREE_MODELS_MAP = {
    "Qwen3 235B": "Qwen/Qwen3-VL-235B-A22B-Instruct:novita",
    "DeepSeek V3.1": "deepseek-ai/DeepSeek-V3.1:novita",
    "Llama 3.3 70B": "meta-llama/Llama-3.3-70B-Instruct:groq",
    "Gemini 2.5 Flash": "gemini-2.5-flash",
    "GPT-4o Mini": "gpt-4o-mini",
    "Mistral Medium": "mistral-medium-2505",
    "Claude 3 Haiku": "claude-3-haiku-20240307",
    "Grok 4 Fast": "grok-4-fast-non-reasoning",
}
# Ordered display names for the free-tier selectbox.
FREE_MODEL_DISPLAY_NAMES = list(FREE_MODELS_MAP.keys())

# Paid models (user provides their own API key)
PAID_MODEL_CHOICES = [
    "gpt-4.1",
    "gpt-4o",
    "gpt-4o-mini",
    "claude-sonnet-4-5-20250929",
    "claude-opus-4-20250514",
    "claude-3-5-haiku-20241022",
    "gemini-2.5-pro",
    "gemini-2.5-flash",
    "mistral-large-latest",
]

# Models routed through HuggingFace; on the free tier these use the
# Space's HF_API_KEY (see get_api_key).
HF_ROUTED_MODELS = [
    "Qwen/Qwen3-VL-235B-A22B-Instruct:novita",
    "deepseek-ai/DeepSeek-V3.1:novita",
    "meta-llama/Llama-3.3-70B-Instruct:groq",
]
def is_free_model(model, model_tier):
    """Return True when the Space pays for the API call (free tier).

    The *model* argument is accepted for interface symmetry with other
    helpers but the decision depends only on the selected tier.
    """
    return "Free Models" == model_tier
def get_model_source(model):
    """Infer the provider backend name from a model identifier.

    Matching is case-insensitive and order matters: e.g. a Mistral model
    routed via ":novita" must fall through to "huggingface". Unknown
    identifiers default to "huggingface".
    """
    name = model.lower()
    hf_markers = (":novita", ":groq", "qwen", "llama", "deepseek")
    # Ordered (predicate, source) table — evaluated top to bottom, exactly
    # mirroring the precedence of the original if/elif chain.
    rules = (
        (lambda m: "gpt" in m, "openai"),
        (lambda m: "claude" in m, "anthropic"),
        (lambda m: "gemini" in m, "google"),
        (lambda m: "mistral" in m and ":novita" not in m, "mistral"),
        (lambda m: any(marker in m for marker in hf_markers), "huggingface"),
        (lambda m: "sonar" in m, "perplexity"),
        (lambda m: "grok" in m, "xai"),
    )
    for matches, source in rules:
        if matches(name):
            return source
    return "huggingface"
def get_api_key(model, model_tier, api_key_input):
    """Resolve the API key and a provider label for the selected model/tier.

    Free tier: the key comes from the Space's environment variables, chosen
    by provider. Paid tier: the user-supplied key (stripped) is returned with
    the label "User"; an empty/blank input yields an empty key.
    """
    if not is_free_model(model, model_tier):
        # Bring-your-own-key: whatever the user typed, trimmed; may be "".
        key = (api_key_input or "").strip()
        return key, "User"

    name = model.lower()
    if model in HF_ROUTED_MODELS:
        env_var, provider = "HF_API_KEY", "HuggingFace"
    elif "gpt" in name:
        env_var, provider = "OPENAI_API_KEY", "OpenAI"
    elif "gemini" in name:
        env_var, provider = "GOOGLE_API_KEY", "Google"
    elif "mistral" in name:
        env_var, provider = "MISTRAL_API_KEY", "Mistral"
    elif "claude" in name:
        env_var, provider = "ANTHROPIC_API_KEY", "Anthropic"
    elif "sonar" in name:
        env_var, provider = "PERPLEXITY_API_KEY", "Perplexity"
    elif "grok" in name:
        env_var, provider = "XAI_API_KEY", "xAI"
    else:
        # Unknown free models fall back to the HuggingFace key.
        env_var, provider = "HF_API_KEY", "HuggingFace"
    return os.environ.get(env_var, ""), provider
def generate_summarize_code(input_type, description, model, model_source, focus=None, max_length=None, instructions=None, mode=None):
    """Generate a reproducibility snippet calling ``catllm.summarize``.

    Returns a Python source string the user can run locally; "text" input
    produces a CSV-column template, anything else a PDF-folder template.
    Optional settings (focus, max_length, instructions, mode) are spliced
    into the generated call only when provided.

    NOTE(review): *description*, *focus* and *instructions* are interpolated
    into quoted strings unescaped — a value containing a double quote would
    break the generated snippet. Confirm whether that is acceptable here.
    """
    # Each optional parameter becomes a ",\n ..." fragment appended after the
    # last mandatory argument of the generated call (empty string when unset).
    focus_param = f',\n focus="{focus}"' if focus else ''
    length_param = f',\n max_length={max_length}' if max_length else ''
    instructions_param = f',\n instructions="{instructions}"' if instructions else ''
    if input_type == "text":
        return f'''import catllm
import pandas as pd
# Load your data
df = pd.read_csv("your_data.csv")
# Summarize the text column
result = catllm.summarize(
input_data=df["your_column"].tolist(),
api_key="YOUR_API_KEY",
description="{description}",
user_model="{model}",
model_source="{model_source}"{focus_param}{length_param}{instructions_param}
)
# View results
print(result)
result.to_csv("summarized_results.csv", index=False)
'''
    else:  # pdf
        # "mode" only applies to PDF processing (image/text/both).
        mode_param = f',\n mode="{mode}"' if mode else ''
        return f'''import catllm
# Summarize PDF documents
result = catllm.summarize(
input_data="path/to/your/pdfs/",
api_key="YOUR_API_KEY",
description="{description}",
user_model="{model}",
model_source="{model_source}"{mode_param}{focus_param}{length_param}{instructions_param}
)
# View results
print(result)
result.to_csv("summarized_results.csv", index=False)
'''
def generate_methodology_report_pdf(model, column_name, num_rows, model_source, filename, success_rate,
                                    result_df=None, processing_time=None,
                                    catllm_version=None, python_version=None,
                                    input_type="text", description=None, focus=None, max_length=None):
    """Generate a PDF methodology report for a summarization run.

    Args:
        model: Model identifier used for summarization.
        column_name: Source column (text input) or description (PDF input).
        num_rows: Number of items summarized.
        model_source: Provider backend name (e.g. "openai").
        filename: Name of the uploaded source file.
        success_rate: Percentage of successfully processed items.
        result_df, input_type, description: Accepted for API compatibility;
            currently not rendered in the report body.
        processing_time: Total seconds; when given, a timing table is added.
        catllm_version, python_version: Recorded in the version table
            ("unknown" when None).
        focus, max_length: Optional settings echoed in the summary table.

    Returns:
        str: Path to the generated PDF (a temp file; caller owns cleanup).
    """
    from reportlab.lib.pagesizes import letter
    from reportlab.lib import colors
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle

    def _styled_table(data, col_widths):
        """Build a table with the shared grey-label/grid cell styling."""
        table = Table(data, colWidths=col_widths)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('PADDING', (0, 0), (-1, -1), 6),
            ('FONTSIZE', (0, 0), (-1, -1), 9),
        ]))
        return table

    # Reserve an output path, then close the handle immediately: keeping the
    # NamedTemporaryFile open while reportlab writes to the same path leaked
    # a file descriptor and fails on Windows (the open handle is exclusive).
    pdf_file = tempfile.NamedTemporaryFile(mode='wb', suffix='_methodology_report.pdf', delete=False)
    pdf_path = pdf_file.name
    pdf_file.close()

    doc = SimpleDocTemplate(pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle('Title', parent=styles['Heading1'], fontSize=18, spaceAfter=20)
    heading_style = ParagraphStyle('Heading', parent=styles['Heading2'], fontSize=14, spaceAfter=10, spaceBefore=15)
    normal_style = styles['Normal']

    story = []
    # --- Title and intro ---
    story.append(Paragraph("CatLLM Summarization Report", title_style))
    story.append(Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", normal_style))
    story.append(Spacer(1, 15))
    story.append(Paragraph("About This Report", heading_style))
    about_text = """This methodology report documents the automated summarization process. \
CatLLM uses LLMs to generate concise summaries of text or PDF documents, providing \
consistent and reproducible results."""
    story.append(Paragraph(about_text, normal_style))
    story.append(Spacer(1, 15))

    # --- Summary section ---
    story.append(Paragraph("Summarization Summary", heading_style))
    story.append(Spacer(1, 10))
    summary_data = [
        ["Source File", filename],
        ["Source Column/Type", column_name],
        ["Model Used", model],
        ["Model Source", model_source],
        ["Items Summarized", str(num_rows)],
        ["Success Rate", f"{success_rate:.2f}%"],
    ]
    if focus:
        summary_data.append(["Focus", focus])
    if max_length:
        summary_data.append(["Max Length", f"{max_length} words"])
    story.append(_styled_table(summary_data, [150, 300]))
    story.append(Spacer(1, 15))

    # --- Optional timing section ---
    if processing_time is not None:
        story.append(Paragraph("Processing Time", heading_style))
        rows_per_min = (num_rows / processing_time) * 60 if processing_time > 0 else 0
        avg_time = processing_time / num_rows if num_rows > 0 else 0
        time_data = [
            ["Total Processing Time", f"{processing_time:.1f} seconds"],
            ["Average Time per Item", f"{avg_time:.2f} seconds"],
            ["Processing Rate", f"{rows_per_min:.1f} items/minute"],
        ]
        story.append(_styled_table(time_data, [180, 270]))
        story.append(Spacer(1, 15))

    # --- Version information ---
    story.append(Paragraph("Version Information", heading_style))
    version_data = [
        ["CatLLM Version", catllm_version or "unknown"],
        ["Python Version", python_version or "unknown"],
        ["Timestamp", datetime.now().strftime('%Y-%m-%d %H:%M:%S')],
    ]
    story.append(_styled_table(version_data, [180, 270]))
    story.append(Spacer(1, 30))

    # --- Citation ---
    story.append(Paragraph("Citation", heading_style))
    story.append(Paragraph("If you use CatLLM in your research, please cite:", normal_style))
    story.append(Spacer(1, 5))
    story.append(Paragraph("Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DOI: 10.5281/zenodo.15532316", normal_style))

    doc.build(story)
    return pdf_path
# ---------------------------------------------------------------------------
# Streamlit page setup
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="CatLLM - Research Data Summarizer",
    page_icon="🐱",
    layout="wide"
)

# Ensure every session-state slot the app reads exists before first use.
for _slot in ('results', 'survey_data', 'pdf_data'):
    st.session_state.setdefault(_slot, None)

# Header: logo on the left, title + tagline on the right.
col_logo, col_title = st.columns([1, 6])
with col_logo:
    st.image("logo.png", width=100)
with col_title:
    st.title("CatLLM - Research Data Summarizer")
    st.markdown("Generate concise summaries of survey responses and PDF documents using LLMs.")
# About section: a collapsible block of static markdown (privacy notice,
# feature overview, beta/feedback info, links, citation). No interactive state.
with st.expander("About This App"):
    st.markdown("""
**Privacy Notice:** Your data is sent to third-party LLM APIs for summarization. Do not upload sensitive, confidential, or personally identifiable information (PII).
---
**CatLLM** is an open-source Python package for processing text and document data using Large Language Models.
### What It Does
- **Summarize Text**: Generate concise summaries of survey responses or text data
- **Summarize PDFs**: Extract key information from PDF documents page-by-page
- **Focus Summaries**: Guide the model to focus on specific aspects of your data
### Beta Test - We Want Your Feedback!
This app is currently in **beta** and **free to use** while CatLLM is under review for publication, made possible by **Bashir Ahmed's generous fellowship support**.
- Found a bug? Have a feature request? Please open an issue on [GitHub](https://github.com/chrissoria/cat-llm)
- Reach out directly: [chrissoria@berkeley.edu](mailto:chrissoria@berkeley.edu)
### Links
- **PyPI**: [pip install cat-llm](https://pypi.org/project/cat-llm/)
- **GitHub**: [github.com/chrissoria/cat-llm](https://github.com/chrissoria/cat-llm)
- **Classifier App**: [CatLLM Survey Classifier](https://huggingface.co/spaces/CatLLM/survey-classifier)
### Citation
If you use CatLLM in your research, please cite:
```
Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DOI: 10.5281/zenodo.15532316
```
""")
# Main layout: input controls in the left column, results in the right.
col_input, col_output = st.columns([1, 1])

with col_input:
    # Input type selector: survey text vs. PDF documents.
    input_type_choice = st.radio(
        "Input Type",
        options=["Survey Responses", "PDF Documents"],
        horizontal=True,
        key="input_type_radio"
    )
    # Defaults; overwritten by whichever input branch runs below.
    input_data = None
    input_type_selected = "text"
    description = ""
    original_filename = "data"
    pdf_mode = "Image (visual documents)"

    if input_type_choice == "Survey Responses":
        input_type_selected = "text"
        uploaded_file = st.file_uploader(
            "Upload Data (CSV or Excel)",
            type=['csv', 'xlsx', 'xls'],
            key="survey_file"
        )
        # Sets a flag so the example dataset is loaded on this (and later) reruns.
        if st.button("Try Example Dataset", key="example_btn"):
            st.session_state.example_loaded = True
        columns = []
        df = None
        if uploaded_file is not None:
            try:
                if uploaded_file.name.endswith('.csv'):
                    df = pd.read_csv(uploaded_file)
                else:
                    df = pd.read_excel(uploaded_file)
                columns = df.columns.tolist()
                st.success(f"Loaded {len(df):,} rows")
            except Exception as e:
                st.error(f"Error loading file: {e}")
        elif hasattr(st.session_state, 'example_loaded') and st.session_state.example_loaded:
            # No upload but the example flag is set: load the bundled sample.
            try:
                df = pd.read_csv("example_data.csv")
                columns = df.columns.tolist()
                st.success(f"Loaded example dataset ({len(df)} rows)")
            except:
                # Best-effort: missing example file just leaves the UI empty.
                pass
        selected_column = st.selectbox(
            "Column to Summarize",
            options=columns if columns else ["Upload a file first"],
            disabled=not columns,
            key="survey_column"
        )
        description = selected_column if columns else ""
        original_filename = uploaded_file.name if uploaded_file else "example_data.csv"
        if df is not None and columns and selected_column in columns:
            input_data = df[selected_column].tolist()
    else:  # PDF Documents
        input_type_selected = "pdf"
        pdf_files = st.file_uploader(
            "Upload PDF Document(s)",
            type=['pdf'],
            accept_multiple_files=True,
            key="pdf_files"
        )
        pdf_description = st.text_input(
            "Document Description",
            placeholder="e.g., 'research papers', 'interview transcripts'",
            help="Helps the LLM understand context",
            key="pdf_desc"
        )
        pdf_mode = st.radio(
            "Processing Mode",
            options=["Image (visual documents)", "Text (text-heavy)", "Both (comprehensive)"],
            key="pdf_mode"
        )
        if pdf_files:
            # Persist each upload to a temp file (catllm takes file paths);
            # remember temp-path -> original-name so results can be relabeled.
            input_data = []
            pdf_name_map = {}  # Map temp paths to original filenames
            for f in pdf_files:
                with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
                    tmp.write(f.read())
                input_data.append(tmp.name)
                pdf_name_map[tmp.name] = f.name.replace('.pdf', '')
            st.session_state.pdf_name_map = pdf_name_map
            description = pdf_description or "document"
            original_filename = "pdf_files"
            st.success(f"Uploaded {len(pdf_files)} PDF file(s)")

    st.markdown("---")
    # Summarization options
    st.markdown("### Summarization Options")
    focus = st.text_input(
        "Focus (optional)",
        placeholder="e.g., 'main arguments', 'emotional content', 'key findings'",
        help="Guide the model to focus on specific aspects"
    )
    max_length = st.number_input(
        "Maximum Summary Length (words, optional)",
        min_value=0,
        max_value=1000,
        value=0,
        help="Leave at 0 for no limit"
    )
    # 0 means "no limit"; normalize to None so it is omitted downstream.
    max_length = max_length if max_length > 0 else None
    instructions = st.text_input(
        "Additional Instructions (optional)",
        placeholder="e.g., 'use bullet points', 'include quotes'",
        help="Custom instructions for the summarization"
    )
    st.markdown("---")
    # Model selection
    st.markdown("### Model Selection")
    model_tier = st.radio(
        "Model Tier",
        options=["Free Models", "Bring Your Own Key"],
        key="model_tier"
    )
    if model_tier == "Free Models":
        model_display = st.selectbox("Model", options=FREE_MODEL_DISPLAY_NAMES, key="model")
        model = FREE_MODELS_MAP[model_display]
        api_key = ""
    else:
        model = st.selectbox("Model", options=PAID_MODEL_CHOICES, key="model_paid")
        api_key = st.text_input("API Key", type="password", key="api_key")

    # Summarize button — the whole run happens inside this handler.
    if st.button("Summarize Data", type="primary", use_container_width=True):
        if input_data is None:
            st.error("Please upload data first")
        else:
            mode = None
            if input_type_selected == "pdf":
                # UI label -> catllm mode value.
                mode_mapping = {
                    "Image (visual documents)": "image",
                    "Text (text-heavy)": "text",
                    "Both (comprehensive)": "both"
                }
                mode = mode_mapping.get(pdf_mode, "image")
            actual_api_key, provider = get_api_key(model, model_tier, api_key)
            if not actual_api_key:
                st.error(f"{provider} API key not configured")
            else:
                model_source = get_model_source(model)
                items_list = input_data if isinstance(input_data, list) else [input_data]
                # Calculate estimated time (rough heuristics: ~5s/page for
                # PDFs, ~2s/item with a 10s floor for text).
                num_items = len(items_list)
                if input_type_selected == "pdf":
                    total_pages = sum(count_pdf_pages(p) for p in items_list)
                    est_seconds = total_pages * 5
                else:
                    est_seconds = max(10, num_items * 2)
                # NOTE(review): est_time_str is computed but never displayed
                # in this view — confirm intent or surface it in the UI.
                est_time_str = f"{est_seconds:.0f}s" if est_seconds < 60 else f"{est_seconds/60:.1f}m"
                # Progress UI
                progress_bar = st.progress(0)
                status_text = st.empty()
                start_time = time.time()

                def progress_callback(current_idx, total, label=None):
                    # Invoked by catllm.summarize per item; updates the bar
                    # and an ETA extrapolated from average per-item time.
                    progress = current_idx / total if total > 0 else 0
                    progress_bar.progress(min(progress, 1.0))
                    elapsed = time.time() - start_time
                    if current_idx > 0:
                        avg_time = elapsed / current_idx
                        eta_seconds = avg_time * (total - current_idx)
                        eta_str = f" | ETA: {eta_seconds:.0f}s" if eta_seconds < 60 else f" | ETA: {eta_seconds/60:.1f}m"
                    else:
                        eta_str = ""
                    label_str = f" ({label})" if label else ""
                    status_text.text(f"Processing item {current_idx+1} of {total}{label_str} ({progress*100:.0f}%){eta_str}")

                try:
                    # Build kwargs for summarize; optional settings are only
                    # passed when the user actually provided them.
                    summarize_kwargs = {
                        "input_data": items_list,
                        "api_key": actual_api_key,
                        "description": description,
                        "user_model": model,
                        "model_source": model_source,
                        "progress_callback": progress_callback,
                    }
                    if mode:
                        summarize_kwargs["mode"] = mode
                    if focus and focus.strip():
                        summarize_kwargs["focus"] = focus.strip()
                    if max_length:
                        summarize_kwargs["max_length"] = max_length
                    if instructions and instructions.strip():
                        summarize_kwargs["instructions"] = instructions.strip()
                    result_df = catllm.summarize(**summarize_kwargs)
                    processing_time = time.time() - start_time
                    total_items = len(result_df)
                    progress_bar.progress(1.0)
                    status_text.text(f"Completed {total_items} items in {processing_time:.1f}s")

                    # Replace temp paths with original filenames for PDF input
                    if input_type_selected == "pdf" and 'pdf_path' in result_df.columns:
                        pdf_name_map = st.session_state.get('pdf_name_map', {})

                        def replace_temp_path(val):
                            # Swap the temp path back to "<original name>.pdf";
                            # NaN cells pass through untouched.
                            if pd.isna(val):
                                return val
                            val_str = str(val)
                            for temp_path, orig_name in pdf_name_map.items():
                                if temp_path in val_str:
                                    return val_str.replace(temp_path, orig_name + '.pdf')
                            return val_str

                        result_df['pdf_path'] = result_df['pdf_path'].apply(replace_temp_path)

                    # Save CSV for the download button in the results column.
                    with tempfile.NamedTemporaryFile(mode='w', suffix='_summarized.csv', delete=False) as f:
                        result_df.to_csv(f.name, index=False)
                        csv_path = f.name
                    # Calculate success rate (100% when catllm reports no status column).
                    if 'processing_status' in result_df.columns:
                        success_count = (result_df['processing_status'] == 'success').sum()
                        success_rate = (success_count / len(result_df)) * 100
                    else:
                        success_rate = 100.0
                    # Get version info
                    try:
                        catllm_version = catllm.__version__
                    except AttributeError:
                        catllm_version = "unknown"
                    python_version = sys.version.split()[0]
                    # Generate methodology report
                    pdf_path = generate_methodology_report_pdf(
                        model=model,
                        column_name=description,
                        num_rows=total_items,
                        model_source=model_source,
                        filename=original_filename,
                        success_rate=success_rate,
                        result_df=result_df,
                        processing_time=processing_time,
                        catllm_version=catllm_version,
                        python_version=python_version,
                        input_type=input_type_selected,
                        description=description,
                        focus=focus if focus else None,
                        max_length=max_length
                    )
                    # Generate reproducibility code snippet for display/download.
                    code = generate_summarize_code(
                        input_type_selected, description, model, model_source,
                        focus=focus if focus else None,
                        max_length=max_length,
                        instructions=instructions if instructions else None,
                        mode=mode
                    )
                    # Stash everything the results column needs, then rerun so
                    # the right-hand pane re-renders with the new state.
                    st.session_state.results = {
                        'df': result_df,
                        'csv_path': csv_path,
                        'pdf_path': pdf_path,
                        'code': code,
                        'status': f"Summarized {total_items} items in {processing_time:.1f}s",
                    }
                    st.success(f"Summarized {total_items} items in {processing_time:.1f}s")
                    st.rerun()
                except Exception as e:
                    # Surface any failure (API errors, parsing, report build) in the UI.
                    st.error(f"Error: {str(e)}")
with col_output:
    st.markdown("### Results")
    if not st.session_state.results:
        st.info("Upload data and click 'Summarize Data' to see results here.")
    else:
        res = st.session_state.results
        # Placeholder for future chart
        st.info("Summary visualization coming soon!")
        # Strip verbose internal columns before showing the table.
        internal_cols = ['model_response', 'json', 'raw_response', 'raw_json']
        table_df = res['df'].copy()
        table_df = table_df.drop(columns=[name for name in internal_cols if name in table_df.columns])
        st.dataframe(table_df, use_container_width=True)
        # Download buttons: CSV of results, PDF methodology report.
        dl_csv, dl_pdf = st.columns(2)
        with dl_csv:
            with open(res['csv_path'], 'rb') as fh:
                st.download_button(
                    "Download Results (CSV)",
                    data=fh,
                    file_name="summarized_results.csv",
                    mime="text/csv"
                )
        with dl_pdf:
            with open(res['pdf_path'], 'rb') as fh:
                st.download_button(
                    "Download Methodology Report (PDF)",
                    data=fh,
                    file_name="methodology_report.pdf",
                    mime="application/pdf"
                )
        # Reproducibility snippet.
        with st.expander("See the Code"):
            st.code(res['code'], language='python')
# Bottom action row: reset on the left, code-view trigger on the right.
col_reset, col_code = st.columns(2)
with col_reset:
    if st.button("Reset", type="secondary", use_container_width=True):
        st.session_state.results = None
        # Drop the example-dataset flag if it was ever set.
        st.session_state.pop('example_loaded', None)
        st.rerun()
with col_code:
    # The button is only rendered once results exist.
    if st.session_state.results and st.button("See in Code", use_container_width=True):
        st.session_state.show_code_modal = True
# Inline "modal": shown when the flag is set and results are available.
if st.session_state.get('show_code_modal') and st.session_state.results:
    st.markdown("---")
    st.markdown("### Reproducibility Code")
    st.markdown("Use this code to reproduce the summarization with the CatLLM Python package:")
    snippet = st.session_state.results['code']
    st.code(snippet, language='python')
    if st.button("Close"):
        st.session_state.show_code_modal = False
        st.rerun()