Spaces:
Configuration error
Configuration error
| """ | |
| Auto-FineTune-Ops: Streamlit Dashboard | |
| ====================================== | |
| Premium interactive dashboard for ML fine-tuning pipeline. | |
| """ | |
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from pathlib import Path | |
| import sys | |
| import os | |
| import json | |
| import time | |
| from datetime import datetime | |
# Make this file's directory importable; the processing page later imports
# `preprocessing.*` (presumably a package living next to this script).
_APP_DIR = Path(__file__).parent
sys.path.insert(0, str(_APP_DIR))

# Page configuration (browser tab title/icon, wide layout, sidebar open).
_PAGE_SETTINGS = {
    "page_title": "Auto-FineTune-Ops",
    "page_icon": "π€",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Premium CSS styling: one stylesheet injected into the page. It themes the
# built-in widgets (metrics, buttons, tabs, progress bar, sidebar) and defines
# the custom classes used by the pages (`gradient-header`, `info-card`,
# `success-badge`, `warning-badge`).
_DASHBOARD_CSS = """
<style>
/* Main container */
.main .block-container {
padding-top: 2rem;
padding-bottom: 2rem;
}
/* Cards */
.stMetric {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
padding: 1rem;
border-radius: 12px;
border: 1px solid rgba(99, 102, 241, 0.2);
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
}
/* Gradient headers */
.gradient-header {
background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 2.5rem;
font-weight: 700;
margin-bottom: 1rem;
}
/* Info cards */
.info-card {
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
padding: 1.5rem;
border-radius: 16px;
border: 1px solid rgba(99, 102, 241, 0.3);
margin: 1rem 0;
}
/* Success badge */
.success-badge {
background: linear-gradient(90deg, #10b981, #059669);
color: white;
padding: 0.5rem 1rem;
border-radius: 20px;
font-weight: 600;
display: inline-block;
}
/* Warning badge */
.warning-badge {
background: linear-gradient(90deg, #f59e0b, #d97706);
color: white;
padding: 0.5rem 1rem;
border-radius: 20px;
font-weight: 600;
display: inline-block;
}
/* Sidebar styling */
section[data-testid="stSidebar"] {
background: linear-gradient(180deg, #0f0f23 0%, #1a1a2e 100%);
}
/* Button styling */
.stButton > button {
background: linear-gradient(90deg, #6366f1, #8b5cf6);
color: white;
border: none;
border-radius: 8px;
padding: 0.5rem 2rem;
font-weight: 600;
transition: all 0.3s ease;
}
.stButton > button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 20px rgba(99, 102, 241, 0.4);
}
/* Progress bar */
.stProgress > div > div {
background: linear-gradient(90deg, #6366f1, #8b5cf6, #a855f7);
}
/* Tab styling */
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
}
.stTabs [data-baseweb="tab"] {
background: rgba(99, 102, 241, 0.1);
border-radius: 8px;
padding: 0.5rem 1rem;
}
.stTabs [aria-selected="true"] {
background: linear-gradient(90deg, #6366f1, #8b5cf6);
}
</style>
"""
# unsafe_allow_html is required for the <style> element to be emitted as HTML.
st.markdown(_DASHBOARD_CSS, unsafe_allow_html=True)
# Initialize session state: seed each key only when it is absent, so values
# already stored in the session are never overwritten by a rerun.
_SESSION_DEFAULTS = {
    'current_page': 'home',
    'uploaded_data': None,
    'processed_data_path': None,
    'model_path': None,
    'training_goal': None,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# One status string per pipeline stage; the sidebar and the dashboard read it.
if 'pipeline_status' not in st.session_state:
    st.session_state.pipeline_status = dict(
        data='pending',
        training='pending',
        evaluation='pending',
        deployment='pending',
    )
# Sidebar: branding, page navigation buttons, and the pipeline status list.
with st.sidebar:
    st.markdown(
        '<p class="gradient-header" style="font-size: 1.5rem;">π€ Auto-FineTune-Ops</p>',
        unsafe_allow_html=True,
    )
    st.markdown("---")

    # Navigation: page key -> (icon, button label). Clicking a button stores
    # the page key in session state.
    pages = {
        'home': ('π ', 'Dashboard'),
        'data': ('π', 'Data Upload'),
        'process': ('π§Ή', 'Processing'),
        'training': ('π', 'Training'),
        'evaluation': ('βοΈ', 'Evaluation'),
        'deploy': ('π', 'Deploy')
    }
    for page_key, (page_icon, page_label) in pages.items():
        was_clicked = st.button(
            f"{page_icon} {page_label}",
            key=f"nav_{page_key}",
            use_container_width=True,
        )
        if was_clicked:
            st.session_state.current_page = page_key

    st.markdown("---")

    # Pipeline status: one line per stage; unknown statuses fall back to the
    # 'pending' icon.
    st.markdown("### π Pipeline Status")
    status_icons = {'pending': 'β³', 'running': 'π', 'complete': 'β ', 'error': 'β'}
    for stage_name, stage_status in st.session_state.pipeline_status.items():
        stage_icon = status_icons.get(stage_status, 'β³')
        st.markdown(f"{stage_icon} **{stage_name.title()}**: {stage_status}")

    st.markdown("---")
    st.markdown("*Built with β€οΈ using Streamlit*")
| # ============================================================================ | |
| # PAGE: HOME DASHBOARD | |
| # ============================================================================ | |
def _output_mtime(path):
    """Return *path*'s modification time, or 0.0 if it cannot be stat'ed."""
    try:
        return path.stat().st_mtime
    except OSError:
        # e.g. a broken symlink or a file removed while listing; sort it last.
        return 0.0


def _render_stage_metric(label, stage):
    """Show one pipeline stage from ``st.session_state.pipeline_status`` as a metric."""
    status = st.session_state.pipeline_status[stage]
    st.metric(
        label=label,
        value=status.title(),
        delta="Complete" if status == 'complete' else None,
    )


def _render_info_card(title, text):
    """Render one quick-start step as an ``info-card`` HTML block."""
    st.markdown(
        f'\n<div class="info-card">\n<h4>{title}</h4>\n<p>{text}</p>\n</div>\n',
        unsafe_allow_html=True,
    )


def _render_output_listing(directory, pattern, icon, empty_message, missing_message, limit=5):
    """List up to *limit* entries of *directory* matching *pattern*, newest first.

    Shows *missing_message* when the directory does not exist and
    *empty_message* when nothing matches.
    """
    if not directory.exists():
        st.info(missing_message)
        return
    # glob() yields entries in arbitrary order; sort by modification time so
    # the truncated list really shows the most recent outputs.
    entries = sorted(directory.glob(pattern), key=_output_mtime, reverse=True)
    if not entries:
        st.info(empty_message)
        return
    for entry in entries[:limit]:
        st.markdown(f"- {icon} `{entry.name}`")


def render_home():
    """Render the home dashboard page.

    Shows four status metrics (dataset + three pipeline stages read from
    ``st.session_state``), a four-step quick-start guide, and the most recent
    files found under ``./output`` (models, JSON reports, YAML logs).
    """
    st.markdown('<p class="gradient-header">π Pipeline Dashboard</p>', unsafe_allow_html=True)
    st.markdown("**One-click autonomous ML fine-tuning pipeline**")

    # Status cards
    dataset_loaded = st.session_state.uploaded_data is not None
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric(
            label="π Dataset",
            value="Ready" if dataset_loaded else "Not Loaded",
            delta="Uploaded" if dataset_loaded else None,
        )
    with col2:
        _render_stage_metric("π§Ή Processing", 'data')
    with col3:
        _render_stage_metric("π Training", 'training')
    with col4:
        _render_stage_metric("βοΈ Evaluation", 'evaluation')

    st.markdown("---")

    # Quick start guide
    st.markdown("### π Quick Start Guide")
    col1, col2 = st.columns(2)
    with col1:
        _render_info_card(
            "π Step 1: Upload Data",
            "Upload your CSV/JSON dataset with instruction-response pairs.",
        )
        _render_info_card(
            "π§Ή Step 2: Process Data",
            "The DataArchitectAgent will clean and format your data.",
        )
    with col2:
        _render_info_card(
            "π Step 3: Train Model",
            "Fine-tune with auto-configured hyperparameters.",
        )
        _render_info_card(
            "βοΈ Step 4: Evaluate",
            "Run Model Arena with LLM-as-Judge evaluation.",
        )

    # Recent output files
    st.markdown("---")
    st.markdown("### π Output Files")
    output_dir = Path("./output")
    if not output_dir.exists():
        st.info("Output directory will be created when you run the pipeline.")
        return

    tabs = st.tabs(["π Models", "π Reports", "π Logs"])
    with tabs[0]:
        _render_output_listing(
            output_dir / "models", "*", "π€",
            "No trained models yet.", "Models directory not found.",
        )
    with tabs[1]:
        _render_output_listing(
            output_dir / "reports", "*.json", "π",
            "No evaluation reports yet.", "Reports directory not found.",
        )
    with tabs[2]:
        _render_output_listing(
            output_dir / "logs", "*.yaml", "π",
            "No log files yet.", "Logs directory not found.",
        )
| # ============================================================================ | |
| # PAGE: DATA UPLOAD | |
| # ============================================================================ | |
def _read_uploaded_table(uploaded_file):
    """Parse an uploaded file into a DataFrame, choosing the reader by extension.

    ``.csv`` -> ``pd.read_csv``; ``.jsonl`` -> ``pd.read_json(lines=True)``;
    anything else -> ``pd.read_json``. The extension check is case-insensitive
    so that e.g. ``DATA.CSV`` is not handed to the JSON reader.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    if filename.endswith('.jsonl'):
        return pd.read_json(uploaded_file, lines=True)
    return pd.read_json(uploaded_file)


def _first_matching_column(columns, patterns):
    """Return the first column whose lower-cased name contains any pattern, else None."""
    for column in columns:
        # Column labels are not guaranteed to be strings (JSON can yield ints).
        lowered = str(column).lower()
        if any(pattern in lowered for pattern in patterns):
            return column
    return None


def _render_detection_badge(role, column):
    """Render a success badge naming *column*, or a warning badge when it is None."""
    from html import escape  # local import keeps this block self-contained

    if column is not None:
        # The column name comes from the uploaded file; escape it because the
        # badge is rendered as raw HTML.
        st.markdown(
            f'<span class="success-badge">{role}: {escape(str(column))}</span>',
            unsafe_allow_html=True,
        )
    else:
        st.markdown(
            f'<span class="warning-badge">{role}: Not detected</span>',
            unsafe_allow_html=True,
        )


def _column_sample(series, limit=80):
    """Return the first value of *series* as text, truncated to *limit* chars.

    An empty series yields an empty string instead of raising IndexError.
    """
    if len(series) == 0:
        return ''
    text = str(series.iloc[0])
    return text[:limit] + '...' if len(text) > limit else text


def render_data_upload():
    """Render the data upload page.

    Lets the user load a CSV/JSON/JSONL dataset into
    ``st.session_state.uploaded_data`` (or merge an additional file with
    identical columns), then shows statistics, auto-detected
    instruction/output columns, a full preview, download buttons, and a
    per-column summary.
    """
    st.markdown('<p class="gradient-header">π Data Upload & Preview</p>', unsafe_allow_html=True)

    # File management bar (only when a dataset is loaded)
    if st.session_state.uploaded_data is not None:
        fm1, fm2, fm3 = st.columns([3, 1, 1])
        with fm1:
            # `or` also covers the key being present but None.
            current_name = st.session_state.get('uploaded_filename') or 'dataset'
            st.info(f"π Currently loaded: **{current_name}** ({len(st.session_state.uploaded_data):,} rows)")
        with fm2:
            if st.button("ποΈ Remove Dataset", type="secondary"):
                st.session_state.uploaded_data = None
                st.session_state.uploaded_filename = None
                st.session_state.processed_data_path = None
                st.session_state.pipeline_status['data'] = 'pending'
                st.rerun()
        with fm3:
            if st.button("π Add More Data"):
                st.session_state['show_add_file'] = True

    # File uploader: shown when nothing is loaded or when "Add More Data" was requested
    adding_more = st.session_state.get('show_add_file', False)
    show_uploader = (st.session_state.uploaded_data is None) or adding_more
    if show_uploader:
        if st.session_state.uploaded_data is None:
            upload_label = "Upload your dataset (CSV, JSON, or JSONL)"
        else:
            upload_label = "Upload additional file to merge with current dataset"
        uploaded_file = st.file_uploader(
            upload_label,
            type=['csv', 'json', 'jsonl'],
            help="Your dataset should contain instruction-response pairs.",
            # The counter in the key gives a fresh widget after each handled upload.
            key=f"uploader_{st.session_state.get('upload_counter', 0)}"
        )
        if uploaded_file:
            try:
                new_df = _read_uploaded_table(uploaded_file)

                # Merge or replace
                if st.session_state.uploaded_data is not None and st.session_state.get('show_add_file', False):
                    existing_df = st.session_state.uploaded_data
                    if list(new_df.columns) == list(existing_df.columns):
                        st.session_state.uploaded_data = pd.concat([existing_df, new_df], ignore_index=True)
                        previous_name = st.session_state.get('uploaded_filename') or 'data'
                        st.session_state.uploaded_filename = f"{previous_name} + {uploaded_file.name}"
                        st.success(f"β Merged **{uploaded_file.name}** ({len(new_df):,} rows) β Total: **{len(st.session_state.uploaded_data):,}** rows")
                    else:
                        st.error(f"β Column mismatch! Existing: {list(existing_df.columns)} vs New: {list(new_df.columns)}")
                else:
                    st.session_state.uploaded_data = new_df
                    st.session_state.uploaded_filename = uploaded_file.name
                    st.success(f"β Successfully loaded **{uploaded_file.name}**")

                # Reset after every handled upload so the same file is not
                # processed (and merged) again on the next rerun.
                st.session_state['show_add_file'] = False
                st.session_state['upload_counter'] = st.session_state.get('upload_counter', 0) + 1
            except Exception as e:
                # Page-level boundary: report any parse failure to the user.
                st.error(f"Error loading file: {str(e)}")

    # Data display
    if st.session_state.uploaded_data is None:
        return
    df = st.session_state.uploaded_data

    # Dataset statistics
    st.markdown("### π Dataset Statistics")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Rows", f"{len(df):,}")
    with col2:
        st.metric("Total Columns", len(df.columns))
    with col3:
        total_bytes = df.memory_usage(deep=True).sum()
        st.metric("Memory Size", f"{total_bytes / 1024:.1f} KB")
    with col4:
        missing = int(df.isnull().sum().sum())
        st.metric("Missing Values", missing)

    st.markdown("---")

    # Column detection: first column whose name contains a known keyword
    st.markdown("### π Auto-Detected Columns")
    instruction_patterns = ['instruction', 'prompt', 'question', 'query', 'user', 'input_text']
    output_patterns = ['output', 'response', 'answer', 'completion', 'assistant', 'target']
    detected_instruction = _first_matching_column(df.columns, instruction_patterns)
    detected_output = _first_matching_column(df.columns, output_patterns)
    col1, col2 = st.columns(2)
    with col1:
        _render_detection_badge("Instruction", detected_instruction)
    with col2:
        _render_detection_badge("Output", detected_output)

    st.markdown("---")

    # Full data preview (scrollable)
    st.markdown("### π Complete Data Preview")
    st.caption(f"Showing all **{len(df):,}** rows. Scroll to browse the full dataset.")
    st.dataframe(df, use_container_width=True, height=450)

    # Download raw data
    st.markdown("### π₯ Download Dataset")
    # `or` guards against the filename key being present but None.
    base_name = (st.session_state.get('uploaded_filename') or 'dataset').rsplit('.', 1)[0]
    dl1, dl2 = st.columns(2)
    with dl1:
        csv_data = df.to_csv(index=False).encode('utf-8')
        st.download_button("β¬οΈ Download as CSV", csv_data,
                           file_name=f"{base_name}.csv",
                           mime="text/csv")
    with dl2:
        json_data = df.to_json(orient='records', indent=2).encode('utf-8')
        st.download_button("β¬οΈ Download as JSON", json_data,
                           file_name=f"{base_name}.json",
                           mime="application/json")

    # Column summary
    st.markdown("### π Column Summary")
    col_info = [
        {
            'Column': col,
            'Type': str(df[col].dtype),
            'Non-Null': df[col].notna().sum(),
            'Unique': df[col].nunique(),
            'Sample': _column_sample(df[col]),
        }
        for col in df.columns
    ]
    st.dataframe(pd.DataFrame(col_info), use_container_width=True)
| # ============================================================================ | |
| # PAGE: DATA PROCESSING | |
| # ============================================================================ | |
| def render_processing(): | |
| st.markdown('<p class="gradient-header">π§Ή Advanced Data Processing</p>', unsafe_allow_html=True) | |
| if st.session_state.uploaded_data is None: | |
| st.warning("β οΈ Please upload a dataset first!") | |
| if st.button("π Go to Data Upload"): | |
| st.session_state.current_page = 'data' | |
| st.rerun() | |
| return | |
| df = st.session_state.uploaded_data | |
| # ββ Dataset Stats Header ββ | |
| st.markdown("### π Dataset Statistics") | |
| sc1, sc2, sc3, sc4 = st.columns(4) | |
| with sc1: | |
| st.metric("Total Rows", f"{len(df):,}") | |
| with sc2: | |
| st.metric("Columns", len(df.columns)) | |
| with sc3: | |
| avg_len = int(df.iloc[:, 0].astype(str).str.len().mean()) if len(df) > 0 else 0 | |
| st.metric("Avg Text Length", f"{avg_len:,} chars") | |
| with sc4: | |
| est_tokens = int(avg_len * len(df) / 4) if avg_len > 0 else 0 | |
| st.metric("Est. Total Tokens", f"{est_tokens:,}") | |
| st.markdown("---") | |
| # ββ Training Goal ββ | |
| goal = st.text_input( | |
| "Training Goal", | |
| value=st.session_state.training_goal or "assistant", | |
| help="e.g., medical_assistant, customer_support, code_helper" | |
| ) | |
| st.session_state.training_goal = goal | |
| # ββ Column Mapping ββ | |
| st.markdown("### π― Column Mapping") | |
| instruction_patterns = ['instruction', 'prompt', 'question', 'query', 'user', 'input_text', 'human'] | |
| output_patterns = ['output', 'response', 'answer', 'completion', 'assistant', 'target'] | |
| input_patterns = ['context', 'input', 'background', 'reference'] | |
| detected_instruction = detected_output = detected_input = None | |
| available_columns = list(df.columns) | |
| for col in available_columns: | |
| col_lower = col.lower() | |
| for p in instruction_patterns: | |
| if p in col_lower and not detected_instruction: | |
| detected_instruction = col | |
| for p in output_patterns: | |
| if p in col_lower and not detected_output: | |
| detected_output = col | |
| for p in input_patterns: | |
| if p in col_lower and not detected_input: | |
| detected_input = col | |
| mc1, mc2, mc3 = st.columns(3) | |
| with mc1: | |
| instruction_col = st.selectbox("Instruction Column *", options=available_columns, | |
| index=available_columns.index(detected_instruction) if detected_instruction else 0, | |
| help="Column containing instructions/prompts/questions") | |
| with mc2: | |
| output_col = st.selectbox("Output Column *", options=available_columns, | |
| index=available_columns.index(detected_output) if detected_output else (1 if len(available_columns) > 1 else 0), | |
| help="Column containing responses/answers/outputs") | |
| with mc3: | |
| input_col_options = ["None"] + available_columns | |
| default_input_idx = input_col_options.index(detected_input) if detected_input else 0 | |
| input_col_selection = st.selectbox("Input/Context Column (Optional)", options=input_col_options, | |
| index=default_input_idx, help="Optional column containing additional context") | |
| input_col = None if input_col_selection == "None" else input_col_selection | |
| st.markdown("---") | |
| # ββ Safe Preset Button ββ | |
| if st.button("π‘οΈ Load Safe Preset", help="Apply recommended defaults for most datasets"): | |
| st.session_state['safe_preset'] = True | |
| st.rerun() | |
| use_safe = st.session_state.get('safe_preset', False) | |
| # ==================================================================== | |
| # 1οΈβ£ Text Cleaning Controls | |
| # ==================================================================== | |
| with st.expander("1οΈβ£ Text Cleaning Controls", expanded=False): | |
| tc1, tc2 = st.columns(2) | |
| with tc1: | |
| clean_html = st.checkbox("Remove HTML Tags", value=use_safe, help="Strip all HTML/XML tags from text") | |
| clean_urls = st.checkbox("Remove URLs", value=use_safe, help="Remove http/https/www links") | |
| clean_emojis = st.checkbox("Remove Emojis", value=False, help="Strip emoji characters") | |
| clean_whitespace = st.checkbox("Normalize Whitespace", value=True, help="Collapse multiple spaces/tabs into one") | |
| with tc2: | |
| clean_lowercase = st.checkbox("Lowercase All Text", value=False, help="Convert text to lowercase (disable to preserve case)") | |
| clean_special = st.checkbox("Remove Special Characters", value=False, help="Keep only alphanumeric + basic punctuation") | |
| clean_linebreaks = st.checkbox("Strip Extra Line Breaks", value=True, help="Reduce 3+ newlines to double newlines") | |
| # ==================================================================== | |
| # 2οΈβ£ Tokenization Controls | |
| # ==================================================================== | |
| with st.expander("2οΈβ£ Tokenization Controls", expanded=False): | |
| tk1, tk2 = st.columns(2) | |
| with tk1: | |
| tokenizer_choice = st.selectbox("Tokenizer", ["tiktoken", "HuggingFace"], | |
| help="tiktoken = OpenAI-compatible, HuggingFace = model-specific tokenizer") | |
| if tokenizer_choice == "HuggingFace": | |
| hf_model_name = st.text_input("HF Model Name", value="meta-llama/Llama-3-8b", | |
| help="HuggingFace model name for tokenizer") | |
| else: | |
| hf_model_name = "" | |
| max_total_tokens = st.slider("Max Tokens per Sample", 128, 8192, 2048, | |
| help="Maximum total tokens allowed per sample") | |
| with tk2: | |
| truncate_long = st.checkbox("Truncate Long Samples", value=False, | |
| help="Cut text exceeding max tokens") | |
| split_long = st.checkbox("Split Long Samples into Chunks", value=False, | |
| help="Break long texts into overlapping chunks") | |
| if split_long: | |
| split_overlap = st.slider("Chunk Overlap Tokens", 0, 200, 50, | |
| help="Number of overlapping tokens between chunks") | |
| else: | |
| split_overlap = 50 | |
| # Token stats preview | |
| if st.button("π Show Token Stats Preview", key="token_stats_btn"): | |
| with st.spinner("Counting tokens..."): | |
| try: | |
| from preprocessing.tokenization import TokenizationConfig, get_tokenizer, compute_token_stats | |
| tk_cfg = TokenizationConfig( | |
| tokenizer_name="tiktoken" if tokenizer_choice == "tiktoken" else hf_model_name, | |
| ) | |
| tokenizer = get_tokenizer(tk_cfg) | |
| is_tiktoken = tokenizer_choice == "tiktoken" | |
| stats_cols = [c for c in [instruction_col, output_col] if c in df.columns] | |
| stats = compute_token_stats(df.head(200), stats_cols, tokenizer, is_tiktoken) | |
| for col_name, s in stats.items(): | |
| st.markdown(f"**{col_name}**: min={s['min']}, max={s['max']}, mean={s['mean']}, p95={s['p95']}") | |
| except Exception as e: | |
| st.warning(f"Could not compute token stats: {e}") | |
| # ==================================================================== | |
| # 3οΈβ£ System Prompt Configuration | |
| # ==================================================================== | |
| with st.expander("3οΈβ£ System Prompt Configuration", expanded=False): | |
| system_prompt_text = st.text_area("Global System Prompt", | |
| value="You are a helpful AI assistant." if not use_safe else "You are a helpful AI assistant.", | |
| height=100, help="System prompt prepended to every sample in chat format") | |
| prepend_system = st.checkbox("Prepend System Prompt to All Samples", value=True, | |
| help="Include this system prompt in all formatted entries") | |
| if st.button("ποΈ Preview Formatted Chat JSON", key="preview_chat_btn"): | |
| try: | |
| from preprocessing.system_prompt import preview_formatted_json | |
| preview = preview_formatted_json(df, system_prompt_text, instruction_col, output_col, input_col, n=2) | |
| st.code(preview, language="json") | |
| except Exception as e: | |
| st.warning(f"Preview error: {e}") | |
| # ==================================================================== | |
| # 4οΈβ£ Dataset Balancing | |
| # ==================================================================== | |
| with st.expander("4οΈβ£ Dataset Balancing (Classification)", expanded=False): | |
| balance_enabled = st.checkbox("Enable Class Balancing", value=False, | |
| help="Balance class distribution for classification tasks") | |
| if balance_enabled: | |
| label_col_options = available_columns | |
| label_col = st.selectbox("Label Column", options=label_col_options, | |
| help="Column containing class labels") | |
| balance_strategy = st.radio("Strategy", ["none", "oversample", "undersample"], | |
| help="Oversample = duplicate minority, Undersample = drop majority") | |
| # Show distribution chart | |
| if label_col in df.columns: | |
| from preprocessing.dataset_balancing import compute_label_distribution | |
| dist = compute_label_distribution(df, label_col) | |
| if dist: | |
| fig = px.bar(x=list(dist.keys()), y=list(dist.values()), | |
| labels={'x': 'Label', 'y': 'Count'}, title="Label Distribution") | |
| fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', | |
| font_color='#e2e8f0') | |
| st.plotly_chart(fig, use_container_width=True) | |
| else: | |
| label_col = None | |
| balance_strategy = "none" | |
| # ==================================================================== | |
| # 5οΈβ£ Quality Filters | |
| # ==================================================================== | |
| with st.expander("5οΈβ£ Quality Filters", expanded=False): | |
| qf1, qf2 = st.columns(2) | |
| with qf1: | |
| min_words = st.number_input("Min Word Count", min_value=0, value=3 if use_safe else 0, | |
| help="Minimum words required per sample (0 = no filter)") | |
| max_words = st.number_input("Max Word Count", min_value=0, value=0, | |
| help="Maximum words allowed per sample (0 = no limit)") | |
| profanity_filter = st.checkbox("Profanity Filter", value=False, | |
| help="Remove samples containing profane language") | |
| with qf2: | |
| language_filter = st.checkbox("Language Detection Filter", value=False, | |
| help="Keep only samples in specified languages") | |
| if language_filter: | |
| allowed_langs = st.text_input("Allowed Languages (comma-separated)", value="en", | |
| help="ISO 639-1 codes, e.g. en,fr,de") | |
| else: | |
| allowed_langs = "en" | |
| remove_low_quality = st.checkbox("Remove Low-Quality Responses", value=use_safe, | |
| help="Remove short / generic / placeholder responses") | |
| # ==================================================================== | |
| # 6οΈβ£ Deduplication Advanced | |
| # ==================================================================== | |
| with st.expander("6οΈβ£ Deduplication", expanded=False): | |
| dedup_exact = st.checkbox("Remove Exact Duplicates", value=True, | |
| help="Remove rows with identical instruction text") | |
| dedup_semantic = st.checkbox("Remove Semantic Duplicates", value=False, | |
| help="Use TF-IDF cosine similarity to find near-duplicates") | |
| if dedup_semantic: | |
| semantic_threshold = st.slider("Similarity Threshold", 0.5, 1.0, 0.90, 0.01, | |
| help="Cosine similarity above this threshold = duplicate (higher = stricter)") | |
| else: | |
| semantic_threshold = 0.90 | |
| # ==================================================================== | |
| # 7οΈβ£ Train / Validation Split | |
| # ==================================================================== | |
| with st.expander("7οΈβ£ Train / Validation Split", expanded=False): | |
| split_enabled = st.checkbox("Enable Train/Val Split", value=True, | |
| help="Split dataset into training and validation sets") | |
| if split_enabled: | |
| train_ratio = st.slider("Train Ratio", 0.5, 0.95, 0.9 if use_safe else 0.8, 0.05, | |
| help="Proportion of data used for training") | |
| st.markdown(f"**Split**: {int(train_ratio*100)}% Train / {int((1-train_ratio)*100)}% Validation") | |
| random_seed = st.number_input("Random Seed", min_value=0, value=42, | |
| help="Seed for reproducible splits") | |
| shuffle_data = st.checkbox("Shuffle Before Split", value=True, | |
| help="Randomly shuffle data before splitting") | |
| else: | |
| train_ratio = 0.8 | |
| random_seed = 42 | |
| shuffle_data = True | |
| # ==================================================================== | |
| # 8οΈβ£ Output Formatting | |
| # ==================================================================== | |
| with st.expander("8οΈβ£ Output Formatting", expanded=False): | |
| format_type = st.selectbox("Export Format", ["openai_chat", "completion", "classification", "custom"], | |
| help="OpenAI Chat = messages format, Completion = prompt/completion, Classification = text/label") | |
| custom_schema = {} | |
| if format_type == "custom": | |
| st.markdown("**Define Custom Schema** (output_key β source_column)") | |
| num_fields = st.number_input("Number of Fields", 1, 10, 2) | |
| for i in range(int(num_fields)): | |
| fc1, fc2 = st.columns(2) | |
| with fc1: | |
| key = st.text_input(f"Output Key {i+1}", value=f"field_{i+1}", key=f"ckey_{i}") | |
| with fc2: | |
| val = st.selectbox(f"Source Column {i+1}", options=available_columns, key=f"cval_{i}") | |
| custom_schema[key] = val | |
| # ==================================================================== | |
| # 9οΈβ£ Safety & PII Filtering | |
| # ==================================================================== | |
| with st.expander("9οΈβ£ Safety & PII Filtering", expanded=False): | |
| pii1, pii2 = st.columns(2) | |
| with pii1: | |
| pii_emails = st.checkbox("Detect & Mask Emails", value=use_safe, | |
| help="Replace email addresses with [REDACTED]") | |
| pii_phones = st.checkbox("Detect & Mask Phone Numbers", value=use_safe, | |
| help="Replace phone numbers with [REDACTED]") | |
| pii_ids = st.checkbox("Detect & Mask CNIC/SSN", value=use_safe, | |
| help="Replace national ID / SSN patterns with [REDACTED]") | |
| with pii2: | |
| pii_keys = st.checkbox("Detect & Mask API Keys", value=use_safe, | |
| help="Replace long hex/base64 strings that look like secrets") | |
| pii_addresses = st.checkbox("Detect & Mask Addresses", value=False, | |
| help="Replace street addresses and zip codes") | |
| # ==================================================================== | |
| # π Augmentation (Optional) | |
| # ==================================================================== | |
| with st.expander("π Augmentation (Optional)", expanded=False): | |
| aug_enabled = st.checkbox("Enable Data Augmentation", value=False, | |
| help="Generate synthetic variations of existing samples") | |
| if aug_enabled: | |
| ag1, ag2 = st.columns(2) | |
| with ag1: | |
| aug_paraphrase = st.checkbox("Paraphrase Instructions", value=True, | |
| help="Synonym-based paraphrasing of instructions") | |
| aug_variations = st.checkbox("Generate Variations", value=False, | |
| help="Minor text variations (punctuation, casing)") | |
| with ag2: | |
| aug_backtranslate = st.checkbox("Back Translation", value=False, | |
| help="Simulate back-translation for diversity") | |
| aug_tone = st.checkbox("Tone Rewriting", value=False, | |
| help="Rewrite instructions in different tones") | |
| aug_factor = st.slider("Augmentation Factor", 1, 5, 1, | |
| help="Number of augmented copies per original sample") | |
| else: | |
| aug_paraphrase = aug_variations = aug_backtranslate = aug_tone = False | |
| aug_factor = 1 | |
| st.markdown("---") | |
| # ββ Run Pipeline Button ββ | |
| if st.button("π Run Advanced Processing Pipeline", type="primary", use_container_width=True): | |
| st.session_state.pipeline_status['data'] = 'running' | |
| with st.spinner("Running preprocessing pipeline..."): | |
| progress_bar = st.progress(0) | |
| status_text = st.empty() | |
| try: | |
| from preprocessing.pipeline import PreprocessingPipeline, PreprocessingConfig | |
| from preprocessing.text_cleaning import TextCleaningConfig | |
| from preprocessing.tokenization import TokenizationConfig | |
| from preprocessing.system_prompt import SystemPromptConfig | |
| from preprocessing.dataset_balancing import BalancingConfig | |
| from preprocessing.quality_filters import QualityFilterConfig | |
| from preprocessing.deduplication import DeduplicationConfig | |
| from preprocessing.train_val_split import SplitConfig | |
| from preprocessing.output_formatter import OutputFormatConfig, format_dataset, export_jsonl, generate_preview | |
| from preprocessing.pii_filter import PIIFilterConfig | |
| from preprocessing.augmentation import AugmentationConfig | |
| # Build config from UI values | |
| config = PreprocessingConfig( | |
| instruction_col=instruction_col, | |
| output_col=output_col, | |
| input_col=input_col, | |
| label_col=label_col if balance_enabled else None, | |
| text_cleaning=TextCleaningConfig( | |
| remove_html=clean_html, remove_urls=clean_urls, | |
| remove_emojis=clean_emojis, normalize_whitespace=clean_whitespace, | |
| lowercase=clean_lowercase, remove_special_chars=clean_special, | |
| strip_extra_linebreaks=clean_linebreaks, | |
| ), | |
| tokenization=TokenizationConfig( | |
| tokenizer_name="tiktoken" if tokenizer_choice == "tiktoken" else hf_model_name, | |
| max_total_tokens=max_total_tokens, | |
| truncate_long=truncate_long, split_long=split_long, | |
| split_overlap=split_overlap, | |
| ), | |
| system_prompt=SystemPromptConfig( | |
| system_prompt=system_prompt_text, | |
| prepend_to_all=prepend_system, | |
| ), | |
| balancing=BalancingConfig( | |
| enabled=balance_enabled, | |
| label_column=label_col if balance_enabled else "", | |
| strategy=balance_strategy if balance_enabled else "none", | |
| ), | |
| quality_filters=QualityFilterConfig( | |
| min_word_count=min_words, max_word_count=max_words, | |
| profanity_filter=profanity_filter, | |
| language_filter=language_filter, | |
| allowed_languages=[l.strip() for l in allowed_langs.split(',')], | |
| remove_low_quality=remove_low_quality, | |
| ), | |
| deduplication=DeduplicationConfig( | |
| remove_exact=dedup_exact, remove_semantic=dedup_semantic, | |
| semantic_threshold=semantic_threshold, | |
| ), | |
| split=SplitConfig( | |
| enabled=split_enabled, train_ratio=train_ratio, | |
| random_seed=int(random_seed), shuffle=shuffle_data, | |
| ), | |
| output_format=OutputFormatConfig( | |
| format_type=format_type, custom_schema=custom_schema, | |
| ), | |
| pii_filter=PIIFilterConfig( | |
| filter_emails=pii_emails, filter_phones=pii_phones, | |
| filter_id_numbers=pii_ids, filter_api_keys=pii_keys, | |
| filter_addresses=pii_addresses, | |
| ), | |
| augmentation=AugmentationConfig( | |
| enabled=aug_enabled, paraphrase=aug_paraphrase, | |
| generate_variations=aug_variations, | |
| back_translate=aug_backtranslate, | |
| tone_rewrite=aug_tone, | |
| augmentation_factor=aug_factor, | |
| ), | |
| ) | |
| def progress_cb(stage_name, pct): | |
| status_text.text(f"βοΈ {stage_name}...") | |
| progress_bar.progress(min(pct, 100)) | |
| pipeline = PreprocessingPipeline(config) | |
| train_df, val_df, logs = pipeline.run(df, progress_callback=progress_cb) | |
| # Format output | |
| sys_prompt = system_prompt_text if prepend_system else "" | |
| formatted_data = format_dataset( | |
| train_df, config.output_format, | |
| system_prompt=sys_prompt, | |
| instruction_col=instruction_col, | |
| output_col=output_col, | |
| input_col=input_col, | |
| label_col=label_col if balance_enabled else None, | |
| ) | |
| # Export | |
| output_dir = Path("./output/processed_data") | |
| output_dir.mkdir(parents=True, exist_ok=True) | |
| train_path = export_jsonl(formatted_data, str(output_dir / f"{goal}_train.jsonl")) | |
| val_path = None | |
| if len(val_df) > 0: | |
| val_formatted = format_dataset( | |
| val_df, config.output_format, | |
| system_prompt=sys_prompt, | |
| instruction_col=instruction_col, | |
| output_col=output_col, | |
| input_col=input_col, | |
| label_col=label_col if balance_enabled else None, | |
| ) | |
| val_path = export_jsonl(val_formatted, str(output_dir / f"{goal}_val.jsonl")) | |
| progress_bar.progress(100) | |
| status_text.text("β Pipeline complete!") | |
| st.session_state.processed_data_path = train_path | |
| st.session_state.pipeline_status['data'] = 'complete' | |
| # ββ Results ββ | |
| st.success(f"β Training data saved to: `{train_path}`") | |
| if val_path: | |
| st.success(f"β Validation data saved to: `{val_path}`") | |
| # Stats | |
| rc1, rc2, rc3, rc4 = st.columns(4) | |
| with rc1: | |
| st.metric("Original Rows", f"{len(df):,}") | |
| with rc2: | |
| st.metric("Train Samples", f"{len(train_df):,}") | |
| with rc3: | |
| st.metric("Val Samples", f"{len(val_df):,}") | |
| with rc4: | |
| removed = len(df) - len(train_df) - len(val_df) | |
| st.metric("Removed", f"{max(0, removed):,}") | |
| # ββ Pipeline Logs ββ | |
| st.markdown("### π Pipeline Logs") | |
| log_data = [] | |
| for log in logs: | |
| log_data.append({ | |
| 'Stage': log.stage, | |
| 'Description': log.description, | |
| 'Rows Before': log.rows_before, | |
| 'Rows After': log.rows_after, | |
| 'Delta': log.rows_delta, | |
| 'Time (ms)': log.duration_ms, | |
| }) | |
| st.dataframe(pd.DataFrame(log_data), use_container_width=True) | |
| # ββ Preview ββ | |
| st.markdown("### ποΈ Output Preview") | |
| preview_json = generate_preview(formatted_data, n=3) | |
| st.code(preview_json, language="json") | |
| # ββ Download ββ | |
| st.markdown("### π₯ Download") | |
| dl1, dl2 = st.columns(2) | |
| with dl1: | |
| with open(train_path, 'r', encoding='utf-8') as f: | |
| st.download_button("β¬οΈ Download Train JSONL", f.read(), | |
| file_name=f"{goal}_train.jsonl", mime="application/jsonl") | |
| with dl2: | |
| if val_path and Path(val_path).exists(): | |
| with open(val_path, 'r', encoding='utf-8') as f: | |
| st.download_button("β¬οΈ Download Val JSONL", f.read(), | |
| file_name=f"{goal}_val.jsonl", mime="application/jsonl") | |
| except Exception as e: | |
| st.session_state.pipeline_status['data'] = 'error' | |
| st.error(f"β Pipeline Error: {str(e)}") | |
| import traceback | |
| st.code(traceback.format_exc()) | |
| # Show previously processed data | |
| if st.session_state.processed_data_path: | |
| st.markdown("---") | |
| st.markdown("### π Last Processed Data") | |
| try: | |
| processed_path = Path(st.session_state.processed_data_path) | |
| if processed_path.exists(): | |
| with open(processed_path, encoding='utf-8') as f: | |
| samples = [json.loads(line) for line in f.readlines()[:5]] | |
| for i, sample in enumerate(samples): | |
| with st.expander(f"Sample {i+1}"): | |
| st.json(sample) | |
| except Exception as e: | |
| st.warning(f"Could not load preview: {e}") | |
| # ============================================================================ | |
| # PAGE: TRAINING | |
| # ============================================================================ | |
def render_training():
    """Render the "Model Training" page.

    The page needs ``st.session_state.processed_data_path``; when it is unset a
    shortcut back to the processing page is shown instead. If a CUDA GPU is
    detected, local training through ``TrainingPilot`` is offered. Otherwise the
    user chooses between a Google Colab workflow and uploading results that
    were produced elsewhere. The upload widgets at the bottom are rendered in
    every mode and record their result in ``st.session_state.model_path``.
    """
    st.markdown('<p class="gradient-header">π Model Training</p>', unsafe_allow_html=True)

    # Prerequisite check. Truthiness (rather than `is None`) matches the check
    # used on the processing page and also rejects an empty path string.
    if not st.session_state.processed_data_path:
        st.warning("β οΈ Please process your data first!")
        if st.button("π§Ή Go to Processing"):
            st.session_state.current_page = 'process'
            st.rerun()
        return

    # GPU detection. torch is optional: any import or CUDA failure means "no GPU".
    has_gpu = False
    try:
        import torch
        has_gpu = torch.cuda.is_available()
        if has_gpu:
            gpu_name = torch.cuda.get_device_name(0)
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
            st.success(f"β GPU Available: **{gpu_name}** ({gpu_memory:.1f} GB)")
    except Exception:
        has_gpu = False

    # Download of the preprocessed data (offered in every training mode).
    st.markdown("### π₯ Preprocessed Training Data")
    processed_path = Path(st.session_state.processed_data_path)
    # Bound up front so the GPU path below can read it even when the file is missing.
    sample_count = 0
    if processed_path.exists():
        with open(processed_path, 'r', encoding='utf-8') as f:
            processed_content = f.read()
        dl1, dl2 = st.columns(2)
        with dl1:
            st.download_button("β¬οΈ Download Training JSONL", processed_content,
                               file_name=processed_path.name, mime="application/jsonl")
        with dl2:
            # The processing page writes "<goal>_train.jsonl" and "<goal>_val.jsonl".
            # Only swap the trailing suffix so that a goal containing "_train" is
            # not rewritten and a differently named file is not offered as its
            # own validation set.
            train_suffix = '_train'
            val_path = None
            if processed_path.stem.endswith(train_suffix):
                val_stem = processed_path.stem[:-len(train_suffix)] + '_val'
                val_path = processed_path.with_name(val_stem + processed_path.suffix)
            if val_path is not None and val_path.exists():
                with open(val_path, 'r', encoding='utf-8') as f:
                    st.download_button("β¬οΈ Download Validation JSONL", f.read(),
                                       file_name=val_path.name, mime="application/jsonl")
        # One JSON object per non-blank line.
        sample_count = sum(1 for line in processed_content.split('\n') if line.strip())
        st.info(f"π Dataset: **{sample_count:,}** samples ready for training")
    else:
        st.warning("Processed data file not found.")
    st.markdown("---")

    # Three paths: local GPU training, Colab notebook, or external upload.
    if has_gpu:
        training_mode = "gpu"
    else:
        training_mode = st.radio("π₯οΈ Select Training Mode", [
            "βοΈ Use Google Colab (Recommended β Free GPU)",
            "π€ Upload Fine-Tuned Model (Already trained externally)"
        ], help="No GPU detected on this machine. Choose how to proceed.")

    # PATH A: GPU training (local)
    if training_mode == "gpu":
        st.markdown("### βοΈ Training Configuration")
        col1, col2 = st.columns(2)
        with col1:
            model_source = st.radio("Model Source", ["Preset Models", "Custom HuggingFace Model"])
            if model_source == "Preset Models":
                base_model = st.selectbox("Base Model", [
                    "unsloth/llama-3-8b-bnb-4bit",
                    "unsloth/llama-3-70b-bnb-4bit",
                    "unsloth/mistral-7b-bnb-4bit",
                    "unsloth/gemma-7b-bnb-4bit",
                ])
            else:
                base_model = st.text_input(
                    "HuggingFace Model ID",
                    value="unsloth/llama-3-8b-bnb-4bit",
                    help="Enter any HuggingFace model ID, e.g. 'meta-llama/Llama-3-8b', 'mistralai/Mistral-7B-v0.1'")
            max_seq_length = st.slider("Max Sequence Length", 512, 4096, 2048)
        with col2:
            # Default hyperparameters are picked from the dataset size; an
            # unknown size (0) is treated as 1000 samples.
            dataset_size = sample_count if sample_count > 0 else 1000
            if dataset_size < 1000:
                auto_rank, auto_alpha, auto_lr, auto_epochs = 8, 16, 2e-4, 5
                size_category = "Small"
            elif dataset_size < 10000:
                auto_rank, auto_alpha, auto_lr, auto_epochs = 16, 32, 1e-4, 3
                size_category = "Medium"
            else:
                auto_rank, auto_alpha, auto_lr, auto_epochs = 32, 64, 5e-5, 2
                size_category = "Large"
            st.success(f"Auto-configured for **{size_category}** dataset ({dataset_size:,} samples)")
        st.markdown("---")

        # NOTE(review): the values chosen in this expander are not forwarded to
        # TrainingPilot below (HyperParams is imported but never used). Wire
        # them through once the TrainingPilot/HyperParams API is confirmed.
        with st.expander("π§ Advanced Hyperparameters"):
            hc1, hc2, hc3 = st.columns(3)
            with hc1:
                lora_rank = st.slider("LoRA Rank", 4, 64, auto_rank)
                lora_alpha = st.slider("LoRA Alpha", 8, 128, auto_alpha)
            with hc2:
                learning_rate = st.select_slider(
                    "Learning Rate",
                    options=[1e-5, 2e-5, 5e-5, 1e-4, 2e-4, 5e-4], value=auto_lr)
                num_epochs = st.slider("Epochs", 1, 10, auto_epochs)
            with hc3:
                batch_size = st.slider("Batch Size", 1, 16, 4)
                gradient_accumulation = st.slider("Gradient Accumulation", 1, 8, 4)
        st.markdown("---")

        col1, col2, col3 = st.columns([1, 2, 1])
        with col2:
            if st.button("π Start Training", type="primary", use_container_width=True):
                st.session_state.pipeline_status['training'] = 'running'
                with st.spinner("Training in progress..."):
                    progress_bar = st.progress(0)
                    status_text = st.empty()
                    try:
                        from agents.training_pilot import TrainingPilot, HyperParams
                        status_text.text("π¦ Loading model...")
                        progress_bar.progress(10)
                        pilot = TrainingPilot(
                            base_model=base_model,
                            max_seq_length=max_seq_length,
                            output_dir="./output/models"
                        )
                        status_text.text("π Training...")
                        progress_bar.progress(30)
                        result = pilot.run(
                            data_path=st.session_state.processed_data_path,
                            output_name=st.session_state.training_goal
                        )
                        progress_bar.progress(100)
                        status_text.text("β Training complete!")
                        st.session_state.model_path = result.model_path
                        st.session_state.pipeline_status['training'] = 'complete'
                        st.success(f"β Model saved to: `{result.model_path}`")
                        rc1, rc2, rc3 = st.columns(3)
                        with rc1:
                            st.metric("Final Loss", f"{result.final_loss:.4f}")
                        with rc2:
                            st.metric("Training Time", f"{result.training_time:.1f}s")
                        with rc3:
                            st.metric("Total Steps", result.num_steps)
                    except Exception as e:
                        # UI boundary: report the failure instead of crashing the page.
                        st.session_state.pipeline_status['training'] = 'error'
                        st.error(f"β Training failed: {str(e)}")
                        import traceback
                        st.code(traceback.format_exc())

    # PATH B: Google Colab notebook
    elif "Colab" in training_mode:
        st.markdown("### βοΈ Train on Google Colab (Free GPU)")
        st.markdown("""
Since no GPU was detected on this machine, you can fine-tune your model on Google Colab with a free GPU.
Follow these steps:
""")
        st.markdown("""
**Step 1:** Download your preprocessed training data (above) β¬οΈ
**Step 2:** Download or copy the Colab notebook below
**Step 3:** Open [Google Colab](https://colab.research.google.com/) β Upload the notebook
**Step 4:** Upload your training JSONL to Colab's file browser
**Step 5:** Run all cells β Download the fine-tuned model
**Step 6:** Come back here β Upload your fine-tuned model results for evaluation
""")
        # Offer the bundled notebook for download and show its cells inline.
        notebook_path = Path("./Auto_FineTune_Ops_Colab.ipynb")
        if notebook_path.exists():
            with open(notebook_path, 'r', encoding='utf-8') as f:
                notebook_content = f.read()
            st.download_button("π Download Colab Notebook (.ipynb)", notebook_content,
                               file_name="Auto_FineTune_Ops_Colab.ipynb", mime="application/json",
                               type="primary", use_container_width=True)
            with st.expander("ποΈ View Notebook Code", expanded=False):
                try:
                    nb = json.loads(notebook_content)
                    for cell in nb.get('cells', []):
                        source = ''.join(cell.get('source', []))
                        if cell.get('cell_type') == 'code':
                            if source.strip():
                                st.code(source, language='python')
                        elif cell.get('cell_type') == 'markdown':
                            st.markdown(source)
                except Exception:
                    # Not parseable as a notebook: show the raw head of the file instead.
                    st.code(notebook_content[:5000], language='json')
        else:
            st.warning("β οΈ Colab notebook not found at `Auto_FineTune_Ops_Colab.ipynb`")
        st.markdown("---")
        st.markdown("### π€ After Training on Colab")
        st.info("Once you've finished training on Colab, download your fine-tuned model outputs and upload them below for evaluation.")

    # PATH C: upload fine-tuned model / results
    else:
        st.markdown("### π€ Upload Fine-Tuned Model Results")
        st.markdown("Upload outputs from your externally trained model for evaluation.")

    # Upload of fine-tuned results (always shown at the bottom).
    st.markdown("---")
    st.markdown("### π¦ Upload Fine-Tuned Results for Evaluation")
    st.caption("If you trained on Colab or another machine, upload your model outputs here.")
    upload_tab1, upload_tab2 = st.tabs(["π Upload Evaluation Results (JSONL)", "π Upload Model Folder Path"])
    with upload_tab1:
        ft_file = st.file_uploader("Upload fine-tuned model outputs (JSONL with predictions)",
                                   type=['jsonl', 'json'], key="ft_results_upload",
                                   help="JSONL file with model predictions/outputs from your fine-tuned model")
        if ft_file:
            try:
                ft_df = pd.read_json(ft_file, lines=ft_file.name.endswith('.jsonl'))
                st.success(f"β Loaded **{len(ft_df):,}** evaluation samples")
                st.dataframe(ft_df.head(5), use_container_width=True)
                # Persist for the evaluation page, which looks for "finetuned_outputs"
                # in the stored path and reads the file as JSON Lines.
                eval_output = Path("./output/eval_results")
                eval_output.mkdir(parents=True, exist_ok=True)
                # Keep only the final path component of the client-supplied name
                # so it cannot point outside the output directory.
                safe_name = Path(ft_file.name).name
                eval_path = eval_output / f"finetuned_outputs_{safe_name}"
                ft_df.to_json(eval_path, orient='records', lines=True)
                st.session_state.model_path = str(eval_path)
                st.session_state.pipeline_status['training'] = 'complete'
                st.success(f"β Results saved! You can now proceed to **Evaluation** page.")
                if st.button("βοΈ Go to Evaluation"):
                    st.session_state.current_page = 'evaluation'
                    st.rerun()
            except Exception as e:
                st.error(f"Error loading file: {e}")
    with upload_tab2:
        model_folder = st.text_input("Model Folder Path",
                                     placeholder="e.g., ./output/models/my_finetuned_model or /path/to/model",
                                     help="Local path to the fine-tuned model directory (LoRA adapter or full model)")
        if model_folder and st.button("β Set Model Path"):
            if Path(model_folder).exists():
                st.session_state.model_path = model_folder
                st.session_state.pipeline_status['training'] = 'complete'
                st.success(f"β Model path set to: `{model_folder}`")
            else:
                st.error(f"β Path not found: `{model_folder}`")
| # ============================================================================ | |
| # PAGE: EVALUATION | |
| # ============================================================================ | |
def _parse_judge_verdict(raw):
    """Decode the judge model's reply into a verdict dict.

    An optional Markdown code fence around the JSON is stripped first.

    Raises:
        ValueError: if the reply is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or decodes to something other than an object.
        TypeError: if ``raw`` is not a string.
    """
    text = raw
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0]
    if "```" in text:
        text = text.split("```")[1]
    verdict = json.loads(text.strip())
    if not isinstance(verdict, dict):
        raise ValueError("judge response is not a JSON object")
    return verdict


def render_evaluation():
    """Render the "Model Evaluation" page.

    Lets the user pick an LLM judge (OpenAI, Anthropic, Groq or a custom
    OpenAI-compatible endpoint), load evaluation data with the columns
    ``instruction``, ``base_output`` and ``finetuned_output``, run a pairwise
    comparison per row, and view or download the verdicts. Verdicts are kept in
    ``st.session_state.eval_results`` so they survive Streamlit reruns.
    """
    st.markdown('<p class="gradient-header">βοΈ Model Evaluation</p>', unsafe_allow_html=True)
    # Initialize session state for results if not present
    if 'eval_results' not in st.session_state:
        st.session_state.eval_results = None

    required_cols = ['instruction', 'base_output', 'finetuned_output']

    # Judge provider selection
    st.markdown("### π€ Select AI Judge Provider")
    st.caption("Choose which LLM provider to use as the evaluation judge.")
    judge_provider = st.selectbox("AI Provider", [
        "OpenAI (GPT-4o, GPT-4-turbo, etc.)",
        "Anthropic (Claude 3.5, Claude 3 Opus, etc.)",
        "Groq (Llama 3, Mixtral, Gemma, etc.)",
        "Custom OpenAI-Compatible Endpoint"
    ], help="Select the AI provider whose model will act as the judge.")
    # Match on the leading word: the custom option's label also contains
    # "OpenAI", so a substring test would never reach the custom branch.
    is_custom = judge_provider.startswith("Custom")
    is_anthropic = judge_provider.startswith("Anthropic")
    is_groq = judge_provider.startswith("Groq")

    st.markdown("---")
    st.markdown("### π API Configuration")
    api_key = None
    base_url = None
    # NOTE(review): the keys are also exported through os.environ, which is
    # process-wide and therefore visible to every session served by this
    # process. The clients below receive the key explicitly; confirm whether
    # anything else depends on the environment variables before removing them.
    col1, col2 = st.columns(2)
    if is_custom:
        with col1:
            base_url = st.text_input("API Base URL", placeholder="https://api.your-provider.com/v1")
            api_key = st.text_input("API Key", type="password", key="custom_key_input")
            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key
        with col2:
            judge_model = st.text_input("Model Name", placeholder="e.g., my-model")
    elif is_anthropic:
        with col1:
            api_key = st.text_input("Anthropic API Key", type="password", key="anthropic_key_input")
            if api_key:
                os.environ["ANTHROPIC_API_KEY"] = api_key
        with col2:
            judge_model = st.selectbox("Judge Model", ["claude-3-5-sonnet-20241022", "claude-3-opus-20240229", "claude-3-sonnet-20240229"])
    elif is_groq:
        with col1:
            api_key = st.text_input("Groq API Key", type="password", key="groq_key_input")
            if api_key:
                os.environ["GROQ_API_KEY"] = api_key
        with col2:
            judge_model = st.selectbox("Judge Model", ["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"])
        base_url = "https://api.groq.com/openai/v1"
    else:  # OpenAI
        with col1:
            api_key = st.text_input("OpenAI API Key", type="password", key="openai_key_input")
            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key
        with col2:
            judge_model = st.selectbox("Judge Model", ["gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo"])
    st.markdown("---")

    # Evaluation data
    st.markdown("### π Evaluation Data")
    # 1. Use data from training (if available)
    if st.session_state.model_path and "finetuned_outputs" in str(st.session_state.model_path):
        st.info(f"Using results from training: `{st.session_state.model_path}`")
        try:
            st.session_state['eval_data'] = pd.read_json(st.session_state.model_path, lines=True)
        except Exception as e:
            # Best effort: the user can still upload a file below.
            st.warning(f"Could not load results from training: {e}")
    # 2. Upload new data
    eval_upload = st.file_uploader("Upload JSONL (Must contain: 'instruction', 'base_output', 'finetuned_output')",
                                   type=['jsonl', 'json'], key="eval_uploader")
    if eval_upload:
        try:
            df = pd.read_json(eval_upload, lines=eval_upload.name.endswith('.jsonl'))
            if all(col in df.columns for col in required_cols):
                st.session_state['eval_data'] = df
                st.success(f"β Loaded {len(df)} samples")
            else:
                st.error(f"β Missing columns! Found: {list(df.columns)}. Required: {required_cols}")
        except Exception as e:
            st.error(f"Error loading file: {e}")
    # Show preview
    if st.session_state.get('eval_data') is not None:
        with st.expander("ποΈ View Data Preview"):
            st.dataframe(st.session_state['eval_data'].head(3), use_container_width=True)
    st.markdown("---")

    # Run evaluation
    if st.button("π Run Dynamic Evaluation", type="primary", use_container_width=True):
        eval_df = st.session_state.get('eval_data')
        if not api_key:
            st.error("β Please provide an API Key above!")
            return
        if eval_df is None or eval_df.empty:
            st.error("β No evaluation data loaded!")
            return
        if not judge_model:
            st.error("Please provide a judge model name above!")
            return
        if is_custom and not base_url:
            st.error("Please provide the API Base URL of your custom endpoint!")
            return
        # Data loaded from the training page is not validated on load.
        missing_cols = [col for col in required_cols if col not in eval_df.columns]
        if missing_cols:
            st.error(f"Evaluation data is missing required columns: {missing_cols}")
            return

        # Prepare judge
        st.session_state.pipeline_status['evaluation'] = 'running'
        progress_bar = st.progress(0)
        status_text = st.empty()
        results = []
        total = len(eval_df)
        try:
            # Initialize client
            if is_anthropic:
                from anthropic import Anthropic
                client = Anthropic(api_key=api_key)
            else:
                from openai import OpenAI
                # An empty string would be taken as the base URL; None selects the default.
                client = OpenAI(api_key=api_key, base_url=base_url or None)
            JUDGE_PROMPT = """You are an expert evaluator comparing two AI responses.
Query: {prompt}
Response A (Base Model):
{response_a}
Response B (Fine-tuned Model):
{response_b}
Compare them on: Accuracy, Helpfulness, Clarity.
Return a valid JSON object ONLY:
{{
"winner": "A" or "B" or "TIE",
"score_a": <1-10>,
"score_b": <1-10>,
"reasoning": "short explanation",
"accuracy": {{"A": <1-10>, "B": <1-10>}},
"helpfulness": {{"A": <1-10>, "B": <1-10>}},
"clarity": {{"A": <1-10>, "B": <1-10>}}
}}
"""
            # Count positions explicitly: iterrows() yields index labels, which
            # are not guaranteed to be 0..n-1, and progress must stay within 0..1.
            for position, (_, row) in enumerate(eval_df.iterrows(), start=1):
                status_text.text(f"Evaluating sample {position}/{total}...")
                prompt_text = JUDGE_PROMPT.format(
                    prompt=row['instruction'],
                    response_a=row['base_output'],
                    response_b=row['finetuned_output']
                )
                # Call API
                if is_anthropic:
                    resp = client.messages.create(
                        model=judge_model, max_tokens=1000,
                        messages=[{"role": "user", "content": prompt_text}]
                    ).content[0].text
                else:
                    resp = client.chat.completions.create(
                        model=judge_model, max_tokens=1000,
                        messages=[{"role": "user", "content": prompt_text}],
                        response_format={"type": "json_object"}
                    ).choices[0].message.content
                # Parse. `json` is deliberately NOT imported inside this function:
                # a local import would make the name function-local and leave it
                # unbound on reruns that only display stored results.
                try:
                    data = _parse_judge_verdict(resp)
                    data['instruction'] = row['instruction']
                    results.append(data)
                except Exception as e:
                    print(f"Parse error: {e}")
                    # Keep the instruction so the results table has all of its columns.
                    results.append({"instruction": row['instruction'], "winner": "TIE",
                                    "score_a": 5, "score_b": 5,
                                    "reasoning": "Error parsing judge response"})
                progress_bar.progress(position / total)
            st.session_state.eval_results = results
            st.session_state.pipeline_status['evaluation'] = 'complete'
            status_text.text("β Evaluation Complete!")
        except Exception as e:
            st.error(f"Evaluation Failed: {str(e)}")
            st.session_state.pipeline_status['evaluation'] = 'error'

    # Display results
    if st.session_state.get('eval_results'):
        results = st.session_state.eval_results
        df_res = pd.DataFrame(results)
        # The judge may omit keys; make sure every displayed column exists.
        for col in ('instruction', 'winner', 'score_a', 'score_b', 'reasoning'):
            if col not in df_res.columns:
                df_res[col] = None
        # Non-numeric scores become NaN and are ignored by mean().
        score_a = pd.to_numeric(df_res['score_a'], errors='coerce')
        score_b = pd.to_numeric(df_res['score_b'], errors='coerce')
        # Metrics
        wins_b = int((df_res['winner'] == 'B').sum())
        wins_a = int((df_res['winner'] == 'A').sum())
        ties = int((df_res['winner'] == 'TIE').sum())
        win_rate = (wins_b / len(df_res)) * 100
        col1, col2, col3, col4 = st.columns(4)
        col1.metric("Fine-tuned Win Rate", f"{win_rate:.1f}%")
        col2.metric("Fine-Tuned Wins", wins_b)
        col3.metric("Base Model Wins", wins_a)
        col4.metric("Avg Score Improvement", f"{score_b.mean() - score_a.mean():.2f}")
        # Charts
        c1, c2 = st.columns(2)
        with c1:
            fig = px.pie(values=[wins_b, wins_a, ties], names=['Fine-tuned', 'Base', 'Ties'],
                         title="Win Distribution", color_discrete_sequence=['#6366f1', '#ef4444', '#94a3b8'])
            st.plotly_chart(fig, use_container_width=True)
        with c2:
            avg_scores = pd.DataFrame({
                'Model': ['Base', 'Fine-tuned'],
                'Score': [score_a.mean(), score_b.mean()]
            })
            fig2 = px.bar(avg_scores, x='Model', y='Score', color='Model',
                          title="Average Overall Score", color_discrete_map={'Base': '#ef4444', 'Fine-tuned': '#6366f1'})
            st.plotly_chart(fig2, use_container_width=True)
        # Detailed table
        st.markdown("### π Detailed Verdicts")
        st.dataframe(df_res[['instruction', 'winner', 'score_a', 'score_b', 'reasoning']], use_container_width=True)
        # Download
        st.download_button("β¬οΈ Download Report (JSON)",
                           data=json.dumps(results, indent=2),
                           file_name="evaluation_report.json",
                           mime="application/json")
| # ============================================================================ | |
| # PAGE: DEPLOYMENT | |
| # ============================================================================ | |
def render_deploy():
    """Render the "Model Deployment" page.

    Lists the sub-directories of ``./output/models`` as selectable models,
    shows the command that starts the local FastAPI server for the chosen
    model, offers a HuggingFace Hub push form, and documents the endpoints of
    the deployed API.
    """
    st.markdown('<p class="gradient-header">π Model Deployment</p>', unsafe_allow_html=True)

    # Model selection: stays None unless a trained model directory is picked.
    st.markdown("### π¦ Select Model")
    selected_model = None
    models_dir = Path("./output/models")
    if not models_dir.exists():
        st.warning("Models directory not found.")
    else:
        trained = [entry.name for entry in models_dir.iterdir() if entry.is_dir()]
        if trained:
            selected_model = st.selectbox("Trained Models", trained)
            st.info(f"π Model path: `{models_dir / selected_model}`")
        else:
            st.warning("No trained models found.")
    st.markdown("---")

    # Deployment options
    st.markdown("### π Deployment Options")
    local_col, hub_col = st.columns(2)

    with local_col:
        st.markdown("""
<div class="info-card">
<h4>π₯οΈ Local FastAPI Server</h4>
<p>Deploy as a REST API on your local machine.</p>
</div>
""", unsafe_allow_html=True)
        port = st.number_input("Port", value=8000, min_value=1000, max_value=65535)
        # The button only prints the command; the server is started by the user.
        start_clicked = st.button("π Start Server", disabled=not selected_model)
        if start_clicked:
            st.code(f"python scripts/deploy.py --model ./output/models/{selected_model} --port {port}")
            st.info("Run the command above in your terminal to start the server.")

    with hub_col:
        st.markdown("""
<div class="info-card">
<h4>βοΈ HuggingFace Hub</h4>
<p>Push your model to HuggingFace for sharing.</p>
</div>
""", unsafe_allow_html=True)
        hf_token = st.text_input("HuggingFace Token", type="password")
        default_repo = f"my-finetuned-{selected_model}" if selected_model else ""
        repo_name = st.text_input("Repository Name", value=default_repo)
        # Both a model and a token are needed before the push button is enabled.
        push_clicked = st.button("βοΈ Push to Hub", disabled=not (selected_model and hf_token))
        if push_clicked:
            st.info("Pushing to HuggingFace Hub...")
    st.markdown("---")

    # API documentation
    st.markdown("### π API Documentation")
    st.markdown("""
Once deployed, your API will have these endpoints:
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/` | GET | API info |
| `/health` | GET | Health check |
| `/generate` | POST | Generate text |
| `/generate/batch` | POST | Batch generation |
""")
    with st.expander("π Example Request"):
        st.code("""
import requests
response = requests.post("http://localhost:8000/generate", json={
"prompt": "What are the symptoms of the common cold?",
"max_tokens": 256,
"temperature": 0.7
})
print(response.json()["generated_text"])
""", language="python")
| # ============================================================================ | |
| # MAIN ROUTER | |
| # ============================================================================ | |
def main():
    """Render the page named by ``st.session_state.current_page``.

    Any page key without a dedicated renderer falls back to the home page.
    """
    routes = {
        'home': render_home,
        'data': render_data_upload,
        'process': render_processing,
        'training': render_training,
        'evaluation': render_evaluation,
        'deploy': render_deploy,
    }
    render_page = routes.get(st.session_state.current_page, render_home)
    render_page()


if __name__ == "__main__":
    main()