""" Auto-FineTune-Ops: Streamlit Dashboard ====================================== Premium interactive dashboard for ML fine-tuning pipeline. """ import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go from pathlib import Path import sys import os import json import time from datetime import datetime # Add project root to path sys.path.insert(0, str(Path(__file__).parent)) # Page configuration st.set_page_config( page_title="Auto-FineTune-Ops", page_icon="๐ค", layout="wide", initial_sidebar_state="expanded" ) # Premium CSS styling st.markdown(""" """, unsafe_allow_html=True) # Initialize session state if 'current_page' not in st.session_state: st.session_state.current_page = 'home' if 'uploaded_data' not in st.session_state: st.session_state.uploaded_data = None if 'processed_data_path' not in st.session_state: st.session_state.processed_data_path = None if 'model_path' not in st.session_state: st.session_state.model_path = None if 'training_goal' not in st.session_state: st.session_state.training_goal = None if 'pipeline_status' not in st.session_state: st.session_state.pipeline_status = { 'data': 'pending', 'training': 'pending', 'evaluation': 'pending', 'deployment': 'pending' } # Sidebar navigation with st.sidebar: st.markdown('
๐ค Auto-FineTune-Ops
', unsafe_allow_html=True) st.markdown("---") # Navigation pages = { 'home': ('๐ ', 'Dashboard'), 'data': ('๐', 'Data Upload'), 'process': ('๐งน', 'Processing'), 'training': ('๐', 'Training'), 'evaluation': ('โ๏ธ', 'Evaluation'), 'deploy': ('๐', 'Deploy') } for key, (icon, label) in pages.items(): if st.button(f"{icon} {label}", key=f"nav_{key}", use_container_width=True): st.session_state.current_page = key st.markdown("---") # Pipeline status st.markdown("### ๐ Pipeline Status") status_icons = {'pending': 'โณ', 'running': '๐', 'complete': 'โ ', 'error': 'โ'} for stage, status in st.session_state.pipeline_status.items(): st.markdown(f"{status_icons.get(status, 'โณ')} **{stage.title()}**: {status}") st.markdown("---") st.markdown("*Built with โค๏ธ using Streamlit*") # ============================================================================ # PAGE: HOME DASHBOARD # ============================================================================ def render_home(): st.markdown('๐ Pipeline Dashboard
', unsafe_allow_html=True) st.markdown("**One-click autonomous ML fine-tuning pipeline**") # Status cards col1, col2, col3, col4 = st.columns(4) with col1: st.metric( label="๐ Dataset", value="Ready" if st.session_state.uploaded_data is not None else "Not Loaded", delta="Uploaded" if st.session_state.uploaded_data is not None else None ) with col2: st.metric( label="๐งน Processing", value=st.session_state.pipeline_status['data'].title(), delta="Complete" if st.session_state.pipeline_status['data'] == 'complete' else None ) with col3: st.metric( label="๐ Training", value=st.session_state.pipeline_status['training'].title(), delta="Complete" if st.session_state.pipeline_status['training'] == 'complete' else None ) with col4: st.metric( label="โ๏ธ Evaluation", value=st.session_state.pipeline_status['evaluation'].title(), delta="Complete" if st.session_state.pipeline_status['evaluation'] == 'complete' else None ) st.markdown("---") # Quick start guide st.markdown("### ๐ Quick Start Guide") col1, col2 = st.columns(2) with col1: st.markdown("""Upload your CSV/JSON dataset with instruction-response pairs.
The DataArchitectAgent will clean and format your data.
Fine-tune with auto-configured hyperparameters.
Run Model Arena with LLM-as-Judge evaluation.
๐ Data Upload & Preview
', unsafe_allow_html=True) # โโ File Management Bar โโ if st.session_state.uploaded_data is not None: fm1, fm2, fm3 = st.columns([3, 1, 1]) with fm1: st.info(f"๐ Currently loaded: **{st.session_state.get('uploaded_filename', 'dataset')}** ({len(st.session_state.uploaded_data):,} rows)") with fm2: if st.button("๐๏ธ Remove Dataset", type="secondary"): st.session_state.uploaded_data = None st.session_state.uploaded_filename = None st.session_state.processed_data_path = None st.session_state.pipeline_status['data'] = 'pending' st.rerun() with fm3: if st.button("๐ Add More Data"): st.session_state['show_add_file'] = True # โโ File Uploader โโ show_uploader = (st.session_state.uploaded_data is None) or st.session_state.get('show_add_file', False) if show_uploader: upload_label = "Upload your dataset (CSV, JSON, or JSONL)" if st.session_state.uploaded_data is None else "Upload additional file to merge with current dataset" uploaded_file = st.file_uploader( upload_label, type=['csv', 'json', 'jsonl'], help="Your dataset should contain instruction-response pairs.", key=f"uploader_{st.session_state.get('upload_counter', 0)}" ) if uploaded_file: try: if uploaded_file.name.endswith('.csv'): new_df = pd.read_csv(uploaded_file) elif uploaded_file.name.endswith('.jsonl'): new_df = pd.read_json(uploaded_file, lines=True) else: new_df = pd.read_json(uploaded_file) # Merge or replace if st.session_state.uploaded_data is not None and st.session_state.get('show_add_file', False): existing_df = st.session_state.uploaded_data if list(new_df.columns) == list(existing_df.columns): st.session_state.uploaded_data = pd.concat([existing_df, new_df], ignore_index=True) st.session_state.uploaded_filename = f"{st.session_state.get('uploaded_filename', 'data')} + {uploaded_file.name}" st.success(f"โ Merged **{uploaded_file.name}** ({len(new_df):,} rows) โ Total: **{len(st.session_state.uploaded_data):,}** rows") else: st.error(f"โ Column mismatch! 
Existing: {list(existing_df.columns)} vs New: {list(new_df.columns)}") else: st.session_state.uploaded_data = new_df st.session_state.uploaded_filename = uploaded_file.name st.success(f"โ Successfully loaded **{uploaded_file.name}**") st.session_state['show_add_file'] = False st.session_state['upload_counter'] = st.session_state.get('upload_counter', 0) + 1 except Exception as e: st.error(f"Error loading file: {str(e)}") # โโ Data Display โโ if st.session_state.uploaded_data is not None: df = st.session_state.uploaded_data # Dataset statistics st.markdown("### ๐ Dataset Statistics") col1, col2, col3, col4 = st.columns(4) with col1: st.metric("Total Rows", f"{len(df):,}") with col2: st.metric("Total Columns", len(df.columns)) with col3: total_bytes = df.memory_usage(deep=True).sum() st.metric("Memory Size", f"{total_bytes / 1024:.1f} KB") with col4: missing = df.isnull().sum().sum() st.metric("Missing Values", missing) st.markdown("---") # Column detection st.markdown("### ๐ Auto-Detected Columns") instruction_patterns = ['instruction', 'prompt', 'question', 'query', 'user', 'input_text'] output_patterns = ['output', 'response', 'answer', 'completion', 'assistant', 'target'] detected_instruction = None detected_output = None for col in df.columns: col_lower = col.lower() for pattern in instruction_patterns: if pattern in col_lower and not detected_instruction: detected_instruction = col for pattern in output_patterns: if pattern in col_lower and not detected_output: detected_output = col col1, col2 = st.columns(2) with col1: if detected_instruction: st.markdown(f'Instruction: {detected_instruction}', unsafe_allow_html=True) else: st.markdown(f'Instruction: Not detected', unsafe_allow_html=True) with col2: if detected_output: st.markdown(f'Output: {detected_output}', unsafe_allow_html=True) else: st.markdown(f'Output: Not detected', unsafe_allow_html=True) st.markdown("---") # Full data preview (scrollable) st.markdown("### ๐ Complete Data Preview") 
st.caption(f"Showing all **{len(df):,}** rows. Scroll to browse the full dataset.") st.dataframe(df, use_container_width=True, height=450) # Download raw data st.markdown("### ๐ฅ Download Dataset") dl1, dl2 = st.columns(2) with dl1: csv_data = df.to_csv(index=False).encode('utf-8') st.download_button("โฌ๏ธ Download as CSV", csv_data, file_name=f"{st.session_state.get('uploaded_filename', 'dataset').rsplit('.', 1)[0]}.csv", mime="text/csv") with dl2: json_data = df.to_json(orient='records', indent=2).encode('utf-8') st.download_button("โฌ๏ธ Download as JSON", json_data, file_name=f"{st.session_state.get('uploaded_filename', 'dataset').rsplit('.', 1)[0]}.json", mime="application/json") # Column summary st.markdown("### ๐ Column Summary") col_info = [] for col in df.columns: col_info.append({ 'Column': col, 'Type': str(df[col].dtype), 'Non-Null': df[col].notna().sum(), 'Unique': df[col].nunique(), 'Sample': str(df[col].iloc[0])[:80] + '...' if len(str(df[col].iloc[0])) > 80 else str(df[col].iloc[0]) }) st.dataframe(pd.DataFrame(col_info), use_container_width=True) # ============================================================================ # PAGE: DATA PROCESSING # ============================================================================ def render_processing(): st.markdown('๐งน Advanced Data Processing
', unsafe_allow_html=True) if st.session_state.uploaded_data is None: st.warning("โ ๏ธ Please upload a dataset first!") if st.button("๐ Go to Data Upload"): st.session_state.current_page = 'data' st.rerun() return df = st.session_state.uploaded_data # โโ Dataset Stats Header โโ st.markdown("### ๐ Dataset Statistics") sc1, sc2, sc3, sc4 = st.columns(4) with sc1: st.metric("Total Rows", f"{len(df):,}") with sc2: st.metric("Columns", len(df.columns)) with sc3: avg_len = int(df.iloc[:, 0].astype(str).str.len().mean()) if len(df) > 0 else 0 st.metric("Avg Text Length", f"{avg_len:,} chars") with sc4: est_tokens = int(avg_len * len(df) / 4) if avg_len > 0 else 0 st.metric("Est. Total Tokens", f"{est_tokens:,}") st.markdown("---") # โโ Training Goal โโ goal = st.text_input( "Training Goal", value=st.session_state.training_goal or "assistant", help="e.g., medical_assistant, customer_support, code_helper" ) st.session_state.training_goal = goal # โโ Column Mapping โโ st.markdown("### ๐ฏ Column Mapping") instruction_patterns = ['instruction', 'prompt', 'question', 'query', 'user', 'input_text', 'human'] output_patterns = ['output', 'response', 'answer', 'completion', 'assistant', 'target'] input_patterns = ['context', 'input', 'background', 'reference'] detected_instruction = detected_output = detected_input = None available_columns = list(df.columns) for col in available_columns: col_lower = col.lower() for p in instruction_patterns: if p in col_lower and not detected_instruction: detected_instruction = col for p in output_patterns: if p in col_lower and not detected_output: detected_output = col for p in input_patterns: if p in col_lower and not detected_input: detected_input = col mc1, mc2, mc3 = st.columns(3) with mc1: instruction_col = st.selectbox("Instruction Column *", options=available_columns, index=available_columns.index(detected_instruction) if detected_instruction else 0, help="Column containing instructions/prompts/questions") with mc2: output_col = 
st.selectbox("Output Column *", options=available_columns, index=available_columns.index(detected_output) if detected_output else (1 if len(available_columns) > 1 else 0), help="Column containing responses/answers/outputs") with mc3: input_col_options = ["None"] + available_columns default_input_idx = input_col_options.index(detected_input) if detected_input else 0 input_col_selection = st.selectbox("Input/Context Column (Optional)", options=input_col_options, index=default_input_idx, help="Optional column containing additional context") input_col = None if input_col_selection == "None" else input_col_selection st.markdown("---") # โโ Safe Preset Button โโ if st.button("๐ก๏ธ Load Safe Preset", help="Apply recommended defaults for most datasets"): st.session_state['safe_preset'] = True st.rerun() use_safe = st.session_state.get('safe_preset', False) # ==================================================================== # 1๏ธโฃ Text Cleaning Controls # ==================================================================== with st.expander("1๏ธโฃ Text Cleaning Controls", expanded=False): tc1, tc2 = st.columns(2) with tc1: clean_html = st.checkbox("Remove HTML Tags", value=use_safe, help="Strip all HTML/XML tags from text") clean_urls = st.checkbox("Remove URLs", value=use_safe, help="Remove http/https/www links") clean_emojis = st.checkbox("Remove Emojis", value=False, help="Strip emoji characters") clean_whitespace = st.checkbox("Normalize Whitespace", value=True, help="Collapse multiple spaces/tabs into one") with tc2: clean_lowercase = st.checkbox("Lowercase All Text", value=False, help="Convert text to lowercase (disable to preserve case)") clean_special = st.checkbox("Remove Special Characters", value=False, help="Keep only alphanumeric + basic punctuation") clean_linebreaks = st.checkbox("Strip Extra Line Breaks", value=True, help="Reduce 3+ newlines to double newlines") # ==================================================================== # 2๏ธโฃ Tokenization 
Controls # ==================================================================== with st.expander("2๏ธโฃ Tokenization Controls", expanded=False): tk1, tk2 = st.columns(2) with tk1: tokenizer_choice = st.selectbox("Tokenizer", ["tiktoken", "HuggingFace"], help="tiktoken = OpenAI-compatible, HuggingFace = model-specific tokenizer") if tokenizer_choice == "HuggingFace": hf_model_name = st.text_input("HF Model Name", value="meta-llama/Llama-3-8b", help="HuggingFace model name for tokenizer") else: hf_model_name = "" max_total_tokens = st.slider("Max Tokens per Sample", 128, 8192, 2048, help="Maximum total tokens allowed per sample") with tk2: truncate_long = st.checkbox("Truncate Long Samples", value=False, help="Cut text exceeding max tokens") split_long = st.checkbox("Split Long Samples into Chunks", value=False, help="Break long texts into overlapping chunks") if split_long: split_overlap = st.slider("Chunk Overlap Tokens", 0, 200, 50, help="Number of overlapping tokens between chunks") else: split_overlap = 50 # Token stats preview if st.button("๐ Show Token Stats Preview", key="token_stats_btn"): with st.spinner("Counting tokens..."): try: from preprocessing.tokenization import TokenizationConfig, get_tokenizer, compute_token_stats tk_cfg = TokenizationConfig( tokenizer_name="tiktoken" if tokenizer_choice == "tiktoken" else hf_model_name, ) tokenizer = get_tokenizer(tk_cfg) is_tiktoken = tokenizer_choice == "tiktoken" stats_cols = [c for c in [instruction_col, output_col] if c in df.columns] stats = compute_token_stats(df.head(200), stats_cols, tokenizer, is_tiktoken) for col_name, s in stats.items(): st.markdown(f"**{col_name}**: min={s['min']}, max={s['max']}, mean={s['mean']}, p95={s['p95']}") except Exception as e: st.warning(f"Could not compute token stats: {e}") # ==================================================================== # 3๏ธโฃ System Prompt Configuration # ==================================================================== with 
st.expander("3๏ธโฃ System Prompt Configuration", expanded=False): system_prompt_text = st.text_area("Global System Prompt", value="You are a helpful AI assistant." if not use_safe else "You are a helpful AI assistant.", height=100, help="System prompt prepended to every sample in chat format") prepend_system = st.checkbox("Prepend System Prompt to All Samples", value=True, help="Include this system prompt in all formatted entries") if st.button("๐๏ธ Preview Formatted Chat JSON", key="preview_chat_btn"): try: from preprocessing.system_prompt import preview_formatted_json preview = preview_formatted_json(df, system_prompt_text, instruction_col, output_col, input_col, n=2) st.code(preview, language="json") except Exception as e: st.warning(f"Preview error: {e}") # ==================================================================== # 4๏ธโฃ Dataset Balancing # ==================================================================== with st.expander("4๏ธโฃ Dataset Balancing (Classification)", expanded=False): balance_enabled = st.checkbox("Enable Class Balancing", value=False, help="Balance class distribution for classification tasks") if balance_enabled: label_col_options = available_columns label_col = st.selectbox("Label Column", options=label_col_options, help="Column containing class labels") balance_strategy = st.radio("Strategy", ["none", "oversample", "undersample"], help="Oversample = duplicate minority, Undersample = drop majority") # Show distribution chart if label_col in df.columns: from preprocessing.dataset_balancing import compute_label_distribution dist = compute_label_distribution(df, label_col) if dist: fig = px.bar(x=list(dist.keys()), y=list(dist.values()), labels={'x': 'Label', 'y': 'Count'}, title="Label Distribution") fig.update_layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_color='#e2e8f0') st.plotly_chart(fig, use_container_width=True) else: label_col = None balance_strategy = "none" # 
==================================================================== # 5๏ธโฃ Quality Filters # ==================================================================== with st.expander("5๏ธโฃ Quality Filters", expanded=False): qf1, qf2 = st.columns(2) with qf1: min_words = st.number_input("Min Word Count", min_value=0, value=3 if use_safe else 0, help="Minimum words required per sample (0 = no filter)") max_words = st.number_input("Max Word Count", min_value=0, value=0, help="Maximum words allowed per sample (0 = no limit)") profanity_filter = st.checkbox("Profanity Filter", value=False, help="Remove samples containing profane language") with qf2: language_filter = st.checkbox("Language Detection Filter", value=False, help="Keep only samples in specified languages") if language_filter: allowed_langs = st.text_input("Allowed Languages (comma-separated)", value="en", help="ISO 639-1 codes, e.g. en,fr,de") else: allowed_langs = "en" remove_low_quality = st.checkbox("Remove Low-Quality Responses", value=use_safe, help="Remove short / generic / placeholder responses") # ==================================================================== # 6๏ธโฃ Deduplication Advanced # ==================================================================== with st.expander("6๏ธโฃ Deduplication", expanded=False): dedup_exact = st.checkbox("Remove Exact Duplicates", value=True, help="Remove rows with identical instruction text") dedup_semantic = st.checkbox("Remove Semantic Duplicates", value=False, help="Use TF-IDF cosine similarity to find near-duplicates") if dedup_semantic: semantic_threshold = st.slider("Similarity Threshold", 0.5, 1.0, 0.90, 0.01, help="Cosine similarity above this threshold = duplicate (higher = stricter)") else: semantic_threshold = 0.90 # ==================================================================== # 7๏ธโฃ Train / Validation Split # ==================================================================== with st.expander("7๏ธโฃ Train / Validation Split", 
expanded=False): split_enabled = st.checkbox("Enable Train/Val Split", value=True, help="Split dataset into training and validation sets") if split_enabled: train_ratio = st.slider("Train Ratio", 0.5, 0.95, 0.9 if use_safe else 0.8, 0.05, help="Proportion of data used for training") st.markdown(f"**Split**: {int(train_ratio*100)}% Train / {int((1-train_ratio)*100)}% Validation") random_seed = st.number_input("Random Seed", min_value=0, value=42, help="Seed for reproducible splits") shuffle_data = st.checkbox("Shuffle Before Split", value=True, help="Randomly shuffle data before splitting") else: train_ratio = 0.8 random_seed = 42 shuffle_data = True # ==================================================================== # 8๏ธโฃ Output Formatting # ==================================================================== with st.expander("8๏ธโฃ Output Formatting", expanded=False): format_type = st.selectbox("Export Format", ["openai_chat", "completion", "classification", "custom"], help="OpenAI Chat = messages format, Completion = prompt/completion, Classification = text/label") custom_schema = {} if format_type == "custom": st.markdown("**Define Custom Schema** (output_key โ source_column)") num_fields = st.number_input("Number of Fields", 1, 10, 2) for i in range(int(num_fields)): fc1, fc2 = st.columns(2) with fc1: key = st.text_input(f"Output Key {i+1}", value=f"field_{i+1}", key=f"ckey_{i}") with fc2: val = st.selectbox(f"Source Column {i+1}", options=available_columns, key=f"cval_{i}") custom_schema[key] = val # ==================================================================== # 9๏ธโฃ Safety & PII Filtering # ==================================================================== with st.expander("9๏ธโฃ Safety & PII Filtering", expanded=False): pii1, pii2 = st.columns(2) with pii1: pii_emails = st.checkbox("Detect & Mask Emails", value=use_safe, help="Replace email addresses with [REDACTED]") pii_phones = st.checkbox("Detect & Mask Phone Numbers", value=use_safe, 
help="Replace phone numbers with [REDACTED]") pii_ids = st.checkbox("Detect & Mask CNIC/SSN", value=use_safe, help="Replace national ID / SSN patterns with [REDACTED]") with pii2: pii_keys = st.checkbox("Detect & Mask API Keys", value=use_safe, help="Replace long hex/base64 strings that look like secrets") pii_addresses = st.checkbox("Detect & Mask Addresses", value=False, help="Replace street addresses and zip codes") # ==================================================================== # ๐ Augmentation (Optional) # ==================================================================== with st.expander("๐ Augmentation (Optional)", expanded=False): aug_enabled = st.checkbox("Enable Data Augmentation", value=False, help="Generate synthetic variations of existing samples") if aug_enabled: ag1, ag2 = st.columns(2) with ag1: aug_paraphrase = st.checkbox("Paraphrase Instructions", value=True, help="Synonym-based paraphrasing of instructions") aug_variations = st.checkbox("Generate Variations", value=False, help="Minor text variations (punctuation, casing)") with ag2: aug_backtranslate = st.checkbox("Back Translation", value=False, help="Simulate back-translation for diversity") aug_tone = st.checkbox("Tone Rewriting", value=False, help="Rewrite instructions in different tones") aug_factor = st.slider("Augmentation Factor", 1, 5, 1, help="Number of augmented copies per original sample") else: aug_paraphrase = aug_variations = aug_backtranslate = aug_tone = False aug_factor = 1 st.markdown("---") # โโ Run Pipeline Button โโ if st.button("๐ Run Advanced Processing Pipeline", type="primary", use_container_width=True): st.session_state.pipeline_status['data'] = 'running' with st.spinner("Running preprocessing pipeline..."): progress_bar = st.progress(0) status_text = st.empty() try: from preprocessing.pipeline import PreprocessingPipeline, PreprocessingConfig from preprocessing.text_cleaning import TextCleaningConfig from preprocessing.tokenization import TokenizationConfig 
from preprocessing.system_prompt import SystemPromptConfig from preprocessing.dataset_balancing import BalancingConfig from preprocessing.quality_filters import QualityFilterConfig from preprocessing.deduplication import DeduplicationConfig from preprocessing.train_val_split import SplitConfig from preprocessing.output_formatter import OutputFormatConfig, format_dataset, export_jsonl, generate_preview from preprocessing.pii_filter import PIIFilterConfig from preprocessing.augmentation import AugmentationConfig # Build config from UI values config = PreprocessingConfig( instruction_col=instruction_col, output_col=output_col, input_col=input_col, label_col=label_col if balance_enabled else None, text_cleaning=TextCleaningConfig( remove_html=clean_html, remove_urls=clean_urls, remove_emojis=clean_emojis, normalize_whitespace=clean_whitespace, lowercase=clean_lowercase, remove_special_chars=clean_special, strip_extra_linebreaks=clean_linebreaks, ), tokenization=TokenizationConfig( tokenizer_name="tiktoken" if tokenizer_choice == "tiktoken" else hf_model_name, max_total_tokens=max_total_tokens, truncate_long=truncate_long, split_long=split_long, split_overlap=split_overlap, ), system_prompt=SystemPromptConfig( system_prompt=system_prompt_text, prepend_to_all=prepend_system, ), balancing=BalancingConfig( enabled=balance_enabled, label_column=label_col if balance_enabled else "", strategy=balance_strategy if balance_enabled else "none", ), quality_filters=QualityFilterConfig( min_word_count=min_words, max_word_count=max_words, profanity_filter=profanity_filter, language_filter=language_filter, allowed_languages=[l.strip() for l in allowed_langs.split(',')], remove_low_quality=remove_low_quality, ), deduplication=DeduplicationConfig( remove_exact=dedup_exact, remove_semantic=dedup_semantic, semantic_threshold=semantic_threshold, ), split=SplitConfig( enabled=split_enabled, train_ratio=train_ratio, random_seed=int(random_seed), shuffle=shuffle_data, ), 
output_format=OutputFormatConfig( format_type=format_type, custom_schema=custom_schema, ), pii_filter=PIIFilterConfig( filter_emails=pii_emails, filter_phones=pii_phones, filter_id_numbers=pii_ids, filter_api_keys=pii_keys, filter_addresses=pii_addresses, ), augmentation=AugmentationConfig( enabled=aug_enabled, paraphrase=aug_paraphrase, generate_variations=aug_variations, back_translate=aug_backtranslate, tone_rewrite=aug_tone, augmentation_factor=aug_factor, ), ) def progress_cb(stage_name, pct): status_text.text(f"โ๏ธ {stage_name}...") progress_bar.progress(min(pct, 100)) pipeline = PreprocessingPipeline(config) train_df, val_df, logs = pipeline.run(df, progress_callback=progress_cb) # Format output sys_prompt = system_prompt_text if prepend_system else "" formatted_data = format_dataset( train_df, config.output_format, system_prompt=sys_prompt, instruction_col=instruction_col, output_col=output_col, input_col=input_col, label_col=label_col if balance_enabled else None, ) # Export output_dir = Path("./output/processed_data") output_dir.mkdir(parents=True, exist_ok=True) train_path = export_jsonl(formatted_data, str(output_dir / f"{goal}_train.jsonl")) val_path = None if len(val_df) > 0: val_formatted = format_dataset( val_df, config.output_format, system_prompt=sys_prompt, instruction_col=instruction_col, output_col=output_col, input_col=input_col, label_col=label_col if balance_enabled else None, ) val_path = export_jsonl(val_formatted, str(output_dir / f"{goal}_val.jsonl")) progress_bar.progress(100) status_text.text("โ Pipeline complete!") st.session_state.processed_data_path = train_path st.session_state.pipeline_status['data'] = 'complete' # โโ Results โโ st.success(f"โ Training data saved to: `{train_path}`") if val_path: st.success(f"โ Validation data saved to: `{val_path}`") # Stats rc1, rc2, rc3, rc4 = st.columns(4) with rc1: st.metric("Original Rows", f"{len(df):,}") with rc2: st.metric("Train Samples", f"{len(train_df):,}") with rc3: st.metric("Val 
Samples", f"{len(val_df):,}") with rc4: removed = len(df) - len(train_df) - len(val_df) st.metric("Removed", f"{max(0, removed):,}") # โโ Pipeline Logs โโ st.markdown("### ๐ Pipeline Logs") log_data = [] for log in logs: log_data.append({ 'Stage': log.stage, 'Description': log.description, 'Rows Before': log.rows_before, 'Rows After': log.rows_after, 'Delta': log.rows_delta, 'Time (ms)': log.duration_ms, }) st.dataframe(pd.DataFrame(log_data), use_container_width=True) # โโ Preview โโ st.markdown("### ๐๏ธ Output Preview") preview_json = generate_preview(formatted_data, n=3) st.code(preview_json, language="json") # โโ Download โโ st.markdown("### ๐ฅ Download") dl1, dl2 = st.columns(2) with dl1: with open(train_path, 'r', encoding='utf-8') as f: st.download_button("โฌ๏ธ Download Train JSONL", f.read(), file_name=f"{goal}_train.jsonl", mime="application/jsonl") with dl2: if val_path and Path(val_path).exists(): with open(val_path, 'r', encoding='utf-8') as f: st.download_button("โฌ๏ธ Download Val JSONL", f.read(), file_name=f"{goal}_val.jsonl", mime="application/jsonl") except Exception as e: st.session_state.pipeline_status['data'] = 'error' st.error(f"โ Pipeline Error: {str(e)}") import traceback st.code(traceback.format_exc()) # Show previously processed data if st.session_state.processed_data_path: st.markdown("---") st.markdown("### ๐ Last Processed Data") try: processed_path = Path(st.session_state.processed_data_path) if processed_path.exists(): with open(processed_path, encoding='utf-8') as f: samples = [json.loads(line) for line in f.readlines()[:5]] for i, sample in enumerate(samples): with st.expander(f"Sample {i+1}"): st.json(sample) except Exception as e: st.warning(f"Could not load preview: {e}") # ============================================================================ # PAGE: TRAINING # ============================================================================ def render_training(): st.markdown('๐ Model Training
', unsafe_allow_html=True) # Check prerequisites if st.session_state.processed_data_path is None: st.warning("โ ๏ธ Please process your data first!") if st.button("๐งน Go to Processing"): st.session_state.current_page = 'process' st.rerun() return # โโ GPU Detection โโ try: import torch has_gpu = torch.cuda.is_available() if has_gpu: gpu_name = torch.cuda.get_device_name(0) gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9 st.success(f"โ GPU Available: **{gpu_name}** ({gpu_memory:.1f} GB)") except Exception: has_gpu = False # โโ Download Preprocessed Data (always available) โโ st.markdown("### ๐ฅ Preprocessed Training Data") processed_path = Path(st.session_state.processed_data_path) if processed_path.exists(): with open(processed_path, 'r', encoding='utf-8') as f: processed_content = f.read() dl1, dl2 = st.columns(2) with dl1: st.download_button("โฌ๏ธ Download Training JSONL", processed_content, file_name=processed_path.name, mime="application/jsonl") with dl2: # Check for validation file val_path = processed_path.parent / processed_path.name.replace('_train', '_val') if val_path.exists(): with open(val_path, 'r', encoding='utf-8') as f: st.download_button("โฌ๏ธ Download Validation JSONL", f.read(), file_name=val_path.name, mime="application/jsonl") try: sample_count = sum(1 for _ in processed_content.split('\n') if _.strip()) except Exception: sample_count = 0 st.info(f"๐ Dataset: **{sample_count:,}** samples ready for training") else: st.warning("Processed data file not found.") st.markdown("---") # ==================================================================== # TWO PATHS: GPU Training OR Colab Notebook # ==================================================================== if has_gpu: training_mode = "gpu" else: training_mode = st.radio("๐ฅ๏ธ Select Training Mode", [ "โ๏ธ Use Google Colab (Recommended โ Free GPU)", "๐ค Upload Fine-Tuned Model (Already trained externally)" ], help="No GPU detected on this machine. 
Choose how to proceed.") # ==================================================================== # PATH A: GPU Training (local) # ==================================================================== if training_mode == "gpu": st.markdown("### โ๏ธ Training Configuration") col1, col2 = st.columns(2) with col1: model_source = st.radio("Model Source", ["Preset Models", "Custom HuggingFace Model"]) if model_source == "Preset Models": base_model = st.selectbox("Base Model", [ "unsloth/llama-3-8b-bnb-4bit", "unsloth/llama-3-70b-bnb-4bit", "unsloth/mistral-7b-bnb-4bit", "unsloth/gemma-7b-bnb-4bit", ]) else: base_model = st.text_input("HuggingFace Model ID", value="unsloth/llama-3-8b-bnb-4bit", help="Enter any HuggingFace model ID, e.g. 'meta-llama/Llama-3-8b', 'mistralai/Mistral-7B-v0.1'") max_seq_length = st.slider("Max Sequence Length", 512, 4096, 2048) with col2: dataset_size = sample_count if sample_count > 0 else 1000 if dataset_size < 1000: auto_rank, auto_alpha, auto_lr, auto_epochs = 8, 16, 2e-4, 5 size_category = "Small" elif dataset_size < 10000: auto_rank, auto_alpha, auto_lr, auto_epochs = 16, 32, 1e-4, 3 size_category = "Medium" else: auto_rank, auto_alpha, auto_lr, auto_epochs = 32, 64, 5e-5, 2 size_category = "Large" st.success(f"Auto-configured for **{size_category}** dataset ({dataset_size:,} samples)") st.markdown("---") with st.expander("๐ง Advanced Hyperparameters"): hc1, hc2, hc3 = st.columns(3) with hc1: lora_rank = st.slider("LoRA Rank", 4, 64, auto_rank) lora_alpha = st.slider("LoRA Alpha", 8, 128, auto_alpha) with hc2: learning_rate = st.select_slider("Learning Rate", options=[1e-5, 2e-5, 5e-5, 1e-4, 2e-4, 5e-4], value=auto_lr) num_epochs = st.slider("Epochs", 1, 10, auto_epochs) with hc3: batch_size = st.slider("Batch Size", 1, 16, 4) gradient_accumulation = st.slider("Gradient Accumulation", 1, 8, 4) st.markdown("---") col1, col2, col3 = st.columns([1, 2, 1]) with col2: if st.button("๐ Start Training", type="primary", use_container_width=True): 
st.session_state.pipeline_status['training'] = 'running' with st.spinner("Training in progress..."): progress_bar = st.progress(0) status_text = st.empty() try: from agents.training_pilot import TrainingPilot, HyperParams status_text.text("๐ฆ Loading model...") progress_bar.progress(10) pilot = TrainingPilot( base_model=base_model, max_seq_length=max_seq_length, output_dir="./output/models" ) status_text.text("๐ Training...") progress_bar.progress(30) result = pilot.run( data_path=st.session_state.processed_data_path, output_name=st.session_state.training_goal ) progress_bar.progress(100) status_text.text("โ Training complete!") st.session_state.model_path = result.model_path st.session_state.pipeline_status['training'] = 'complete' st.success(f"โ Model saved to: `{result.model_path}`") rc1, rc2, rc3 = st.columns(3) with rc1: st.metric("Final Loss", f"{result.final_loss:.4f}") with rc2: st.metric("Training Time", f"{result.training_time:.1f}s") with rc3: st.metric("Total Steps", result.num_steps) except Exception as e: st.session_state.pipeline_status['training'] = 'error' st.error(f"โ Training failed: {str(e)}") import traceback st.code(traceback.format_exc()) # ==================================================================== # PATH B: Google Colab Notebook # ==================================================================== elif "Colab" in training_mode: st.markdown("### โ๏ธ Train on Google Colab (Free GPU)") st.markdown(""" Since no GPU was detected on this machine, you can fine-tune your model on Google Colab with a free GPU. 
Follow these steps: """) st.markdown(""" **Step 1:** Download your preprocessed training data (above) โฌ๏ธ **Step 2:** Download or copy the Colab notebook below **Step 3:** Open [Google Colab](https://colab.research.google.com/) โ Upload the notebook **Step 4:** Upload your training JSONL to Colab's file browser **Step 5:** Run all cells โ Download the fine-tuned model **Step 6:** Come back here โ Upload your fine-tuned model results for evaluation """) # Show / Download Colab notebook notebook_path = Path("./Auto_FineTune_Ops_Colab.ipynb") if notebook_path.exists(): with open(notebook_path, 'r', encoding='utf-8') as f: notebook_content = f.read() st.download_button("๐ Download Colab Notebook (.ipynb)", notebook_content, file_name="Auto_FineTune_Ops_Colab.ipynb", mime="application/json", type="primary", use_container_width=True) with st.expander("๐๏ธ View Notebook Code", expanded=False): try: import json as json_mod nb = json_mod.loads(notebook_content) for cell in nb.get('cells', []): if cell.get('cell_type') == 'code': source = ''.join(cell.get('source', [])) if source.strip(): st.code(source, language='python') elif cell.get('cell_type') == 'markdown': source = ''.join(cell.get('source', [])) st.markdown(source) except Exception: st.code(notebook_content[:5000], language='json') else: st.warning("โ ๏ธ Colab notebook not found at `Auto_FineTune_Ops_Colab.ipynb`") st.markdown("---") st.markdown("### ๐ค After Training on Colab") st.info("Once you've finished training on Colab, download your fine-tuned model outputs and upload them below for evaluation.") # ==================================================================== # PATH C: Upload Fine-Tuned Model / Results # ==================================================================== else: st.markdown("### ๐ค Upload Fine-Tuned Model Results") st.markdown("Upload outputs from your externally trained model for evaluation.") # โโ Upload Fine-Tuned Results (always shown at bottom) โโ st.markdown("---") 
st.markdown("### ๐ฆ Upload Fine-Tuned Results for Evaluation") st.caption("If you trained on Colab or another machine, upload your model outputs here.") upload_tab1, upload_tab2 = st.tabs(["๐ Upload Evaluation Results (JSONL)", "๐ Upload Model Folder Path"]) with upload_tab1: ft_file = st.file_uploader("Upload fine-tuned model outputs (JSONL with predictions)", type=['jsonl', 'json'], key="ft_results_upload", help="JSONL file with model predictions/outputs from your fine-tuned model") if ft_file: try: ft_df = pd.read_json(ft_file, lines=ft_file.name.endswith('.jsonl')) st.success(f"โ Loaded **{len(ft_df):,}** evaluation samples") st.dataframe(ft_df.head(5), use_container_width=True) # Save for evaluation eval_output = Path("./output/eval_results") eval_output.mkdir(parents=True, exist_ok=True) eval_path = eval_output / f"finetuned_outputs_{ft_file.name}" ft_df.to_json(eval_path, orient='records', lines=True) st.session_state.model_path = str(eval_path) st.session_state.pipeline_status['training'] = 'complete' st.success(f"โ Results saved! 
# ============================================================================
# PAGE: EVALUATION
# ============================================================================

# Columns an evaluation dataset must contain before it is sent to the judge.
_EVAL_REQUIRED_COLUMNS = ['instruction', 'base_output', 'finetuned_output']

# Neutral verdict recorded when a judge reply cannot be used.
_FALLBACK_VERDICT = {
    "winner": "TIE",
    "score_a": 5,
    "score_b": 5,
    "reasoning": "Error parsing judge response",
}

# Hosted providers, keyed by the first word of the selectbox label:
# (API-key label, widget key, environment variable, model choices, base URL).
_HOSTED_JUDGES = {
    "OpenAI": (
        "OpenAI API Key", "openai_key_input", "OPENAI_API_KEY",
        ["gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo"],
        None,
    ),
    "Anthropic": (
        "Anthropic API Key", "anthropic_key_input", "ANTHROPIC_API_KEY",
        ["claude-3-5-sonnet-20241022", "claude-3-opus-20240229", "claude-3-sonnet-20240229"],
        None,
    ),
    "Groq": (
        "Groq API Key", "groq_key_input", "GROQ_API_KEY",
        ["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
        "https://api.groq.com/openai/v1",
    ),
}

# Comparison prompt sent to the judge; doubled braces survive str.format().
_JUDGE_PROMPT = (
    'You are an expert evaluator comparing two AI responses. '
    'Query: {prompt} '
    'Response A (Base Model): {response_a} '
    'Response B (Fine-tuned Model): {response_b} '
    'Compare them on: Accuracy, Helpfulness, Clarity. '
    'Return a valid JSON object ONLY: '
    '{{ "winner": "A" or "B" or "TIE", "score_a": <1-10>, "score_b": <1-10>, '
    '"reasoning": "short explanation", '
    '"accuracy": {{"A": <1-10>, "B": <1-10>}}, '
    '"helpfulness": {{"A": <1-10>, "B": <1-10>}}, '
    '"clarity": {{"A": <1-10>, "B": <1-10>}} }} '
)


def _hosted_judge_for(provider_label):
    """Return the hosted-provider settings for a selectbox label, or None.

    The lookup uses the label's first word. A substring test such as
    ``"OpenAI" in label`` would also match the custom endpoint label
    ("Custom OpenAI-Compatible Endpoint") and hide its base-URL input.
    """
    return _HOSTED_JUDGES.get(provider_label.split(" ", 1)[0])


def _unwrap_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, stripped."""
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0]
    elif "```" in text:
        text = text.split("```")[1]
    return text.strip()


def _parse_judge_verdict(raw, instruction):
    """Convert one raw judge reply into a verdict dict.

    Args:
        raw: Text returned by the judge model.
        instruction: The query the verdict belongs to.

    Returns:
        A dict that always carries ``instruction``, ``winner``, ``score_a``,
        ``score_b`` and ``reasoning``. A neutral TIE is used when the reply
        is not a JSON object or lacks the winner/score fields, so the results
        view never meets a missing column.
    """
    try:
        verdict = json.loads(_unwrap_code_fence(raw))
        if not isinstance(verdict, dict):
            raise ValueError("judge reply is not a JSON object")
        missing = [key for key in ("winner", "score_a", "score_b") if key not in verdict]
        if missing:
            raise ValueError(f"judge reply is missing {missing}")
    except (TypeError, ValueError) as exc:  # JSONDecodeError is a ValueError
        print(f"Parse error: {exc}")
        verdict = dict(_FALLBACK_VERDICT)
    verdict.setdefault("reasoning", "")
    verdict['instruction'] = instruction
    return verdict


def _render_judge_settings():
    """Render provider and credential widgets.

    Returns:
        Tuple ``(judge_provider, api_key, base_url, judge_model)``. A
        non-empty API key is also exported to the provider's environment
        variable.
    """
    st.markdown("### ๐ค Select AI Judge Provider")
    st.caption("Choose which LLM provider to use as the evaluation judge.")
    judge_provider = st.selectbox("AI Provider", [
        "OpenAI (GPT-4o, GPT-4-turbo, etc.)",
        "Anthropic (Claude 3.5, Claude 3 Opus, etc.)",
        "Groq (Llama 3, Mixtral, Gemma, etc.)",
        "Custom OpenAI-Compatible Endpoint"
    ], help="Select the AI provider whose model will act as the judge.")

    st.markdown("---")
    st.markdown("### ๐ API Configuration")

    col1, col2 = st.columns(2)
    hosted = _hosted_judge_for(judge_provider)
    if hosted is not None:
        key_label, widget_key, env_var, models, base_url = hosted
        with col1:
            api_key = st.text_input(key_label, type="password", key=widget_key)
        with col2:
            judge_model = st.selectbox("Judge Model", models)
    else:
        # Custom OpenAI-compatible endpoint: the user supplies URL and model.
        env_var = "OPENAI_API_KEY"
        with col1:
            base_url = st.text_input("API Base URL", placeholder="https://api.your-provider.com/v1")
            api_key = st.text_input("API Key", type="password", key="custom_key_input")
        with col2:
            judge_model = st.text_input("Model Name", placeholder="e.g., my-model")

    if api_key:
        os.environ[env_var] = api_key
    return judge_provider, api_key, base_url, judge_model


def _render_eval_data_inputs():
    """Load evaluation data into ``st.session_state['eval_data']`` and preview it.

    Data comes from the results file saved by the training page (when
    ``model_path`` points at one) and/or from a file uploaded here; an upload
    with the required columns replaces the preloaded data.
    """
    st.markdown("### ๐ Evaluation Data")

    # 1. Use data from training (if available)
    trained_path = st.session_state.model_path
    if trained_path and "finetuned_outputs" in str(trained_path):
        st.info(f"Using results from training: `{st.session_state.model_path}`")
        try:
            st.session_state['eval_data'] = pd.read_json(trained_path, lines=True)
        except Exception as exc:  # best-effort preload: report it instead of hiding it
            st.warning(f"Could not load results from training: {exc}")

    # 2. Upload new data
    eval_upload = st.file_uploader(
        "Upload JSONL (Must contain: 'instruction', 'base_output', 'finetuned_output')",
        type=['jsonl', 'json'],
        key="eval_uploader",
    )
    if eval_upload:
        try:
            df = pd.read_json(eval_upload, lines=eval_upload.name.endswith('.jsonl'))
            if all(col in df.columns for col in _EVAL_REQUIRED_COLUMNS):
                st.session_state['eval_data'] = df
                st.success(f"โ Loaded {len(df)} samples")
            else:
                st.error(f"โ Missing columns! Found: {list(df.columns)}. Required: {_EVAL_REQUIRED_COLUMNS}")
        except Exception as e:
            st.error(f"Error loading file: {e}")

    # Show Preview
    if st.session_state.get('eval_data') is not None:
        with st.expander("๐๏ธ View Data Preview"):
            st.dataframe(st.session_state['eval_data'].head(3), use_container_width=True)


def _run_judge(eval_df, judge_provider, api_key, base_url, judge_model):
    """Ask the judge to compare both outputs for every row of *eval_df*.

    On success the verdict list is stored in ``st.session_state.eval_results``
    and the evaluation stage is marked ``complete``; any exception is shown to
    the user and the stage is marked ``error``.
    """
    st.session_state.pipeline_status['evaluation'] = 'running'
    progress_bar = st.progress(0)
    status_text = st.empty()

    use_anthropic = judge_provider.startswith("Anthropic")
    total = len(eval_df)
    results = []

    try:
        if use_anthropic:
            from anthropic import Anthropic
            client = Anthropic(api_key=api_key)
        else:
            from openai import OpenAI
            client = OpenAI(api_key=api_key, base_url=base_url)

        # Count positions ourselves: iterrows() yields index labels, which are
        # only 0..n-1 when the frame has a default index.
        for position, (_, row) in enumerate(eval_df.iterrows(), start=1):
            status_text.text(f"Evaluating sample {position}/{total}...")

            prompt_text = _JUDGE_PROMPT.format(
                prompt=row['instruction'],
                response_a=row['base_output'],
                response_b=row['finetuned_output']
            )

            if use_anthropic:
                reply = client.messages.create(
                    model=judge_model,
                    max_tokens=1000,
                    messages=[{"role": "user", "content": prompt_text}]
                ).content[0].text
            else:
                reply = client.chat.completions.create(
                    model=judge_model,
                    max_tokens=1000,
                    messages=[{"role": "user", "content": prompt_text}],
                    response_format={"type": "json_object"}
                ).choices[0].message.content

            results.append(_parse_judge_verdict(reply, row['instruction']))
            progress_bar.progress(position / total)

        st.session_state.eval_results = results
        st.session_state.pipeline_status['evaluation'] = 'complete'
        status_text.text("โ Evaluation Complete!")
    except Exception as e:  # page boundary: show the failure rather than crash the app
        st.error(f"Evaluation Failed: {str(e)}")
        st.session_state.pipeline_status['evaluation'] = 'error'


def _render_eval_results():
    """Show metrics, charts, the verdict table and the JSON download."""
    results = st.session_state.get('eval_results')
    if not results:
        return

    df_res = pd.DataFrame(results)

    # Metrics
    wins_b = int((df_res['winner'] == 'B').sum())
    wins_a = int((df_res['winner'] == 'A').sum())
    ties = int((df_res['winner'] == 'TIE').sum())
    win_rate = (wins_b / len(df_res)) * 100
    mean_a = df_res['score_a'].mean()
    mean_b = df_res['score_b'].mean()

    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Fine-tuned Win Rate", f"{win_rate:.1f}%")
    col2.metric("Fine-Tuned Wins", wins_b)
    col3.metric("Base Model Wins", wins_a)
    col4.metric("Avg Score Improvement", f"{mean_b - mean_a:.2f}")

    # Charts
    c1, c2 = st.columns(2)
    with c1:
        fig = px.pie(values=[wins_b, wins_a, ties],
                     names=['Fine-tuned', 'Base', 'Ties'],
                     title="Win Distribution",
                     color_discrete_sequence=['#6366f1', '#ef4444', '#94a3b8'])
        st.plotly_chart(fig, use_container_width=True)
    with c2:
        avg_scores = pd.DataFrame({
            'Model': ['Base', 'Fine-tuned'],
            'Score': [mean_a, mean_b]
        })
        fig2 = px.bar(avg_scores, x='Model', y='Score', color='Model',
                      title="Average Overall Score",
                      color_discrete_map={'Base': '#ef4444', 'Fine-tuned': '#6366f1'})
        st.plotly_chart(fig2, use_container_width=True)

    # Detailed Table
    st.markdown("### ๐ Detailed Verdicts")
    st.dataframe(df_res[['instruction', 'winner', 'score_a', 'score_b', 'reasoning']],
                 use_container_width=True)

    # Download. `json` is the file-level import; it must not be re-imported
    # inside a conditional branch of the function that serialises the report,
    # or the name is unbound on runs that skip that branch.
    st.download_button("โฌ๏ธ Download Report (JSON)",
                       data=json.dumps(results, indent=2),
                       file_name="evaluation_report.json",
                       mime="application/json")


def render_evaluation():
    """Render the Model Evaluation page.

    The user picks a judge provider and credentials, loads a dataset with
    ``instruction``, ``base_output`` and ``finetuned_output`` columns, and
    starts the comparison. Stored verdicts are displayed below the button on
    every run that does not abort on a validation error.
    """
    st.markdown('โ๏ธ Model Evaluation\n', unsafe_allow_html=True)

    # Initialize session state for results if not present
    if 'eval_results' not in st.session_state:
        st.session_state.eval_results = None

    judge_provider, api_key, base_url, judge_model = _render_judge_settings()
    st.markdown("---")

    _render_eval_data_inputs()
    st.markdown("---")

    if st.button("๐ Run Dynamic Evaluation", type="primary", use_container_width=True):
        if not api_key:
            st.error("โ Please provide an API Key above!")
            return
        eval_df = st.session_state.get('eval_data')
        if eval_df is None:
            st.error("โ No evaluation data loaded!")
            return
        # Data preloaded from the training page is not validated on load, so
        # check it here before any judge call is made.
        if not all(col in eval_df.columns for col in _EVAL_REQUIRED_COLUMNS):
            st.error(f"โ Missing columns! Found: {list(eval_df.columns)}. Required: {_EVAL_REQUIRED_COLUMNS}")
            return
        _run_judge(eval_df, judge_provider, api_key, base_url, judge_model)

    _render_eval_results()
', unsafe_allow_html=True) # Model selection st.markdown("### ๐ฆ Select Model") models_dir = Path("./output/models") if models_dir.exists(): models = [d.name for d in models_dir.iterdir() if d.is_dir()] if models: selected_model = st.selectbox("Trained Models", models) model_path = models_dir / selected_model st.info(f"๐ Model path: `{model_path}`") else: st.warning("No trained models found.") selected_model = None else: st.warning("Models directory not found.") selected_model = None st.markdown("---") # Deployment options st.markdown("### ๐ Deployment Options") col1, col2 = st.columns(2) with col1: st.markdown("""Deploy as a REST API on your local machine.
Push your model to HuggingFace for sharing.