Spaces:
Running on Zero
Running on Zero
| """ | |
| DataSense E2B — Hugging Face Space demo | |
| Execution-grounded data agent (SFT v1) with bundled or uploaded CSVs/Excel. | |
| """ | |
| from __future__ import annotations | |
| import html | |
| import re | |
| import spaces # must be first — before any torch/CUDA import | |
| from pathlib import Path | |
| import gradio as gr | |
| import pandas as pd | |
| from agent import run_agent | |
| from config import ADAPTER_MODEL, AGENT_MAX_STEPS, DATA_DIR | |
| from examples import DEMO_DATASETS, DEMO_EXAMPLES | |
# Process-wide cache filled in by _load_model(); both names stay None until a
# load has succeeded.
MODEL, TOKENIZER = None, None

# External links interpolated into the header Markdown by build_ui().
STORY_URL = "https://datasense-e2b.netlify.app/"
DEMO_VIDEO_URL = "https://youtu.be/ucFoCdMK7sE"
LINKEDIN_POST_URL = (
    "https://www.linkedin.com/posts/sanjaymalladi_buildsmall-huggingface-modal-share-7471993638814654464-47hY/"
)
# Stylesheet handed to ``launch(css=...)`` at the bottom of this module.
# The ``--ds-*`` custom properties are declared on ``.gradio-container``; the
# ``#ds-*`` selectors match the ``elem_id`` values / inline ids used in
# build_ui(), and the ``.ds-*`` classes match the HTML emitted by the
# ``_progress_html`` / ``_format_answer_html`` / ``_wrap_trace`` helpers.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500&family=Newsreader:ital,opsz,wght@0,6..72,400;0,6..72,600;1,6..72,400&display=swap');
.gradio-container {
--ds-bg: #0c1117;
--ds-surface: #151c26;
--ds-border: #2a3544;
--ds-accent: #3ecfae;
--ds-accent-dim: #1f6f5c;
--ds-text: #e8eef5;
--ds-muted: #8b9cb3;
max-width: 1280px !important;
font-family: 'Newsreader', Georgia, serif !important;
}
#ds-header {
background: linear-gradient(135deg, #0f1a24 0%, #122a2a 55%, #0c1117 100%);
border: 1px solid var(--ds-border);
border-radius: 16px;
padding: 1.5rem 1.75rem;
margin-bottom: 1rem;
}
#ds-header h1 {
font-family: 'Newsreader', Georgia, serif;
font-size: 2rem;
font-weight: 600;
margin: 0 0 0.35rem 0;
color: var(--ds-text);
}
#ds-badge {
display: inline-block;
font-family: 'IBM Plex Mono', monospace;
font-size: 0.72rem;
letter-spacing: 0.08em;
text-transform: uppercase;
color: var(--ds-accent);
border: 1px solid var(--ds-accent-dim);
border-radius: 999px;
padding: 0.2rem 0.65rem;
margin-bottom: 0.75rem;
}
#ds-panel, #ds-results {
background: var(--ds-surface);
border: 1px solid var(--ds-border);
border-radius: 14px;
padding: 1.1rem;
min-height: 520px;
}
#ds-preview-box {
margin-top: 0.5rem;
border: 1px solid var(--ds-border);
border-radius: 10px;
overflow: hidden;
}
#ds-preview-box .label-wrap { padding: 0.5rem 0.75rem !important; }
#run-btn {
background: linear-gradient(90deg, #1f6f5c, #3ecfae) !important;
border: none !important;
font-weight: 600 !important;
letter-spacing: 0.02em;
}
#ds-progress-label {
font-family: 'IBM Plex Mono', monospace;
font-size: 0.82rem;
color: var(--ds-accent);
margin: 0 0 0.35rem 0;
}
.ds-progress-wrap {
margin: 0 0 1rem 0;
padding: 0.85rem 1rem;
background: #0f1419;
border: 1px solid #2a3544;
border-radius: 10px;
}
.ds-progress-text {
font-family: 'IBM Plex Mono', monospace;
font-size: 0.82rem;
color: #3ecfae;
margin-bottom: 0.55rem;
line-height: 1.4;
}
.ds-progress-text .ds-pct {
color: #8b9cb3;
font-size: 0.75rem;
}
.ds-progress-track {
height: 6px;
background: #1a2330;
border-radius: 999px;
overflow: hidden;
}
.ds-progress-fill {
height: 100%;
background: linear-gradient(90deg, #1f6f5c, #3ecfae);
border-radius: 999px;
transition: width 0.35s ease;
}
.ds-progress-wrap.ds-idle .ds-progress-track { display: none; }
#ds-results .tabitem { padding-top: 0.75rem !important; }
.ds-answer-card {
background: linear-gradient(145deg, #122a2a 0%, #151c26 100%);
border: 1px solid #2a3544;
border-left: 4px solid #3ecfae;
border-radius: 12px;
padding: 1.5rem 1.75rem;
margin: 0;
min-height: 200px;
}
.ds-answer-label {
font-family: 'IBM Plex Mono', monospace;
font-size: 0.72rem;
letter-spacing: 0.1em;
text-transform: uppercase;
color: #3ecfae;
margin-bottom: 0.75rem;
}
.ds-answer-value {
font-family: 'Newsreader', Georgia, serif;
font-size: 1.35rem;
font-weight: 400;
color: #c5d0de;
line-height: 1.55;
word-break: break-word;
}
.ds-answer-value strong {
color: #ffffff;
font-weight: 700;
}
.ds-summary {
margin-top: 1.25rem;
padding-top: 1rem;
border-top: 1px solid #2a3544;
font-size: 1.05rem;
color: #8b9cb3;
line-height: 1.55;
}
.ds-answer-empty {
color: #8b9cb3;
font-style: italic;
padding: 1rem 0;
}
.ds-trace-wrap { margin: 0; padding: 0; }
.ds-trace-wrap h3 { margin-top: 1rem; color: #3ecfae; font-size: 1rem; }
footer { visibility: hidden; }
"""
def build_theme() -> gr.Theme:
    """Assemble the dark DataSense theme.

    Starts from ``gr.themes.Base`` (emerald / slate / gray hues, Newsreader
    and IBM Plex Mono Google fonts) and then overrides the body, block, text
    and primary-button colours. Wherever a ``*_dark`` variant is set it
    mirrors the light value.

    Returns:
        The configured theme object returned by ``Base.set``.
    """
    base_theme = gr.themes.Base(
        primary_hue=gr.themes.colors.emerald,
        secondary_hue=gr.themes.colors.slate,
        neutral_hue=gr.themes.colors.gray,
        font=gr.themes.GoogleFont("Newsreader"),
        font_mono=gr.themes.GoogleFont("IBM Plex Mono"),
    )
    colour_overrides = {
        "body_background_fill": "#0c1117",
        "body_background_fill_dark": "#0c1117",
        "block_background_fill": "#151c26",
        "block_background_fill_dark": "#151c26",
        "block_border_width": "1px",
        "block_border_color": "#2a3544",
        "block_border_color_dark": "#2a3544",
        "body_text_color": "#e8eef5",
        "body_text_color_dark": "#e8eef5",
        "button_primary_background_fill": "linear-gradient(90deg, #1f6f5c, #3ecfae)",
        "button_primary_background_fill_hover": "linear-gradient(90deg, #2a8a72, #4de0c0)",
    }
    return base_theme.set(**colour_overrides)
def _load_model():
    """Return the cached ``(MODEL, TOKENIZER)`` pair, loading it when missing.

    The module-level ``MODEL`` / ``TOKENIZER`` globals act as the cache; a
    load is triggered whenever either of them is still ``None``.
    """
    global MODEL, TOKENIZER
    cache_ready = MODEL is not None and TOKENIZER is not None
    if not cache_ready:
        # Imported at call time, so model_loader is only pulled in when a load
        # is actually required.
        from model_loader import load_model_and_tokenizer

        MODEL, TOKENIZER = load_model_and_tokenizer()
    return MODEL, TOKENIZER
| def _resolve_data_path(data_mode: str, dataset_name: str, upload_file) -> Path | None: | |
| if data_mode == "Upload your file": | |
| if upload_file is None: | |
| return None | |
| path_str = upload_file[0] if isinstance(upload_file, list) else upload_file | |
| if not path_str: | |
| return None | |
| path = Path(path_str) | |
| return path if path.is_file() else None | |
| return DEMO_DATASETS.get(dataset_name) | |
def _load_preview(data_mode: str, dataset_name: str, upload_file):
    """Load a small preview of the selected dataset for the UI.

    At most 100 rows are read. ``.xlsx`` / ``.xls`` files go through
    ``pd.read_excel``; every other suffix is read with ``pd.read_csv``.

    Args:
        data_mode: Current "Data source" radio value.
        dataset_name: Currently selected bundled dataset name.
        upload_file: Current value of the upload component.

    Returns:
        ``(dataframe, caption_markdown)``. When no file is resolved, or the
        file cannot be read, the dataframe is empty and the caption explains
        why.
    """
    path = _resolve_data_path(data_mode, dataset_name, upload_file)
    if path is None:
        return pd.DataFrame(), "_Select or upload a file to preview._"

    preview_limit = 100
    try:
        suffix = path.suffix.lower()
        if suffix in (".xlsx", ".xls"):
            df = pd.read_excel(path, nrows=preview_limit)
        else:
            df = pd.read_csv(path, nrows=preview_limit)

        # The read is capped, so once the cap is reached len(df) is the size of
        # the preview, not of the file; say so instead of implying a total.
        if len(df) >= preview_limit:
            rows_label = f"first {len(df)} rows shown"
        else:
            rows_label = f"{len(df)} rows"
        meta = f"**{path.name}** · {rows_label} · {len(df.columns)} columns"
        return df, meta
    except Exception as exc:
        # UI boundary: surface any read/parse problem as caption text rather
        # than failing the event handler.
        return pd.DataFrame(), f"_Could not preview file: {exc}_"
| def _inline_markdown_to_html(text: str) -> str: | |
| """Turn **bold** into <strong>; escape everything else.""" | |
| if not text: | |
| return "" | |
| parts = re.split(r"\*\*(.+?)\*\*", text) | |
| chunks: list[str] = [] | |
| for i, part in enumerate(parts): | |
| safe = html.escape(part) | |
| chunks.append(f"<strong>{safe}</strong>" if i % 2 == 1 else safe) | |
| return "".join(chunks).replace("**", "") | |
| def _format_answer_html(answer: str, summary: str = "") -> str: | |
| if not answer: | |
| return '<div class="ds-answer-empty">Could not parse an answer — check the execution trace tab.</div>' | |
| answer_html = _inline_markdown_to_html(answer) | |
| summary_block = "" | |
| if summary: | |
| summary_block = f'<p class="ds-summary">{_inline_markdown_to_html(summary)}</p>' | |
| return ( | |
| f'<div class="ds-answer-card">' | |
| f'<div class="ds-answer-label">Verified answer</div>' | |
| f'<div class="ds-answer-value">{answer_html}</div>' | |
| f"{summary_block}" | |
| f"</div>" | |
| ) | |
def _toggle_data_inputs(data_mode: str):
    """Return visibility updates for the dataset dropdown and the upload box.

    Exactly one of the two is visible: the upload box when ``data_mode`` is
    ``"Upload your file"``, the dropdown otherwise.

    Returns:
        ``(dataset_update, upload_update)`` as produced by ``gr.update``.
    """
    show_upload = data_mode == "Upload your file"
    dataset_update = gr.update(visible=not show_upload)
    upload_update = gr.update(visible=show_upload)
    return dataset_update, upload_update
| def _wrap_trace(trace: str) -> str: | |
| if not trace: | |
| return "" | |
| return f'<div class="ds-trace-wrap">\n\n{trace}\n\n</div>' | |
| def _progress_html(pct: int | None, label: str) -> str: | |
| safe_label = html.escape(label) | |
| if pct is None: | |
| return f'<div class="ds-progress-wrap ds-idle"><div class="ds-progress-text">{safe_label}</div></div>' | |
| pct = max(0, min(100, pct)) | |
| return ( | |
| f'<div class="ds-progress-wrap">' | |
| f'<div class="ds-progress-text">{safe_label} <span class="ds-pct">{pct}%</span></div>' | |
| f'<div class="ds-progress-track"><div class="ds-progress-fill" style="width:{pct}%"></div></div>' | |
| f"</div>" | |
| ) | |
def _progress_update(pct: int | None, label: str, trace: str = "", answer: str = ""):
    """Bundle one UI update as ``(progress_html, wrapped_trace, answer)``.

    ``pct`` and ``label`` are rendered with ``_progress_html``, ``trace`` is
    wrapped with ``_wrap_trace``, and ``answer`` is passed through unchanged.
    """
    progress_markup = _progress_html(pct, label)
    trace_markup = _wrap_trace(trace)
    return progress_markup, trace_markup, answer
# Idle-state progress markup (no percentage, no bar); used as the initial value
# of the progress HTML component in build_ui().
IDLE_PROGRESS_HTML = _progress_html(None, "Ready — click Run DataSense")
def run_task(
    data_mode: str,
    dataset_name: str,
    upload_file,
    task: str,
    max_steps: int,
    progress=gr.Progress(),
):
    """Run the DataSense agent on the chosen dataset, streaming UI updates.

    This is a generator used as the Run-button handler. Every yielded value is
    the 3-tuple built by ``_progress_update``: progress HTML, wrapped trace
    markdown, and answer HTML.

    Args:
        data_mode: Current "Data source" radio value.
        dataset_name: Selected bundled dataset name.
        upload_file: Current value of the upload component.
        task: The user's question; blank or missing input short-circuits with
            a prompt to enter one.
        max_steps: Step budget, passed to ``run_agent`` as an ``int``.
        progress: Gradio progress tracker; updated before the model load and
            forwarded to ``run_agent``.

    Yields:
        UI update tuples. ``run_agent`` is consumed as a stream of events of
        the form ``("progress", step, total, trace_md)`` and
        ``("final", result)``, where ``result`` is read via the keys
        ``"answer"``, ``"summary"`` and ``"steps_markdown"``.
    """
    question = (task or "").strip()  # tolerate a missing/None textbox value
    if not question:
        yield _progress_update(None, "Enter a question to run DataSense", "", '<div class="ds-answer-empty">Enter a task question.</div>')
        return

    yield _progress_update(5, "Reading your dataset…")
    data_path = _resolve_data_path(data_mode, dataset_name, upload_file)
    if data_path is None:
        msg = "⚠️ Upload a `.csv` or `.xlsx` file first." if data_mode == "Upload your file" else f"⚠️ Dataset not found: {dataset_name}"
        yield _progress_update(0, "Dataset missing", "", f'<div class="ds-answer-empty">{html.escape(msg)}</div>')
        return

    # Most recent trace seen in this run; kept so that a failure or a truncated
    # stream still leaves the partial trace on screen.
    last_trace = ""
    saw_final = False
    try:
        yield _progress_update(12, "Loading DataSense on GPU…")
        progress(0.15, desc="Loading DataSense…")
        model, tokenizer = _load_model()
        yield _progress_update(22, "DataSense is starting…")
        agent_stream = run_agent(
            model,
            tokenizer,
            data_path,
            question,
            max_steps=int(max_steps),
            progress=progress,
            stream=True,
        )
        for event in agent_stream:
            kind = event[0]
            if kind == "progress":
                _, step, total, trace_md = event
                last_trace = trace_md or last_trace
                # Agent steps fill the 22%..95% band; max() guards total == 0.
                pct = int(22 + (73 * step / max(total, 1)))
                yield _progress_update(
                    pct,
                    f"DataSense · step {step}/{total}",
                    trace_md,
                    "",
                )
            elif kind == "final":
                saw_final = True
                result = event[1]
                last_trace = result.get("steps_markdown", last_trace)
                answer_html = _format_answer_html(result.get("answer", ""), result.get("summary", ""))
                yield _progress_update(
                    100,
                    "DataSense finished",
                    last_trace,
                    answer_html,
                )
        if not saw_final:
            # The stream ended without a final event: close out the run so the
            # UI does not stay frozen on the last step.
            yield _progress_update(100, "DataSense finished", last_trace, _format_answer_html(""))
    except Exception as exc:
        # UI boundary: report the failure in the answer pane and keep whatever
        # trace was collected, since it is the main debugging aid.
        yield _progress_update(0, "Error", last_trace, f'<div class="ds-answer-empty">Error: {html.escape(str(exc))}</div>')
def preload_model():
    """Warm the model cache by calling ``_load_model``; the result is discarded."""
    _ = _load_model()
def build_ui() -> gr.Blocks:
    """Build the DataSense Gradio app and wire its events.

    Layout: a header (badge, title, links), then a row with a configuration
    column (data source, preview, question, step slider, run button, examples)
    and a results column (progress panel plus trace / answer tabs).

    NOTE(review): the nesting of the ``with`` blocks below was reconstructed
    from a flattened copy of this file — confirm it against the deployed
    layout, in particular which components sit inside the header column and
    the preview group.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    dataset_choices = list(DEMO_DATASETS.keys())
    # Preview of the first bundled dataset, used as the initial component values.
    default_df, default_meta = _load_preview("Bundled examples", dataset_choices[0], None)
    with gr.Blocks(title="DataSense E2B") as demo:
        with gr.Column(elem_id="ds-header"):
            gr.HTML('<div id="ds-badge">Execution-verified · Gemma / DataBench</div>')
            gr.Markdown(
                """
# DataSense E2B
**Live inference** — Gemma-4 2B + SFT v1 writes Python, runs it on your data, reads real stdout/errors.
"""
            )
            gr.Markdown(
                f"📖 [Full project story]({STORY_URL}) · "
                f"🎬 [Demo video]({DEMO_VIDEO_URL}) · "
                f"[LinkedIn post]({LINKEDIN_POST_URL}) · "
                f"LoRA [`DataSense-Modal-E2B-SFT`](https://huggingface.co/{ADAPTER_MODEL})",
            )
        with gr.Row(equal_height=False):
            # Left column: inputs.
            with gr.Column(scale=5, elem_id="ds-panel"):
                gr.Markdown("### Configure")
                data_mode = gr.Radio(
                    choices=["Bundled examples", "Upload your file"],
                    value="Bundled examples",
                    label="Data source",
                )
                dataset = gr.Dropdown(
                    choices=dataset_choices,
                    value=dataset_choices[0],
                    label="Demo dataset",
                )
                # Hidden until "Upload your file" is selected (see
                # _toggle_data_inputs).
                upload = gr.File(
                    label="Your CSV or Excel file",
                    file_types=[".csv", ".xlsx", ".xls"],
                    type="filepath",
                    visible=False,
                )
                gr.Markdown("### Data preview")
                preview_meta = gr.Markdown(default_meta)
                with gr.Group(elem_id="ds-preview-box"):
                    preview_df = gr.Dataframe(
                        value=default_df,
                        interactive=False,
                        wrap=True,
                        max_height=280,
                    )
                task = gr.Textbox(
                    label="Question / task",
                    placeholder="e.g. Which product had the highest total revenue?",
                    lines=3,
                )
                max_steps = gr.Slider(
                    minimum=3,
                    maximum=12,
                    value=AGENT_MAX_STEPS,
                    step=1,
                    label="Max agent steps",
                )
                run_btn = gr.Button("▶ Run DataSense", variant="primary", elem_id="run-btn")
                # Examples fill the dataset dropdown and the question box only.
                gr.Examples(
                    examples=DEMO_EXAMPLES,
                    inputs=[dataset, task],
                    label="Quick examples (bundled data)",
                )
            # Right column: outputs.
            with gr.Column(scale=7, elem_id="ds-results"):
                gr.Markdown("### Results")
                progress_out = gr.HTML(value=IDLE_PROGRESS_HTML)
                with gr.Tabs():
                    with gr.Tab("🔍 Execution trace", id="trace_tab"):
                        steps_out = gr.Markdown()
                    with gr.Tab("✅ Answer", id="answer_tab"):
                        answer_out = gr.HTML()
        # Event wiring. Any change to the data source, dataset or upload
        # refreshes the preview table and its caption.
        preview_inputs = [data_mode, dataset, upload]
        # Switching the source first swaps dropdown/upload visibility, then
        # reloads the preview.
        data_mode.change(_toggle_data_inputs, data_mode, [dataset, upload]).then(
            _load_preview, preview_inputs, [preview_df, preview_meta]
        )
        dataset.change(_load_preview, preview_inputs, [preview_df, preview_meta])
        upload.change(_load_preview, preview_inputs, [preview_df, preview_meta])
        # run_task is a generator; each yielded tuple updates the three outputs.
        run_btn.click(
            fn=run_task,
            inputs=[data_mode, dataset, upload, task, max_steps],
            outputs=[progress_out, steps_out, answer_out],
            show_progress="hidden",
        )
        # Reload the preview on page load.
        demo.load(_load_preview, preview_inputs, [preview_df, preview_meta])
    return demo
# Best-effort warm-up at import time: a failure is printed and ignored, and the
# model is loaded later by run_task() via _load_model().
try:
    preload_model()
except Exception as exc:
    print(f"Startup preload skipped (will load on first run): {exc}")

# Module-level app object, built at import time.
demo = build_ui()

if __name__ == "__main__":
    # Queue holds at most 8 pending requests; theme and CSS are supplied here
    # at launch time.
    demo.queue(max_size=8).launch(theme=build_theme(), css=CUSTOM_CSS)