| | """ |
| | Code parsing and formatting utilities for different frameworks. |
| | Handles parsing of transformers.js, React, multi-file HTML, Streamlit, and Gradio code. |
| | """ |
| | import re |
| | import os |
| | import json |
| | import base64 |
| | from typing import Dict, List, Optional, Tuple |
| | from bs4 import BeautifulSoup |
| | import html |
| |
|
| | from .config import SEARCH_START, DIVIDER, REPLACE_END |
| |
|
| | |
| | History = List[Dict[str, str]] |
| |
|
| | def strip_tool_call_markers(text): |
| | """Remove TOOL_CALL markers and thinking tags that some LLMs add to their output.""" |
| | if not text: |
| | return text |
| | |
| | text = re.sub(r'\[/?TOOL_CALL\]', '', text, flags=re.IGNORECASE) |
| | |
| | text = re.sub(r'<think>[\s\S]*?</think>', '', text, flags=re.IGNORECASE) |
| | |
| | text = re.sub(r'^<think>[\s\S]*?(?=\n|$)', '', text, flags=re.IGNORECASE | re.MULTILINE) |
| | |
| | text = re.sub(r'</think>', '', text, flags=re.IGNORECASE) |
| | |
| | |
| | text = re.sub(r'^\s*\}\}\s*$', '', text, flags=re.MULTILINE) |
| | return text.strip() |
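
# Illustrative usage (a minimal sketch; the exact marker strings vary by model):
#
#   raw = "[TOOL_CALL]<think>planning the page...</think><html>...</html>[/TOOL_CALL]"
#   strip_tool_call_markers(raw)
#   # -> "<html>...</html>"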


# Language identifier lines that may precede code inside (or in place of) a fence.
# Comparison is done on the lowercased first line, so all entries are lowercase.
_LANGUAGE_IDS = {
    'python', 'html', 'css', 'javascript', 'json', 'c', 'cpp', 'markdown',
    'latex', 'jinja2', 'typescript', 'yaml', 'dockerfile', 'shell', 'r',
    'sql', 'sql-mssql', 'sql-mysql', 'sql-mariadb', 'sql-sqlite',
    'sql-cassandra', 'sql-plsql', 'sql-hive', 'sql-pgsql', 'sql-gql',
    'sql-gpsql', 'sql-sparksql', 'sql-esper',
}


def remove_code_block(text):
    """Extract code from a fenced block if present, otherwise clean up the raw text."""
    text = strip_tool_call_markers(text)

    # Try fence patterns from most specific to least specific.
    patterns = [
        r'```(?:html|HTML)\n([\s\S]+?)\n```',
        r'```\n([\s\S]+?)\n```',
        r'```([\s\S]+?)```'
    ]
    for pattern in patterns:
        match = re.search(pattern, text, re.DOTALL)
        if match:
            extracted = match.group(1).strip()
            # Drop a leading language identifier line if present.
            if extracted.split('\n', 1)[0].strip().lower() in _LANGUAGE_IDS:
                return extracted.split('\n', 1)[1] if '\n' in extracted else ''
            # If prose precedes the HTML root element, drop everything before it.
            html_root_idx = None
            for tag in ['<!DOCTYPE html', '<html']:
                idx = extracted.find(tag)
                if idx != -1:
                    html_root_idx = idx if html_root_idx is None else min(html_root_idx, idx)
            if html_root_idx is not None and html_root_idx > 0:
                return extracted[html_root_idx:].strip()
            return extracted

    stripped = text.strip()
    if stripped.startswith('<!DOCTYPE html>') or stripped.startswith('<html') or stripped.startswith('<'):
        # Unfenced HTML: drop any preamble before the document root.
        for tag in ['<!DOCTYPE html', '<html']:
            idx = stripped.find(tag)
            if idx > 0:
                return stripped[idx:].strip()
        return stripped

    if text.strip().startswith('```python'):
        return text.strip()[9:-3].strip()

    # Drop a bare language identifier on the first line of unfenced text.
    lines = text.strip().split('\n', 1)
    if lines[0].strip().lower() in _LANGUAGE_IDS:
        return lines[1] if len(lines) > 1 else ''
    return text.strip()
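
# Illustrative usage (sketch):
#
#   remove_code_block("Here is the page:\n```html\n<!DOCTYPE html><html></html>\n```")
#   # -> "<!DOCTYPE html><html></html>"
#
#   remove_code_block("python\nprint('hi')")   # bare language id on the first line
#   # -> "print('hi')"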


def strip_placeholder_thinking(text: str) -> str:
    """Remove placeholder 'Thinking...' status lines from streamed text."""
    if not text:
        return text
    return re.sub(r"(?mi)^[\t ]*Thinking\.\.\.(?:\s*\(\d+s elapsed\))?[\t ]*$\n?", "", text)


def is_placeholder_thinking_only(text: str) -> bool:
    """Return True if text contains only 'Thinking...' placeholder lines (with optional elapsed time)."""
    if not text:
        return False
    stripped = text.strip()
    if not stripped:
        return False
    return re.fullmatch(r"(?s)(?:\s*Thinking\.\.\.(?:\s*\(\d+s elapsed\))?\s*)+", stripped) is not None


def extract_last_thinking_line(text: str) -> str:
    """Extract the last 'Thinking...' line to display as status."""
    matches = list(re.finditer(r"Thinking\.\.\.(?:\s*\(\d+s elapsed\))?", text))
    return matches[-1].group(0) if matches else "Thinking..."


def parse_transformers_js_output(text):
    """Parse transformers.js output and extract the three files (index.html, index.js, style.css)."""
    files = {
        'index.html': '',
        'index.js': '',
        'style.css': ''
    }

    # Fenced-block patterns for each file, from most to least specific.
    html_patterns = [
        r'```html\s*\n([\s\S]*?)(?:```|\Z)',
        r'```htm\s*\n([\s\S]*?)(?:```|\Z)',
        r'```\s*(?:index\.html|html)\s*\n([\s\S]*?)(?:```|\Z)'
    ]
    js_patterns = [
        r'```javascript\s*\n([\s\S]*?)(?:```|\Z)',
        r'```js\s*\n([\s\S]*?)(?:```|\Z)',
        r'```\s*(?:index\.js|javascript|js)\s*\n([\s\S]*?)(?:```|\Z)'
    ]
    css_patterns = [
        r'```css\s*\n([\s\S]*?)(?:```|\Z)',
        r'```\s*(?:style\.css|css)\s*\n([\s\S]*?)(?:```|\Z)'
    ]

    for pattern in html_patterns:
        html_match = re.search(pattern, text, re.IGNORECASE)
        if html_match:
            files['index.html'] = html_match.group(1).strip()
            break

    for pattern in js_patterns:
        js_match = re.search(pattern, text, re.IGNORECASE)
        if js_match:
            files['index.js'] = js_match.group(1).strip()
            break

    for pattern in css_patterns:
        css_match = re.search(pattern, text, re.IGNORECASE)
        if css_match:
            files['style.css'] = css_match.group(1).strip()
            break

    # Fallback: look for "=== filename ===" sections.
    if not (files['index.html'] and files['index.js'] and files['style.css']):
        html_fallback = re.search(r'===\s*index\.html\s*===\s*\n([\s\S]+?)(?=\n===|$)', text, re.IGNORECASE)
        js_fallback = re.search(r'===\s*index\.js\s*===\s*\n([\s\S]+?)(?=\n===|$)', text, re.IGNORECASE)
        css_fallback = re.search(r'===\s*style\.css\s*===\s*\n([\s\S]+?)(?=\n===|$)', text, re.IGNORECASE)

        if html_fallback:
            files['index.html'] = html_fallback.group(1).strip()
        if js_fallback:
            files['index.js'] = js_fallback.group(1).strip()
        if css_fallback:
            files['style.css'] = css_fallback.group(1).strip()

    # Last resort: numbered or markdown headings like "1. index.html:" or "## index.html".
    if not (files['index.html'] and files['index.js'] and files['style.css']):
        patterns = [
            (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)index\.html(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'index.html'),
            (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)index\.js(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'index.js'),
            (r'(?:^\d+\.\s*|^##\s*|^\*\*\s*)style\.css(?:\s*:|\*\*:?)\s*\n([\s\S]+?)(?=\n(?:\d+\.|##|\*\*|===)|$)', 'style.css')
        ]

        for pattern, file_key in patterns:
            if not files[file_key]:
                match = re.search(pattern, text, re.IGNORECASE | re.MULTILINE)
                if match:
                    # Strip any stray code fences around the section body.
                    content = match.group(1).strip()
                    content = re.sub(r'^```\w*\s*\n', '', content)
                    content = re.sub(r'\n```\s*$', '', content)
                    files[file_key] = content.strip()

    return files


def format_transformers_js_output(files):
    """Format the three files into a single display string."""
    output = []
    output.append("=== index.html ===")
    output.append(files['index.html'])
    output.append("\n=== index.js ===")
    output.append(files['index.js'])
    output.append("\n=== style.css ===")
    output.append(files['style.css'])
    return '\n'.join(output)
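
# Illustrative round trip (sketch):
#
#   text = "```html\n<h1>Hi</h1>\n```\n```js\nconsole.log(1)\n```\n```css\nh1{color:red}\n```"
#   files = parse_transformers_js_output(text)
#   # -> {'index.html': '<h1>Hi</h1>', 'index.js': 'console.log(1)', 'style.css': 'h1{color:red}'}
#   format_transformers_js_output(files)
#   # -> "=== index.html ===\n<h1>Hi</h1>\n\n=== index.js ===\n..."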


def build_transformers_inline_html(files: dict) -> str:
    """Merge transformers.js three-file output into a single self-contained HTML document.

    - Inlines style.css into a <style> tag
    - Inlines index.js into a <script type="module"> tag
    - Rewrites ESM imports for transformers.js to a stable CDN URL so it works in data: iframes
    """
    import re as _re

    html = files.get('index.html') or ''
    js = files.get('index.js') or ''
    css = files.get('style.css') or ''

    # Pin a stable CDN build so bare-specifier imports resolve inside a data: iframe.
    cdn_url = "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.3"

    def _normalize_imports(_code: str) -> str:
        if not _code:
            return _code or ""
        _code = _re.sub(r"from\s+['\"]@huggingface/transformers['\"]", f"from '{cdn_url}'", _code)
        _code = _re.sub(r"from\s+['\"]@xenova/transformers['\"]", f"from '{cdn_url}'", _code)
        _code = _re.sub(r"from\s+['\"]https://cdn\.jsdelivr\.net/npm/@huggingface/transformers@[^'\"]+['\"]", f"from '{cdn_url}'", _code)
        _code = _re.sub(r"from\s+['\"]https://cdn\.jsdelivr\.net/npm/@xenova/transformers@[^'\"]+['\"]", f"from '{cdn_url}'", _code)
        return _code

    # Hoist inline <script type="module"> blocks out of the HTML so they can be
    # combined with index.js and normalized together.
    inline_modules = []
    try:
        for _m in _re.finditer(r"<script\b[^>]*type=[\"\']module[\"\'][^>]*>([\s\S]*?)</script>", html, flags=_re.IGNORECASE):
            inline_modules.append(_m.group(1))
        if inline_modules:
            html = _re.sub(r"<script\b[^>]*type=[\"\']module[\"\'][^>]*>[\s\S]*?</script>\s*", "", html, flags=_re.IGNORECASE)
        # Normalize any hard-coded CDN URLs left in the HTML itself.
        html = _re.sub(r"https://cdn\.jsdelivr\.net/npm/@huggingface/transformers@[^'\"<>\s]+", cdn_url, html)
        html = _re.sub(r"https://cdn\.jsdelivr\.net/npm/@xenova/transformers@[^'\"<>\s]+", cdn_url, html)
    except Exception:
        # If HTML munging fails, fall back to the original markup.
        pass

    # Combine hoisted inline modules with index.js, then normalize imports.
    combined_js_parts = []
    if inline_modules:
        combined_js_parts.append("\n\n".join(inline_modules))
    if js:
        combined_js_parts.append(js)
    js = "\n\n".join([p for p in combined_js_parts if (p and p.strip())])
    js = _normalize_imports(js)

    # Prepend a prelude that disables the browser cache, forces single-threaded
    # WASM (data: iframes lack cross-origin isolation), and exposes the library
    # on globalThis for non-module code.
    if js.strip():
        prelude = (
            f"import {{ env }} from '{cdn_url}';\n"
            "try { env.useBrowserCache = false; } catch (e) {}\n"
            "try { if (env && env.backends && env.backends.onnx && env.backends.onnx.wasm) { env.backends.onnx.wasm.numThreads = 1; env.backends.onnx.wasm.proxy = false; } } catch (e) {}\n"
            f"(async () => {{ try {{ if (typeof globalThis.transformers === 'undefined') {{ const m = await import('{cdn_url}'); globalThis.transformers = m; }} }} catch (e) {{}} }})();\n"
        )
        js = prelude + js

    # Ensure we have a complete document to inject into.
    doc = html.strip()
    if not doc or ('<html' not in doc.lower()):
        doc = (
            "<!DOCTYPE html>\n"
            "<html>\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Transformers.js App</title>\n</head>\n"
            "<body>\n<div id=\"app\"></div>\n</body>\n</html>"
        )

    # Remove references to the external files we are about to inline.
    doc = _re.sub(r"<link[^>]+href=\"[^\"]*style\.css\"[^>]*>\s*", "", doc, flags=_re.IGNORECASE)
    doc = _re.sub(r"<script[^>]+src=\"[^\"]*index\.js\"[^>]*>\s*</script>\s*", "", doc, flags=_re.IGNORECASE)

    # Inline the CSS, preferring the end of <head>, then the start of <body>.
    style_tag = f"<style>\n{css}\n</style>" if css else ""
    if style_tag:
        if '</head>' in doc.lower():
            match = _re.search(r"</head>", doc, flags=_re.IGNORECASE)
            if match:
                idx = match.start()
                doc = doc[:idx] + style_tag + doc[idx:]
        else:
            match = _re.search(r"<body[^>]*>", doc, flags=_re.IGNORECASE)
            if match:
                idx = match.end()
                doc = doc[:idx] + "\n" + style_tag + doc[idx:]
            else:
                doc = style_tag + doc

    # Inline the JS as a module script just before </body>.
    script_tag = f"<script type=\"module\">\n{js}\n</script>" if js else ""
    # Debug overlay that surfaces console errors and unhandled rejections inside
    # the sandboxed iframe, where devtools are hard to reach.
    debug_overlay = (
        "<style>\n"
        "#anycoder-debug{position:fixed;left:0;right:0;bottom:0;max-height:45%;overflow:auto;"
        "background:rgba(0,0,0,.85);color:#9eff9e;padding:.5em;font:12px/1.4 monospace;z-index:2147483647;display:none}"
        "#anycoder-debug pre{margin:0;white-space:pre-wrap;word-break:break-word}"
        "</style>\n"
        "<div id=\"anycoder-debug\"></div>\n"
        "<script>\n"
        "(function(){\n"
        "  const el = document.getElementById('anycoder-debug');\n"
        "  function show(){ if(el && el.style.display!=='block'){ el.style.display='block'; } }\n"
        "  function log(msg){ try{ show(); const pre=document.createElement('pre'); pre.textContent=msg; el.appendChild(pre);}catch(e){} }\n"
        "  const origError = console.error.bind(console);\n"
        "  console.error = function(){ origError.apply(console, arguments); try{ log('console.error: ' + Array.from(arguments).map(a=>{try{return (typeof a==='string')?a:JSON.stringify(a);}catch(e){return String(a);}}).join(' ')); }catch(e){} };\n"
        "  window.addEventListener('error', e => { log('window.onerror: ' + (e && e.message ? e.message : 'Unknown error')); });\n"
        "  window.addEventListener('unhandledrejection', e => { try{ const r=e && e.reason; log('unhandledrejection: ' + (r && (r.message || JSON.stringify(r)))); }catch(err){ log('unhandledrejection'); } });\n"
        "})();\n"
        "</script>"
    )
    # Best-effort cleanup of caches and IndexedDB when the preview is torn down.
    cleanup_tag = (
        "<script>\n"
        "(function(){\n"
        "  function cleanup(){\n"
        "    try { if (window.caches && caches.keys) { caches.keys().then(keys => keys.forEach(k => caches.delete(k))); } } catch(e){}\n"
        "    try { if (window.indexedDB && indexedDB.databases) { indexedDB.databases().then(dbs => dbs.forEach(db => db && db.name && indexedDB.deleteDatabase(db.name))); } } catch(e){}\n"
        "  }\n"
        "  window.addEventListener('pagehide', cleanup, { once: true });\n"
        "  window.addEventListener('beforeunload', cleanup, { once: true });\n"
        "})();\n"
        "</script>"
    )
    if script_tag:
        match = _re.search(r"</body>", doc, flags=_re.IGNORECASE)
        if match:
            idx = match.start()
            doc = doc[:idx] + debug_overlay + script_tag + cleanup_tag + doc[idx:]
        else:
            doc = doc + debug_overlay + script_tag + cleanup_tag

    return doc
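
# Illustrative usage (sketch):
#
#   files = {'index.html': '<html><head></head><body></body></html>',
#            'index.js': "import { pipeline } from '@huggingface/transformers';",
#            'style.css': 'body{font-family:sans-serif}'}
#   doc = build_transformers_inline_html(files)
#   # doc is a single HTML document: the CSS sits in a <style> tag in <head>, and
#   # the JS in a <script type="module"> tag with its bare import rewritten to the
#   # pinned CDN URL.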


def send_transformers_to_sandbox(files: dict) -> str:
    """Build a self-contained HTML document from transformers.js files and return an iframe preview."""
    merged_html = build_transformers_inline_html(files)
    return send_to_sandbox(merged_html)


def parse_multipage_html_output(text: str) -> Dict[str, str]:
    """Parse multi-page HTML output formatted as repeated "=== filename ===" sections.

    Returns a mapping of filename → file content. Supports nested paths like assets/css/styles.css.
    If HTML content appears before the first === marker, it is treated as index.html.
    """
    if not text:
        return {}

    cleaned = remove_code_block(text)
    files: Dict[str, str] = {}
    import re as _re

    # If content precedes the first marker and looks like HTML, capture it as index.html.
    first_marker_match = _re.search(r"^===\s*([^=\n]+?)\s*===", cleaned, _re.MULTILINE)
    if first_marker_match:
        first_marker_pos = first_marker_match.start()
        if first_marker_pos > 0:
            leading_content = cleaned[:first_marker_pos].strip()
            if leading_content and ('<!DOCTYPE' in leading_content or '<html' in leading_content or leading_content.startswith('<')):
                files['index.html'] = leading_content
        remaining_text = cleaned[first_marker_pos:]
    else:
        remaining_text = cleaned

    # Collect each "=== filename ===" section, stripping stray code fences.
    pattern = _re.compile(r"^===\s*([^=\n]+?)\s*===\s*\n([\s\S]*?)(?=\n===\s*[^=\n]+?\s*===|\Z)", _re.MULTILINE)
    for m in pattern.finditer(remaining_text):
        name = m.group(1).strip()
        content = m.group(2).strip()
        content = _re.sub(r"^```\w*\s*\n|\n```\s*$", "", content)
        files[name] = content

    return files
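
# Illustrative usage (sketch):
#
#   text = "=== index.html ===\n<!DOCTYPE html>...\n=== assets/css/styles.css ===\nbody{margin:0}"
#   parse_multipage_html_output(text)
#   # -> {'index.html': '<!DOCTYPE html>...', 'assets/css/styles.css': 'body{margin:0}'}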


def format_multipage_output(files: Dict[str, str]) -> str:
    """Format a dict of files back into === filename === sections.

    Ensures `index.html` appears first if present; others follow sorted by path.
    """
    if not isinstance(files, dict) or not files:
        return ""
    ordered_paths = []
    if 'index.html' in files:
        ordered_paths.append('index.html')
    for path in sorted(files.keys()):
        if path == 'index.html':
            continue
        ordered_paths.append(path)
    parts: list[str] = []
    for path in ordered_paths:
        parts.append(f"=== {path} ===")
        parts.append((files.get(path) or '').rstrip())
    return "\n".join(parts)


def validate_and_autofix_files(files: Dict[str, str]) -> Dict[str, str]:
    """Ensure a minimal contract for multi-file sites; auto-fix missing pieces.

    Rules:
    - Ensure at least one HTML entrypoint (index.html). If none, synthesize a simple index.html linking discovered pages.
    - For each HTML file, ensure referenced local assets exist in files; if missing, add minimal stubs.
    - Normalize relative paths (strip leading '/').
    """
    if not isinstance(files, dict) or not files:
        return files or {}
    import re as _re

    # Normalize paths: strip whitespace and leading slashes.
    normalized: Dict[str, str] = {}
    for k, v in files.items():
        safe_key = k.strip().lstrip('/')
        normalized[safe_key] = v

    html_files = [p for p in normalized.keys() if p.lower().endswith('.html')]
    has_index = 'index.html' in normalized

    # Synthesize an index.html that links the discovered pages.
    if not has_index and html_files:
        links = '\n'.join([f"<li><a href=\"{p}\">{p}</a></li>" for p in html_files])
        normalized['index.html'] = (
            "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\"/>\n"
            "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"/>\n"
            "<title>Site Index</title>\n</head>\n<body>\n<h1>Site</h1>\n<ul>\n"
            + links + "\n</ul>\n</body>\n</html>"
        )

    # Collect local asset references from every HTML file.
    asset_refs: set[str] = set()
    link_href = _re.compile(r"<link[^>]+href=\"([^\"]+)\"")
    script_src = _re.compile(r"<script[^>]+src=\"([^\"]+)\"")
    img_src = _re.compile(r"<img[^>]+src=\"([^\"]+)\"")
    a_href = _re.compile(r"<a[^>]+href=\"([^\"]+)\"")

    for path, content in list(normalized.items()):
        if not path.lower().endswith('.html'):
            continue
        for patt in (link_href, script_src, img_src, a_href):
            for m in patt.finditer(content or ""):
                ref = (m.group(1) or "").strip()
                if not ref or ref.startswith('http://') or ref.startswith('https://') or ref.startswith('data:') or '#' in ref:
                    continue
                asset_refs.add(ref.lstrip('/'))

    # Create minimal stubs for referenced-but-missing local files.
    for ref in list(asset_refs):
        if ref not in normalized:
            if ref.lower().endswith('.css'):
                normalized[ref] = "/* generated stub */\n"
            elif ref.lower().endswith('.js'):
                normalized[ref] = "// generated stub\n"
            elif ref.lower().endswith('.html'):
                normalized[ref] = (
                    "<!DOCTYPE html>\n<html lang=\"en\">\n<head><meta charset=\"utf-8\"/><meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"/><title>Page</title></head>\n"
                    "<body><main><h1>Placeholder page</h1><p>This page was auto-created to satisfy an internal link.</p></main></body>\n</html>"
                )

    return normalized
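
# Illustrative usage (sketch):
#
#   files = {'/about.html': '<html><body><a href="team.html">Team</a></body></html>'}
#   fixed = validate_and_autofix_files(files)
#   # -> keys: 'about.html' (leading '/' stripped), 'index.html' (synthesized entrypoint),
#   #    'team.html' (placeholder stub for the dangling internal link)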


def inline_multipage_into_single_preview(files: Dict[str, str]) -> str:
    """Inline local CSS/JS referenced by index.html for preview inside a data: iframe.

    - Uses index.html as the base document
    - Inlines <link href="..."> if the target exists in files
    - Inlines <script src="..."> if the target exists in files
    - Leaves other links (e.g., about.html) untouched; preview covers the home page
    """
    import re as _re
    html = files.get('index.html', '')
    if not html:
        return ""
    doc = html

    # Inline stylesheets whose targets exist in the file map.
    def _inline_css(match):
        href = match.group(1)
        if href in files:
            return f"<style>\n{files[href]}\n</style>"
        return match.group(0)
    doc = _re.sub(r"<link[^>]+href=\"([^\"]+)\"[^>]*/?>", _inline_css, doc, flags=_re.IGNORECASE)

    # Inline scripts whose targets exist in the file map.
    def _inline_js(match):
        src = match.group(1)
        if src in files:
            return f"<script>\n{files[src]}\n</script>"
        return match.group(0)
    doc = _re.sub(r"<script[^>]+src=\"([^\"]+)\"[^>]*>\s*</script>", _inline_js, doc, flags=_re.IGNORECASE)

    # Embed all HTML pages (base64-encoded JSON) plus a tiny client-side router so
    # internal links navigate between pages without leaving the iframe.
    try:
        import json as _json
        import base64 as _b64
        html_pages = {k: v for k, v in files.items() if k.lower().endswith('.html')}
        # Store only the <body> of the (already inlined) index page.
        _m_body = _re.search(r"<body[^>]*>([\s\S]*?)</body>", doc, flags=_re.IGNORECASE)
        _index_body = _m_body.group(1) if _m_body else doc
        html_pages['index.html'] = _index_body
        encoded = _b64.b64encode(_json.dumps(html_pages).encode('utf-8')).decode('ascii')
        nav_script = (
            "<script>\n"
            "(function(){\n"
            f"  const MP_FILES = JSON.parse(atob('{encoded}'));\n"
            "  function extractBody(html){\n"
            "    try {\n"
            "      const doc = new DOMParser().parseFromString(html, 'text/html');\n"
            "      const title = doc.querySelector('title'); if (title) document.title = title.textContent || document.title;\n"
            "      return doc.body ? doc.body.innerHTML : html;\n"
            "    } catch(e){ return html; }\n"
            "  }\n"
            "  function loadPage(path){\n"
            "    if (!MP_FILES[path]) return false;\n"
            "    const bodyHTML = extractBody(MP_FILES[path]);\n"
            "    document.body.innerHTML = bodyHTML;\n"
            "    attach();\n"
            "    try { history.replaceState({}, '', '#'+path); } catch(e){}\n"
            "    return true;\n"
            "  }\n"
            "  function clickHandler(e){\n"
            "    const a = e.target && e.target.closest ? e.target.closest('a') : null;\n"
            "    if (!a) return;\n"
            "    const href = a.getAttribute('href') || '';\n"
            "    if (!href || href.startsWith('#') || /^https?:/i.test(href) || href.startsWith('mailto:') || href.startsWith('tel:')) return;\n"
            "    const clean = href.split('#')[0].split('?')[0];\n"
            "    if (MP_FILES[clean]) { e.preventDefault(); loadPage(clean); }\n"
            "  }\n"
            "  function attach(){ document.removeEventListener('click', clickHandler, true); document.addEventListener('click', clickHandler, true); }\n"
            "  document.addEventListener('DOMContentLoaded', function(){ attach(); const initial = (location.hash||'').slice(1); if (initial && MP_FILES[initial]) loadPage(initial); }, { once:true });\n"
            "})();\n"
            "</script>"
        )
        m = _re.search(r"</body>", doc, flags=_re.IGNORECASE)
        if m:
            i = m.start()
            doc = doc[:i] + nav_script + doc[i:]
        else:
            doc = doc + nav_script
    except Exception:
        # The preview still works for the home page without the router.
        pass

    return doc


def extract_html_document(text: str) -> str:
    """Return the substring starting at the first <!DOCTYPE html> or <html> if present, else the original text.

    This ignores prose or planning notes before the actual HTML so previews don't break.
    """
    if not text:
        return text
    lower = text.lower()
    idx = lower.find("<!doctype html")
    if idx == -1:
        idx = lower.find("<html")
    return text[idx:] if idx != -1 else text


def parse_react_output(text):
    """Parse React/Next.js output to extract individual files.

    Supports multi-file output split into === filename === sections.
    """
    if not text:
        return {}

    try:
        files = parse_multipage_html_output(text) or {}
    except Exception:
        files = {}

    return files if isinstance(files, dict) and files else {}


def history_render(history: History):
    return gr.update(visible=True), history


def clear_history():
    return [], [], []


def create_multimodal_message(text, image=None):
    """Create a chat message. For broad provider compatibility, always return content as a string.

    Some providers (e.g., Hugging Face router endpoints like Cerebras) expect `content` to be a string,
    not a list of typed parts. To avoid 422 validation errors, we inline a brief note when an image is provided.
    """
    if image is None:
        return {"role": "user", "content": text}

    return {"role": "user", "content": f"{text}\n\n[An image was provided as reference.]"}


def apply_search_replace_changes(original_content: str, changes_text: str) -> str:
    """Apply search/replace changes to content (HTML, Python, etc.)."""
    if not changes_text.strip():
        return original_content

    # Fallback for raw CSS responses: if no search/replace markers are present,
    # treat the text as CSS rules and patch matching rule bodies in place.
    if (SEARCH_START not in changes_text) and (DIVIDER not in changes_text) and (REPLACE_END not in changes_text):
        try:
            updated_content = original_content
            replaced_any_rule = False

            css_blocks = re.findall(r"([^{]+)\{([\s\S]*?)\}", changes_text, flags=re.MULTILINE)
            for selector_raw, body_raw in css_blocks:
                selector = selector_raw.strip()
                body = body_raw.strip()
                if not selector:
                    continue

                # Replace the body of the first rule with this selector, keeping indentation.
                pattern = re.compile(rf"({re.escape(selector)}\s*\{{)([\s\S]*?)(\}})")
                def _replace_rule(match):
                    nonlocal replaced_any_rule
                    replaced_any_rule = True
                    prefix, existing_body, suffix = match.groups()
                    # Reuse the indentation of the first non-empty existing line.
                    first_line_indent = ""
                    for line in existing_body.splitlines():
                        stripped = line.lstrip(" \t")
                        if stripped:
                            first_line_indent = line[: len(line) - len(stripped)]
                            break
                    if body:
                        new_body_lines = [first_line_indent + line if line.strip() else line for line in body.splitlines()]
                        new_body_text = "\n" + "\n".join(new_body_lines) + "\n"
                    else:
                        new_body_text = existing_body
                    return f"{prefix}{new_body_text}{suffix}"
                updated_content, num_subs = pattern.subn(_replace_rule, updated_content, count=1)
            if replaced_any_rule:
                return updated_content
        except Exception:
            # Fall through to the standard block parser below.
            pass

    # Split the changes into search/replace blocks.
    blocks = []
    current_block = ""
    lines = changes_text.split('\n')

    for line in lines:
        if line.strip() == SEARCH_START:
            if current_block.strip():
                blocks.append(current_block.strip())
            current_block = line + '\n'
        elif line.strip() == REPLACE_END:
            current_block += line + '\n'
            blocks.append(current_block.strip())
            current_block = ""
        else:
            current_block += line + '\n'

    if current_block.strip():
        blocks.append(current_block.strip())

    modified_content = original_content

    for block in blocks:
        if not block.strip():
            continue

        # Split each block into its search and replace halves.
        lines = block.split('\n')
        search_lines = []
        replace_lines = []
        in_search = False
        in_replace = False

        for line in lines:
            if line.strip() == SEARCH_START:
                in_search = True
                in_replace = False
            elif line.strip() == DIVIDER:
                in_search = False
                in_replace = True
            elif line.strip() == REPLACE_END:
                in_replace = False
            elif in_search:
                search_lines.append(line)
            elif in_replace:
                replace_lines.append(line)

        # Apply the replacement if the search text is found.
        if search_lines:
            search_text = '\n'.join(search_lines).strip()
            replace_text = '\n'.join(replace_lines).strip()

            if search_text in modified_content:
                modified_content = modified_content.replace(search_text, replace_text)
            else:
                # Last resort: treat the replacement as CSS rules and patch matching rule bodies.
                try:
                    updated_content = modified_content
                    replaced_any_rule = False
                    css_blocks = re.findall(r"([^{]+)\{([\s\S]*?)\}", replace_text, flags=re.MULTILINE)
                    for selector_raw, body_raw in css_blocks:
                        selector = selector_raw.strip()
                        body = body_raw.strip()
                        if not selector:
                            continue
                        pattern = re.compile(rf"({re.escape(selector)}\s*\{{)([\s\S]*?)(\}})")
                        def _replace_rule(match):
                            nonlocal replaced_any_rule
                            replaced_any_rule = True
                            prefix, existing_body, suffix = match.groups()
                            first_line_indent = ""
                            for line in existing_body.splitlines():
                                stripped = line.lstrip(" \t")
                                if stripped:
                                    first_line_indent = line[: len(line) - len(stripped)]
                                    break
                            if body:
                                new_body_lines = [first_line_indent + line if line.strip() else line for line in body.splitlines()]
                                new_body_text = "\n" + "\n".join(new_body_lines) + "\n"
                            else:
                                new_body_text = existing_body
                            return f"{prefix}{new_body_text}{suffix}"
                        updated_content, num_subs = pattern.subn(_replace_rule, updated_content, count=1)
                    if replaced_any_rule:
                        modified_content = updated_content
                    else:
                        print(f"Warning: Search text not found in content: {search_text[:100]}...")
                except Exception:
                    print(f"Warning: Search text not found in content: {search_text[:100]}...")

    return modified_content
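
# Illustrative sketch. The marker literals live in .config, so the example builds
# the change block from the imported constants instead of hard-coding them:
#
#   original = "<h1>Old Title</h1>"
#   changes = "\n".join([SEARCH_START, "<h1>Old Title</h1>", DIVIDER,
#                        "<h1>New Title</h1>", REPLACE_END])
#   apply_search_replace_changes(original, changes)
#   # -> "<h1>New Title</h1>"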


def apply_transformers_js_search_replace_changes(original_formatted_content: str, changes_text: str) -> str:
    """Apply search/replace changes to transformers.js formatted content (three files)."""
    if not changes_text.strip():
        return original_formatted_content

    # Work on the parsed three-file structure.
    files = parse_transformers_js_output(original_formatted_content)

    # Split the changes into search/replace blocks.
    blocks = []
    current_block = ""
    lines = changes_text.split('\n')

    for line in lines:
        if line.strip() == SEARCH_START:
            if current_block.strip():
                blocks.append(current_block.strip())
            current_block = line + '\n'
        elif line.strip() == REPLACE_END:
            current_block += line + '\n'
            blocks.append(current_block.strip())
            current_block = ""
        else:
            current_block += line + '\n'

    if current_block.strip():
        blocks.append(current_block.strip())

    # Apply each block to whichever file contains the search text.
    for block in blocks:
        if not block.strip():
            continue

        lines = block.split('\n')
        search_lines = []
        replace_lines = []
        in_search = False
        in_replace = False
        target_file = None

        for line in lines:
            if line.strip() == SEARCH_START:
                in_search = True
                in_replace = False
            elif line.strip() == DIVIDER:
                in_search = False
                in_replace = True
            elif line.strip() == REPLACE_END:
                in_replace = False
            elif in_search:
                search_lines.append(line)
            elif in_replace:
                replace_lines.append(line)

        if search_lines:
            search_text = '\n'.join(search_lines).strip()
            replace_text = '\n'.join(replace_lines).strip()

            # Determine which file the search text belongs to.
            if search_text in files['index.html']:
                target_file = 'index.html'
            elif search_text in files['index.js']:
                target_file = 'index.js'
            elif search_text in files['style.css']:
                target_file = 'style.css'

            if target_file and search_text in files[target_file]:
                files[target_file] = files[target_file].replace(search_text, replace_text)
            else:
                print(f"Warning: Search text not found in any transformers.js file: {search_text[:100]}...")

    # Reassemble the three files into the formatted string.
    return format_transformers_js_output(files)


def send_to_sandbox(code):
    """Render HTML in a sandboxed iframe. Assumes full HTML is provided by prompts."""
    html_doc = (code or "").strip()

    # Convert local file:// media references to inline data: URIs so they render
    # inside the data: iframe, which cannot fetch from the local filesystem.
    try:
        import base64 as _b64
        import mimetypes as _mtypes
        import urllib.parse as _uparse

        def _file_url_to_data_uri(file_url: str) -> Optional[str]:
            try:
                parsed = _uparse.urlparse(file_url)
                path = _uparse.unquote(parsed.path)
                if not path:
                    return None
                with open(path, 'rb') as _f:
                    raw = _f.read()
                mime = _mtypes.guess_type(path)[0] or 'application/octet-stream'
                b64 = _b64.b64encode(raw).decode()
                return f"data:{mime};base64,{b64}"
            except Exception as e:
                print(f"[Sandbox] Failed to convert file URL to data URI: {str(e)}")
                return None

        def _repl_double(m):
            url = m.group(1)
            data_uri = _file_url_to_data_uri(url)
            return f'src="{data_uri}"' if data_uri else m.group(0)

        def _repl_single(m):
            url = m.group(1)
            data_uri = _file_url_to_data_uri(url)
            return f"src='{data_uri}'" if data_uri else m.group(0)

        html_doc = re.sub(r'src="(file:[^"]+)"', _repl_double, html_doc)
        html_doc = re.sub(r"src='(file:[^']+)'", _repl_single, html_doc)
    except Exception:
        # If rewriting fails, fall back to the original document.
        pass

    encoded_html = base64.b64encode(html_doc.encode('utf-8')).decode('utf-8')
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    iframe = f'<iframe src="{data_uri}" width="100%" height="920px" sandbox="allow-scripts allow-same-origin allow-forms allow-popups allow-modals allow-presentation" allow="display-capture"></iframe>'
    return iframe
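
# Illustrative usage (sketch):
#
#   send_to_sandbox("<!DOCTYPE html><html><body>Hello</body></html>")
#   # -> '<iframe src="data:text/html;charset=utf-8;base64,..." ...></iframe>'
#
# The document is base64-encoded into a data: URI, so the preview runs in a fully
# sandboxed iframe with no server round trip.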


def is_streamlit_code(code: str) -> bool:
    """Heuristic check to determine if Python code is a Streamlit app."""
    if not code:
        return False
    lowered = code.lower()
    return ("import streamlit" in lowered) or ("from streamlit" in lowered) or ("st." in code and "streamlit" in lowered)


def clean_requirements_txt_content(content: str) -> str:
    """
    Clean up requirements.txt content to remove markdown formatting.
    This function removes code blocks, markdown lists, headers, and other formatting
    that might be mistakenly included by LLMs.
    """
    if not content:
        return content

    # Strip any code fences first.
    if '```' in content:
        content = remove_code_block(content)

    lines = content.split('\n')
    clean_lines = []

    for line in lines:
        stripped_line = line.strip()

        # Skip empty lines.
        if not stripped_line:
            continue

        # Skip markdown formatting and prose lines.
        if (stripped_line == '```' or
            stripped_line.startswith('```') or
            # Markdown headers like "#Header" (but keep "# comment" lines).
            (stripped_line.startswith('#') and len(stripped_line) > 1 and stripped_line[1] != ' ') or
            stripped_line.startswith('**') or
            stripped_line.startswith('===') or
            stripped_line.startswith('---') or
            # Common prose openers.
            stripped_line.lower().startswith('here') or
            stripped_line.lower().startswith('this') or
            stripped_line.lower().startswith('the ') or
            stripped_line.lower().startswith('based on') or
            stripped_line.lower().startswith('dependencies') or
            stripped_line.lower().startswith('requirements')):
            continue

        # Unwrap markdown list items ("- package" / "* package").
        if (stripped_line.startswith('- ') or stripped_line.startswith('* ')):
            stripped_line = stripped_line[2:].strip()
            if not stripped_line:
                continue

        # Keep lines that look like valid requirements entries:
        # comments, VCS URLs, package names, or version specifiers.
        if (stripped_line.startswith('# ') or
            stripped_line.startswith('git+') or
            stripped_line[0].isalnum() or
            '==' in stripped_line or
            '>=' in stripped_line or
            '<=' in stripped_line or
            '~=' in stripped_line):
            clean_lines.append(stripped_line)

    result = '\n'.join(clean_lines)

    # requirements.txt should end with a newline.
    if result and not result.endswith('\n'):
        result += '\n'

    return result if result else "# No additional dependencies required\n"
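
# Illustrative usage (sketch):
#
#   raw = "Here are the dependencies:\n- streamlit>=1.30\n- pandas\n```"
#   clean_requirements_txt_content(raw)
#   # -> "streamlit>=1.30\npandas\n"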


def parse_multi_file_python_output(code: str) -> dict:
    """Parse multi-file Python output (Gradio/Streamlit) into separate files."""
    files = {}
    if not code:
        return files

    # Primary: split on "=== filename ===" markers.
    file_pattern = r'=== ([^=]+) ==='
    parts = re.split(file_pattern, code)

    if len(parts) > 1:
        # parts alternates: [prefix, filename, content, filename, content, ...]
        for i in range(1, len(parts), 2):
            if i + 1 < len(parts):
                filename = parts[i].strip()
                content = parts[i + 1].strip()

                if filename == 'requirements.txt':
                    content = clean_requirements_txt_content(content)

                files[filename] = content
    else:
        # Fallback for imported Hugging Face Space projects: scan line by line.
        if "IMPORTED PROJECT FROM HUGGING FACE SPACE" in code:
            lines = code.split('\n')
            current_file = None
            current_content = []

            for line in lines:
                if line.startswith('=== ') and line.endswith(' ==='):
                    # Flush the previous file before starting a new one.
                    if current_file and current_content:
                        content = '\n'.join(current_content)
                        if current_file == 'requirements.txt':
                            content = clean_requirements_txt_content(content)
                        files[current_file] = content

                    current_file = line[4:-4].strip()
                    current_content = []
                elif current_file:
                    current_content.append(line)

            # Flush the last file.
            if current_file and current_content:
                content = '\n'.join(current_content)
                if current_file == 'requirements.txt':
                    content = clean_requirements_txt_content(content)
                files[current_file] = content
        else:
            # Single-file code: pick a conventional entrypoint name.
            if is_streamlit_code(code):
                files['streamlit_app.py'] = code
            else:
                files['app.py'] = code

    return files


def format_multi_file_python_output(files: dict) -> str:
    """Format multiple Python files into the standard multi-file format."""
    if not files:
        return ""

    if len(files) == 1:
        # A single file needs no section markers.
        return list(files.values())[0]

    output = []

    # Put conventional entrypoints and support files first, then everything else sorted.
    file_order = ['app.py', 'streamlit_app.py', 'main.py', 'utils.py', 'models.py', 'config.py', 'requirements.txt']
    ordered_files = []

    for preferred_file in file_order:
        if preferred_file in files:
            ordered_files.append(preferred_file)

    for filename in sorted(files.keys()):
        if filename not in ordered_files:
            ordered_files.append(filename)

    for filename in ordered_files:
        output.append(f"=== {filename} ===")

        content = files[filename]
        if filename == 'requirements.txt':
            content = clean_requirements_txt_content(content)

        output.append(content)
        output.append("")

    return '\n'.join(output)
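
# Illustrative round trip (sketch):
#
#   code = "=== app.py ===\nimport gradio as gr\n=== requirements.txt ===\ngradio"
#   files = parse_multi_file_python_output(code)
#   # -> {'app.py': 'import gradio as gr', 'requirements.txt': 'gradio\n'}
#   format_multi_file_python_output(files)
#   # -> "=== app.py ===\nimport gradio as gr\n\n=== requirements.txt ===\ngradio\n\n"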


def send_streamlit_to_stlite(code: str) -> str:
    """Render Streamlit code using stlite inside a sandboxed iframe for preview."""
    # Embed the app source in a <streamlit-app> tag; the stlite runtime executes it in-browser.
    html_doc = (
        """<!doctype html>
<html>
<head>
<meta charset=\"UTF-8\" />
<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\" />
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1, shrink-to-fit=no\" />
<title>Streamlit Preview</title>
<link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/npm/@stlite/browser@0.86.0/build/stlite.css\" />
<style>html,body{margin:0;padding:0;height:100%;} streamlit-app{display:block;height:100%;}</style>
<script type=\"module\" src=\"https://cdn.jsdelivr.net/npm/@stlite/browser@0.86.0/build/stlite.js\"></script>
</head>
<body>
<streamlit-app>
"""
        + (code or "")
        + """
</streamlit-app>
</body>
</html>
"""
    )
    encoded_html = base64.b64encode(html_doc.encode('utf-8')).decode('utf-8')
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    iframe = f'<iframe src="{data_uri}" width="100%" height="920px" sandbox="allow-scripts allow-same-origin allow-forms allow-popups allow-modals allow-presentation" allow="display-capture"></iframe>'
    return iframe


def is_gradio_code(code: str) -> bool:
    """Heuristic check to determine if Python code is a Gradio app."""
    if not code:
        return False
    lowered = code.lower()
    return (
        "import gradio" in lowered
        or "from gradio" in lowered
        or "gr.Interface(" in code
        or "gr.Blocks(" in code
    )


def send_gradio_to_lite(code: str) -> str:
    """Render Gradio code using gradio-lite inside a sandboxed iframe for preview."""
    # Embed the app source in a <gradio-lite> tag; the gradio-lite runtime executes it in-browser.
    html_doc = (
        """<!doctype html>
<html>
<head>
<meta charset=\"UTF-8\" />
<meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\" />
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1, shrink-to-fit=no\" />
<title>Gradio Preview</title>
<script type=\"module\" crossorigin src=\"https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js\"></script>
<link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css\" />
<style>html,body{margin:0;padding:0;height:100%;} gradio-lite{display:block;height:100%;}</style>
</head>
<body>
<gradio-lite>
"""
        + (code or "")
        + """
</gradio-lite>
</body>
</html>
"""
    )
    encoded_html = base64.b64encode(html_doc.encode('utf-8')).decode('utf-8')
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    iframe = f'<iframe src="{data_uri}" width="100%" height="920px" sandbox="allow-scripts allow-same-origin allow-forms allow-popups allow-modals allow-presentation" allow="display-capture"></iframe>'
    return iframe
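
# Illustrative usage (sketch):
#
#   code = "import gradio as gr\ngr.Interface(lambda x: x, 'text', 'text').launch()"
#   if is_gradio_code(code):
#       preview_html = send_gradio_to_lite(code)   # iframe running the app via gradio-lite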


# Global flag used to signal cancellation of an in-progress generation.
stop_generation = False