Spaces:

Wckd314
/

Pundit_Feynman

Sleeping

App Files Files Community

Wckd314 committed on Feb 24

Commit

fd1afd0

verified ·

1 Parent(s): 9dc8ead

Upload 7 files

Browse files

Files changed (7) hide show

static/index.html +142 -0
static/script.js +346 -0
static/style.css +609 -0
utils/__init__.py +1 -0
utils/llm_client.py +603 -0
utils/notebook_builder.py +85 -0
utils/pdf_processor.py +25 -0

static/index.html ADDED Viewed

	@@ -0,0 +1,142 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Pundit Feynman — Research Paper to Code</title>
+    <link rel="stylesheet" href="/style.css">
+    <link
+        href="https://fonts.googleapis.com/css2?family=Playfair+Display:wght@400;600;700&family=JetBrains+Mono:wght@400;500&display=swap"
+        rel="stylesheet">
+</head>
+<body>
+    <!-- Left Panel: Upload & Status -->
+    <aside class="left-panel" id="left-panel">
+        <div class="panel-inner">
+            <header>
+                <h1>Pundit Feynman</h1>
+                <p class="tagline">Upload a research paper.<br>Learn it the Feynman way.</p>
+                <button id="visualize-btn" class="header-visualize hidden" style="display: none !important;">🎨
+                    Visualize Concept</button>
+            </header>
+            <!-- Upload State -->
+            <div id="upload-section">
+                <div id="drop-zone" class="drop-zone">
+                    <svg class="upload-icon" width="32" height="32" viewBox="0 0 24 24" fill="none"
+                        stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
+                        <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
+                        <polyline points="17 8 12 3 7 8"></polyline>
+                        <line x1="12" y1="3" x2="12" y2="15"></line>
+                    </svg>
+                    <p class="drop-text">Drop your PDF here</p>
+                    <span class="drop-hint">or click to browse</span>
+                    <input type="file" id="file-input" accept="application/pdf" hidden>
+                </div>
+                <!-- Divider -->
+                <div class="divider">
+                    <span>or paste arXiv link</span>
+                </div>
+                <!-- arXiv URL Input -->
+                <div class="arxiv-input-row">
+                    <input type="text" id="arxiv-input" class="arxiv-input"
+                        placeholder="https://arxiv.org/abs/2401.12345">
+                    <button id="arxiv-btn" class="btn btn-primary arxiv-btn">Go →</button>
+                </div>
+            </div>
+            <!-- Extraction Progress -->
+            <div id="extract-status" class="status-box hidden">
+                <div class="spinner"></div>
+                <p class="status-label" id="extract-label">Analyzing paper…</p>
+                <p class="status-sub">This may take a few minutes for long papers.</p>
+            </div>
+            <!-- Stream Active Indicator -->
+            <div id="stream-status" class="status-box hidden">
+                <div class="pulse-dot"></div>
+                <p class="status-label">Generating code live…</p>
+                <p class="status-sub">Watch the output in the code viewer →</p>
+            </div>
+            <!-- Done -->
+            <div id="done-section" class="status-box hidden">
+                <p class="done-check">✓</p>
+                <p class="status-label">Generation complete</p>
+                <div class="btn-row">
+                    <a id="download-btn" class="btn btn-primary">⬇ Download .ipynb</a>
+                    <button id="reset-btn" class="btn btn-secondary">↻ New Paper</button>
+                </div>
+            </div>
+            <!-- Error -->
+            <div id="error-section" class="status-box hidden">
+                <p class="error-x">✕</p>
+                <p class="status-label">Something went wrong</p>
+                <p class="status-sub" id="error-text"></p>
+                <button id="error-reset-btn" class="btn btn-secondary">↻ Try Again</button>
+            </div>
+            <footer>
+                <p>Powered by <strong>NVIDIA NIM</strong></p>
+                <div class="feedback-footer">
+                    <p>please give feedback, so that i can make it better</p>
+                    <a href="https://mail.google.com/mail/?view=cm&to=Avijitshil52460@gmail.com&su=Pundit%20Feynman%20Feedback"
+                        target="_blank" class="feedback-link">Avijitshil52460@gmail.com</a>
+                </div>
+            </footer>
+        </div>
+    </aside>
+    <!-- Right Panel: Live Code Viewer -->
+    <main class="right-panel" id="right-panel">
+        <div class="code-header">
+            <span class="code-title">Code Output</span>
+            <span class="code-badge" id="code-badge">waiting</span>
+        </div>
+        <pre class="code-viewer"
+            id="code-viewer"><code id="code-output">// Upload a paper to see the generated code here…</code></pre>
+    </main>
+    <!-- defer: script.js looks up #image-float and #image-pill (declared further down) as soon as it runs, so it must execute only after the whole document has been parsed. -->
+    <script src="/script.js" defer></script>
+    <!-- Floating Image Window (Hidden) -->
+    <div id="image-float" class="float-window hidden" style="display: none !important;">
+        <div class="float-header" id="float-header">
+            <span class="float-title">🎨 Concept Illustration</span>
+            <div class="float-actions">
+                <button id="float-download" class="float-btn" title="Download PNG">
+                    <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"
+                        stroke-linecap="round" stroke-linejoin="round">
+                        <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4" />
+                        <polyline points="7 10 12 15 17 10" />
+                        <line x1="12" y1="15" x2="12" y2="3" />
+                    </svg>
+                </button>
+                <button id="float-minimize" class="float-btn" title="Minimize">─</button>
+                <button id="float-close" class="float-btn" title="Close">✕</button>
+            </div>
+        </div>
+        <div class="float-body" id="float-body">
+            <div class="float-spinner" id="float-spinner">
+                <div class="paint-brush-container">
+                    <div class="brush">🖌️</div>
+                    <div class="shimmer-line"></div>
+                </div>
+                <p id="visualize-status">FLUX is painting your concept…</p>
+            </div>
+            <img id="float-image" class="float-image hidden" alt="Concept Illustration" />
+        </div>
+    </div>
+    <!-- Minimized Pill (Hidden) -->
+    <div id="image-pill" class="float-pill hidden" style="display: none !important;">
+        <span>🎨 Illustration</span>
+    </div>
+</body>
+</html>

static/script.js ADDED Viewed

	@@ -0,0 +1,346 @@

+// ── DOM Refs ──
+// Element handles are resolved once, when this script executes; an id that is
+// not present in the document at that moment yields null.
+// Left panel: upload controls, status sections and their buttons.
+const dropZone = document.getElementById('drop-zone');
+const fileInput = document.getElementById('file-input');
+const uploadSection = document.getElementById('upload-section');
+const extractStatus = document.getElementById('extract-status');
+const extractLabel = document.getElementById('extract-label');
+const streamStatus = document.getElementById('stream-status');
+const doneSection = document.getElementById('done-section');
+const errorSection = document.getElementById('error-section');
+const errorText = document.getElementById('error-text');
+const downloadBtn = document.getElementById('download-btn');
+const resetBtn = document.getElementById('reset-btn');
+const errorResetBtn = document.getElementById('error-reset-btn');
+// Right panel: live code viewer and its status badge.
+const codeOutput = document.getElementById('code-output');
+const codeViewer = document.getElementById('code-viewer');
+const codeBadge = document.getElementById('code-badge');
+// arXiv URL entry.
+const arxivInput = document.getElementById('arxiv-input');
+const arxivBtn = document.getElementById('arxiv-btn');
+// Concept-illustration UI: trigger button, floating window and minimized pill.
+const visualizeBtn = document.getElementById('visualize-btn');
+const imageFloat = document.getElementById('image-float');
+const imagePill = document.getElementById('image-pill');
+const floatHeader = document.getElementById('float-header');
+const floatImage = document.getElementById('float-image');
+const floatSpinner = document.getElementById('float-spinner');
+const floatDownload = document.getElementById('float-download');
+const floatMinimize = document.getElementById('float-minimize');
+// Startup diagnostics: log a version banner and whether the visualize button was found.
+console.log('🚀 Pundit Feynman Script Loaded [v2.1]');
+console.log('🎨 Visualize Button found:', !!visualizeBtn);
+// Surface every uncaught script error in an alert. Returning false leaves the
+// browser's default error handling (console reporting) in place.
+window.onerror = function (msg, url, lineNo, columnNo, error) {
+    alert(`JS Error: ${msg}\nLine: ${lineNo}\nCheck browser console!`);
+    return false;
+};
+// Test backend connectivity
+// Fire-and-forget: the result is only logged, nothing else waits on it.
+fetch('/api/ping').then(r => r.json()).then(d => console.log('🏓 Backend connectivity:', d.status)).catch(e => console.error('❌ Backend UNREACHABLE:', e));
+// ── Visual Illustration State ──
+// Id of the job currently shown in the UI; set by startStream()/onStreamComplete()
+// and cleared by resetUI(). Read by the visualize and download handlers.
+let currentJobId = null;
+window._debugJobId = () => currentJobId; // Access via console: window._debugJobId()
+// ── State Manager ──
+function showSection(section) {
+    [uploadSection, extractStatus, streamStatus, doneSection, errorSection]
+        .forEach(el => el.classList.add('hidden'));
+    if (section) section.classList.remove('hidden');
+}
+// ── Drag & Drop ──
+dropZone.addEventListener('click', () => fileInput.click());
+dropZone.addEventListener('dragover', (e) => {
+    e.preventDefault();
+    dropZone.classList.add('drag-over');
+});
+dropZone.addEventListener('dragleave', () => dropZone.classList.remove('drag-over'));
+dropZone.addEventListener('drop', (e) => {
+    e.preventDefault();
+    dropZone.classList.remove('drag-over');
+    if (e.dataTransfer.files.length > 0) handleUpload(e.dataTransfer.files[0]);
+});
+fileInput.addEventListener('change', (e) => {
+    if (e.target.files.length > 0) handleUpload(e.target.files[0]);
+});
+// ── arXiv URL Handler ──
+arxivBtn.addEventListener('click', () => handleArxiv());
+arxivInput.addEventListener('keydown', (e) => {
+    if (e.key === 'Enter') handleArxiv();
+});
+async function handleArxiv() {
+    const url = arxivInput.value.trim();
+    if (!url) return;
+    if (!url.includes('arxiv.org')) {
+        alert('Please enter a valid arXiv URL (e.g. https://arxiv.org/abs/2401.12345)');
+        return;
+    }
+    showSection(extractStatus);
+    extractLabel.textContent = 'Downloading & analyzing arXiv paper…';
+    codeOutput.textContent = '// Downloading PDF from arXiv…';
+    codeBadge.textContent = 'extracting';
+    codeBadge.className = 'code-badge';
+    try {
+        const res = await fetch('/api/extract-arxiv', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ url })
+        });
+        if (!res.ok) {
+            const err = await res.json().catch(() => ({ detail: 'arXiv extraction failed' }));
+            throw new Error(err.detail || 'arXiv extraction failed');
+        }
+        const data = await res.json();
+        console.log('arXiv extraction complete:', data);
+        startStream(data.job_id);
+    } catch (err) {
+        showError(err.message);
+    }
+}
+// ── Upload & Extract (Step 1) ──
+async function handleUpload(file) {
+    if (!file.name.toLowerCase().endsWith('.pdf')) {
+        alert('Please upload a PDF file.');
+        return;
+    }
+    // Show extraction spinner
+    showSection(extractStatus);
+    extractLabel.textContent = 'Uploading & analyzing paper…';
+    codeOutput.textContent = '// Waiting for paper analysis to complete…';
+    codeBadge.textContent = 'extracting';
+    codeBadge.className = 'code-badge';
+    const formData = new FormData();
+    formData.append('file', file);
+    try {
+        const res = await fetch('/api/extract', {
+            method: 'POST',
+            body: formData
+        });
+        if (!res.ok) {
+            const err = await res.json().catch(() => ({ detail: 'Extraction failed' }));
+            throw new Error(err.detail || 'Extraction failed');
+        }
+        const data = await res.json();
+        console.log('Extraction complete:', data);
+        // Hide visualize button from previous run if any
+        visualizeBtn.classList.add('hidden');
+        // Start streaming (Step 2)
+        startStream(data.job_id);
+    } catch (err) {
+        showError(err.message);
+    }
+}
+// ── Live Streaming (Step 2) ──
+function startStream(jobId) {
+    currentJobId = jobId; // Store immediately
+    showSection(streamStatus);
+    codeOutput.textContent = '';
+    codeBadge.textContent = 'streaming';
+    codeBadge.className = 'code-badge streaming';
+    const source = new EventSource(`/api/generate_stream/${jobId}`);
+    let hasError = false;
+    source.onmessage = (event) => {
+        try {
+            const payload = JSON.parse(event.data);
+            if (payload.done) {
+                source.close();
+                if (payload.success) {
+                    onStreamComplete(jobId);
+                } else {
+                    // Pipeline finished but failed — show error state
+                    showError('Pipeline failed to generate notebook. Check the code output panel for details.');
+                    codeBadge.textContent = 'failed';
+                    codeBadge.className = 'code-badge';
+                }
+                return;
+            }
+            if (payload.analysis_done) {
+                // Show visualize button early!
+                visualizeBtn.classList.remove('hidden');
+                return;
+            }
+            if (payload.text) {
+                // Check if it's an error message
+                if (payload.text.includes('❌')) {
+                    hasError = true;
+                }
+                codeOutput.textContent += payload.text;
+                // Auto-scroll to bottom
+                codeViewer.scrollTop = codeViewer.scrollHeight;
+            }
+        } catch (e) {
+            console.error('Parse error:', e);
+        }
+    };
+    source.onerror = (err) => {
+        console.error('SSE error:', err);
+        source.close();
+        showError('Stream connection lost. Please try again.');
+    };
+}
+function onStreamComplete(jobId) {
+    showSection(doneSection);
+    downloadBtn.href = `/api/download/${jobId}`;
+    currentJobId = jobId; // Store for visualization
+    codeBadge.textContent = 'complete';
+    codeBadge.className = 'code-badge done';
+}
+// ── Visual Illustration Logic ──
+visualizeBtn.addEventListener('click', async (e) => {
+    console.log('🖱️ Visualize button CLICKED. Event object:', e);
+    if (!currentJobId) {
+        console.error('❌ Cannot visualize: currentJobId is null');
+        alert('Software Error: Job ID not captured yet. Please wait for analysis or refresh.');
+        return;
+    }
+    console.log('🎨 Requesting visualization for Job:', currentJobId);
+    // Disable button to prevent double-clicks
+    visualizeBtn.disabled = true;
+    const originalText = visualizeBtn.textContent;
+    visualizeBtn.textContent = '🎨 Painting...';
+    // Show float UI
+    imageFloat.classList.remove('hidden');
+    imagePill.classList.add('hidden');
+    floatImage.classList.add('hidden');
+    floatSpinner.classList.remove('hidden');
+    try {
+        const url = `/api/visualize/${currentJobId}`;
+        console.log('🌐 Fetching:', url);
+        const res = await fetch(url, { method: 'POST' });
+        console.log('📥 Response status:', res.status);
+        if (!res.ok) {
+            const errDetail = await res.json().catch(() => ({ detail: 'Network error' }));
+            throw new Error(errDetail.detail || `Server error ${res.status}`);
+        }
+        const data = await res.json();
+        console.log('🖼️ Image received! Length:', data.image.length);
+        floatImage.src = data.image;
+        floatImage.classList.remove('hidden');
+        floatSpinner.classList.add('hidden');
+    } catch (err) {
+        console.error('❌ Visualization flow error:', err);
+        alert(`Painting failed: ${err.message}`);
+        imageFloat.classList.add('hidden');
+    } finally {
+        visualizeBtn.disabled = false;
+        visualizeBtn.textContent = originalText;
+        console.log('🏁 Visualize flow completed.');
+    }
+});
+// Drag Logic
+// The floating window is moved by dragging its header. Pointer position and the
+// window's offset are captured on mousedown; mousemove applies the delta.
+let isDragging = false;
+let startX, startY, initialX, initialY;
+floatHeader.addEventListener('mousedown', (e) => {
+    isDragging = true;
+    startX = e.clientX;
+    startY = e.clientY;
+    initialX = imageFloat.offsetLeft;
+    initialY = imageFloat.offsetTop;
+    // Clear the inline transition while dragging; it is reset on mouseup.
+    imageFloat.style.transition = 'none';
+});
+// Listen on the document so the drag keeps tracking when the pointer leaves the header.
+document.addEventListener('mousemove', (e) => {
+    if (!isDragging) return;
+    const dx = e.clientX - startX;
+    const dy = e.clientY - startY;
+    imageFloat.style.left = (initialX + dx) + 'px';
+    imageFloat.style.top = (initialY + dy) + 'px';
+    // Switch from the stylesheet's bottom/right anchoring to the left/top values set above.
+    imageFloat.style.bottom = 'auto'; // Remove fixed positioning
+    imageFloat.style.right = 'auto';
+});
+// Runs on every mouseup in the document, not only after a drag.
+document.addEventListener('mouseup', () => {
+    isDragging = false;
+    imageFloat.style.transition = '';
+});
+// Minimize/Close/Download
+floatMinimize.addEventListener('click', () => {
+    imageFloat.classList.add('hidden');
+    imagePill.classList.remove('hidden');
+});
+imagePill.addEventListener('click', () => {
+    imageFloat.classList.remove('hidden');
+    imagePill.classList.add('hidden');
+});
+floatClose.addEventListener('click', () => {
+    imageFloat.classList.add('hidden');
+    imagePill.classList.add('hidden');
+});
+floatDownload.addEventListener('click', () => {
+    if (!floatImage.src) return;
+    const link = document.createElement('a');
+    link.href = floatImage.src;
+    link.download = `pundit_feynman_illustration_${currentJobId}.png`;
+    link.click();
+});
+// ── Error & Reset ──
+function showError(msg) {
+    showSection(errorSection);
+    errorText.textContent = msg;
+    codeBadge.textContent = 'error';
+    codeBadge.className = 'code-badge';
+    // Cleanup float on error
+    imageFloat.classList.add('hidden');
+    imagePill.classList.add('hidden');
+}
+function resetUI() {
+    showSection(uploadSection);
+    fileInput.value = '';
+    arxivInput.value = '';
+    codeOutput.textContent = '// Upload a paper to see the generated code here…';
+    codeBadge.textContent = 'waiting';
+    codeBadge.className = 'code-badge';
+    currentJobId = null;
+    visualizeBtn.classList.add('hidden');
+    // Cleanup float on reset
+    imageFloat.classList.add('hidden');
+    imagePill.classList.add('hidden');
+}
+resetBtn.addEventListener('click', resetUI);
+errorResetBtn.addEventListener('click', resetUI);

static/style.css ADDED Viewed

	@@ -0,0 +1,609 @@

+/* ── Reset & Base ── */
+*,
+*::before,
+*::after {
+    margin: 0;
+    padding: 0;
+    box-sizing: border-box;
+}
+:root {
+    --bg: #f5f0e8;
+    --bg-deep: #ebe4d6;
+    --text: #2c2417;
+    --text-muted: #7a6e5d;
+    --accent: #8b6914;
+    --accent-soft: #c9a84c;
+    --border: #d4cbb8;
+    --code-bg: #1e1e2e;
+    --code-text: #cdd6f4;
+    --code-accent: #89b4fa;
+    --panel-shadow: 0 0 40px rgba(0, 0, 0, 0.06);
+}
+html,
+body {
+    height: 100%;
+    overflow: hidden;
+}
+body {
+    font-family: 'Times New Roman', 'Playfair Display', Georgia, serif;
+    background: var(--bg);
+    color: var(--text);
+    display: flex;
+}
+/* ── Left Panel ── */
+.left-panel {
+    width: 380px;
+    min-width: 380px;
+    height: 100vh;
+    background: var(--bg);
+    border-right: 1px solid var(--border);
+    display: flex;
+    flex-direction: column;
+    overflow-y: auto;
+}
+.panel-inner {
+    padding: 40px 32px 24px;
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+}
+/* ── Header ── */
+header {
+    margin-bottom: 32px;
+}
+header h1 {
+    font-family: 'Playfair Display', Georgia, serif;
+    font-size: 2rem;
+    font-weight: 700;
+    color: var(--accent);
+    margin-bottom: 8px;
+    letter-spacing: -0.02em;
+}
+.header-visualize {
+    display: inline-block;
+    margin-top: 16px;
+    background: transparent;
+    border: 1px solid #6b4226;
+    color: #6b4226;
+    padding: 8px 16px;
+    border-radius: 20px;
+    font-family: 'Times New Roman', serif;
+    font-size: 1rem;
+    font-weight: 600;
+    cursor: pointer;
+    transition: all 0.2s;
+}
+.header-visualize:hover {
+    background: #6b4226;
+    color: #fff;
+}
+.tagline {
+    font-size: 0.95rem;
+    color: var(--text-muted);
+    line-height: 1.5;
+}
+/* ── Drop Zone ── */
+.drop-zone {
+    border: 2px dashed var(--border);
+    border-radius: 12px;
+    padding: 36px 24px;
+    text-align: center;
+    cursor: pointer;
+    transition: all 0.25s ease;
+    background: var(--bg-deep);
+}
+.drop-zone:hover,
+.drop-zone.drag-over {
+    border-color: var(--accent);
+    background: rgba(139, 105, 20, 0.06);
+}
+.upload-icon {
+    color: var(--accent-soft);
+    margin-bottom: 12px;
+    opacity: 0.8;
+}
+.drop-text {
+    font-size: 1rem;
+    font-weight: 600;
+    margin-bottom: 4px;
+    color: var(--text);
+}
+.drop-hint {
+    font-size: 0.85rem;
+    color: var(--text-muted);
+}
+/* ── Divider & arXiv Input ── */
+.divider {
+    display: flex;
+    align-items: center;
+    gap: 12px;
+    margin: 16px 0;
+}
+.divider::before,
+.divider::after {
+    content: '';
+    flex: 1;
+    height: 1px;
+    background: var(--border);
+}
+.divider span {
+    font-size: 0.78rem;
+    color: var(--text-muted);
+    white-space: nowrap;
+    letter-spacing: 0.02em;
+}
+.arxiv-input-row {
+    display: flex;
+    gap: 8px;
+}
+.arxiv-input {
+    flex: 1;
+    padding: 14px 16px;
+    border: 1.5px solid var(--border);
+    border-radius: 8px;
+    background: var(--bg);
+    font-family: 'Times New Roman', Georgia, serif;
+    font-size: 0.9rem;
+    color: var(--text);
+    outline: none;
+    transition: border-color 0.2s ease;
+}
+.arxiv-input:focus {
+    border-color: var(--accent);
+    background: #fff;
+}
+.arxiv-input::placeholder {
+    color: var(--text-muted);
+    opacity: 0.6;
+}
+.arxiv-btn {
+    padding: 14px 20px;
+    font-size: 0.85rem;
+    white-space: nowrap;
+    font-family: 'Times New Roman', Georgia, serif;
+}
+/* ── Status Boxes ── */
+.status-box {
+    text-align: center;
+    padding: 32px 0;
+}
+.spinner {
+    width: 28px;
+    height: 28px;
+    border: 2.5px solid var(--border);
+    border-top-color: var(--accent);
+    border-radius: 50%;
+    margin: 0 auto 16px;
+    animation: spin 0.7s linear infinite;
+}
+@keyframes spin {
+    to {
+        transform: rotate(360deg);
+    }
+}
+.pulse-dot {
+    width: 12px;
+    height: 12px;
+    background: #22c55e;
+    border-radius: 50%;
+    margin: 0 auto 16px;
+    animation: pulse 1.5s ease-in-out infinite;
+}
+@keyframes pulse {
+    0%,
+    100% {
+        opacity: 1;
+        transform: scale(1);
+    }
+    50% {
+        opacity: 0.5;
+        transform: scale(1.3);
+    }
+}
+.status-label {
+    font-family: 'Times New Roman', Georgia, serif;
+    font-size: 0.9rem;
+    font-weight: 600;
+    color: var(--text);
+    margin-bottom: 6px;
+    text-transform: uppercase;
+    letter-spacing: 0.05em;
+}
+.status-sub {
+    font-family: 'Times New Roman', Georgia, serif;
+    font-size: 0.82rem;
+    color: var(--text-muted);
+    line-height: 1.4;
+}
+.done-check {
+    font-size: 2rem;
+    color: #22c55e;
+    margin-bottom: 8px;
+}
+.error-x {
+    font-size: 2rem;
+    color: #ef4444;
+    margin-bottom: 8px;
+}
+/* ── Buttons ── */
+.btn-row {
+    display: flex;
+    gap: 10px;
+    justify-content: center;
+    margin-top: 16px;
+}
+.btn {
+    display: inline-flex;
+    align-items: center;
+    gap: 6px;
+    padding: 10px 20px;
+    border-radius: 8px;
+    font-weight: 600;
+    font-size: 0.82rem;
+    cursor: pointer;
+    border: none;
+    text-decoration: none;
+    transition: all 0.2s ease;
+    font-family: 'Times New Roman', Georgia, serif;
+}
+.btn-primary {
+    background: var(--accent);
+    color: #fff;
+}
+.btn-primary:hover {
+    background: #6f5410;
+    transform: translateY(-1px);
+}
+.btn-secondary {
+    background: transparent;
+    color: var(--text);
+    border: 1px solid var(--border);
+}
+.btn-secondary:hover {
+    background: var(--bg-deep);
+}
+/* ── Footer ── */
+footer {
+    margin-top: auto;
+    padding-top: 24px;
+    text-align: center;
+}
+footer p {
+    font-size: 0.72rem;
+    color: var(--text-muted);
+}
+footer strong {
+    color: var(--accent);
+    font-weight: 600;
+}
+/* ── Right Panel: Code Viewer ── */
+.right-panel {
+    flex: 1;
+    height: 100vh;
+    background: var(--code-bg);
+    display: flex;
+    flex-direction: column;
+    overflow: hidden;
+}
+.code-header {
+    display: flex;
+    align-items: center;
+    justify-content: space-between;
+    padding: 14px 24px;
+    background: #181825;
+    border-bottom: 1px solid #313244;
+}
+.code-title {
+    font-family: 'JetBrains Mono', monospace;
+    font-size: 0.78rem;
+    color: #6c7086;
+    text-transform: uppercase;
+    letter-spacing: 0.08em;
+}
+.code-badge {
+    font-family: 'JetBrains Mono', monospace;
+    font-size: 0.68rem;
+    padding: 3px 10px;
+    border-radius: 20px;
+    background: #313244;
+    color: #6c7086;
+    text-transform: uppercase;
+    letter-spacing: 0.05em;
+}
+.code-badge.streaming {
+    background: rgba(34, 197, 94, 0.15);
+    color: #22c55e;
+    animation: pulse 1.5s ease-in-out infinite;
+}
+.code-badge.done {
+    background: rgba(34, 197, 94, 0.15);
+    color: #22c55e;
+}
+.code-viewer {
+    flex: 1;
+    overflow-y: auto;
+    padding: 24px;
+    margin: 0;
+    font-family: 'Times New Roman', Georgia, serif;
+    font-size: 0.95rem;
+    line-height: 1.8;
+    color: var(--code-text);
+    white-space: pre-wrap;
+    word-wrap: break-word;
+    scrollbar-width: thin;
+    scrollbar-color: #313244 transparent;
+}
+.code-viewer::-webkit-scrollbar {
+    width: 6px;
+}
+.code-viewer::-webkit-scrollbar-thumb {
+    background: #313244;
+    border-radius: 3px;
+}
+.code-viewer code {
+    font-family: inherit;
+    color: inherit;
+}
+/* Feedback Footer */
+.feedback-footer {
+    margin-top: 16px;
+    padding-top: 16px;
+    border-top: 1px solid rgba(0, 0, 0, 0.08);
+    font-size: 0.95rem;
+    color: #6b4226;
+    line-height: 1.5;
+}
+.feedback-link {
+    display: inline-block;
+    margin-top: 6px;
+    color: #5a3318;
+    text-decoration: none;
+    font-size: 1.05rem;
+    font-weight: 700;
+    transition: opacity 0.2s;
+}
+.feedback-link:hover {
+    text-decoration: underline;
+    opacity: 0.8;
+}
+/* ── Floating Window ── */
+.float-window {
+    position: fixed;
+    bottom: 24px;
+    right: 24px;
+    width: 400px;
+    background: #fff;
+    border-radius: 12px;
+    box-shadow: 0 10px 40px rgba(0, 0, 0, 0.15);
+    z-index: 1000;
+    overflow: hidden;
+    border: 1px solid rgba(0, 0, 0, 0.1);
+    display: flex;
+    flex-direction: column;
+    transition: transform 0.3s cubic-bezier(0.4, 0, 0.2, 1), opacity 0.3s;
+}
+.float-header {
+    background: #fdfaf6;
+    /* Beige header */
+    padding: 12px 16px;
+    border-bottom: 1px solid rgba(0, 0, 0, 0.05);
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    cursor: move;
+    /* Indicate draggable */
+    user-select: none;
+}
+.float-title {
+    font-family: 'Playfair Display', serif;
+    font-weight: 700;
+    font-size: 0.9rem;
+    color: #6b4226;
+}
+.float-actions {
+    display: flex;
+    gap: 8px;
+}
+.float-btn {
+    background: transparent;
+    border: none;
+    color: #8b8b8b;
+    font-size: 1rem;
+    cursor: pointer;
+    width: 28px;
+    height: 28px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    border-radius: 6px;
+    transition: all 0.2s;
+}
+.float-btn:hover {
+    background: rgba(0, 0, 0, 0.05);
+    color: #6b4226;
+}
+.float-body {
+    position: relative;
+    min-height: 200px;
+    max-height: 400px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    background: #fafafa;
+}
+.float-image {
+    width: 100%;
+    height: auto;
+    display: block;
+    max-height: 400px;
+    object-fit: contain;
+}
+.float-spinner {
+    padding: 40px;
+    text-align: center;
+    color: #8b8b8b;
+    font-size: 0.85rem;
+}
+/* ── Paint Brush Loading ── */
+.paint-brush-container {
+    position: relative;
+    width: 60px;
+    height: 60px;
+    margin: 0 auto 16px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+}
+.brush {
+    font-size: 32px;
+    z-index: 2;
+    animation: sweep 1.5s infinite ease-in-out;
+    transform-origin: bottom center;
+}
+@keyframes sweep {
+    0%,
+    100% {
+        transform: rotate(-15deg) translateX(-10px);
+    }
+    50% {
+        transform: rotate(15deg) translateX(10px);
+    }
+}
+.shimmer-line {
+    position: absolute;
+    bottom: 10px;
+    width: 40px;
+    height: 4px;
+    background: var(--accent-soft);
+    border-radius: 2px;
+    opacity: 0.3;
+    animation: paint-shimmer 1.5s infinite ease-in-out;
+}
+@keyframes paint-shimmer {
+    0%,
+    100% {
+        width: 0;
+        left: 10px;
+        opacity: 0;
+    }
+    50% {
+        width: 40px;
+        left: 10px;
+        opacity: 0.6;
+    }
+}
+.float-spinner p {
+    font-family: 'Times New Roman', serif;
+    font-style: italic;
+    color: var(--text-muted);
+}
+.header-visualize:disabled {
+    opacity: 0.5;
+    cursor: not-allowed;
+}
+/* Minimized Pill */
+.float-pill {
+    position: fixed;
+    bottom: 24px;
+    right: 24px;
+    background: #6b4226;
+    color: #fff;
+    padding: 10px 20px;
+    border-radius: 30px;
+    font-family: 'Playfair Display', serif;
+    font-size: 0.9rem;
+    font-weight: 600;
+    cursor: pointer;
+    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
+    z-index: 1001;
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    transition: transform 0.2s;
+}
+.float-pill:hover {
+    transform: translateY(-2px);
+}
+/* ── Utility ── */
+.hidden {
+    display: none !important;
+}

utils/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Pundit Feynman Utils Package

utils/llm_client.py ADDED Viewed

	@@ -0,0 +1,603 @@

+"""
+Pundit Feynman LLM Client — 3-Stage Pipeline
+Stage 1: Analyze   (images → structured JSON analysis)
+Stage 2: Design    (analysis → implementation plan JSON)
+Stage 3: Generate  (analysis + design → notebook cells JSON)
+"""
+import os
+import json
+import time
+import re
+import requests
+from openai import OpenAI
+from dotenv import load_dotenv
+load_dotenv()
+# ── Configuration ──────────────────────────────────────────────────────────
+# Chat-completion settings. Each value comes from the environment (populated by
+# load_dotenv() above) and falls back to the default shown.
+API_KEY = os.getenv("NVIDIA_API_KEY", "")
+BASE_URL = os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")
+MODEL = os.getenv("LLM_MODEL", "qwen/qwen3.5-397b-a17b")
+# int() raises ValueError at import time if the variable is set to a non-integer.
+MAX_IMAGES_PER_REQUEST = int(os.getenv("MAX_IMAGES_PER_REQUEST", "8"))
+# OCR Configuration
+# Separate key for the OCR endpoint; the URL is fixed, not configurable.
+OCR_API_KEY = os.getenv("NVIDIA_OCR_API_KEY", "")
+OCR_API_URL = "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
+# FLUX.1-schnell Image Generation
+# Separate key for the image-generation endpoint; the URL is fixed, not configurable.
+FLUX_API_KEY = os.getenv("NVIDIA_FLUX_API_KEY", "")
+FLUX_API_URL = "https://ai.api.nvidia.com/v1/genai/black-forest-labs/flux.1-schnell"
+# Retry policy. NOTE(review): RETRY_DELAYS presumably holds the wait in seconds
+# before each of the MAX_RETRIES attempts — confirm against the retry loop.
+MAX_RETRIES = 3
+RETRY_DELAYS = [5, 15, 30]
+# Shared OpenAI-compatible client, created at import time and pointed at BASE_URL.
+client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
+# ── Prompts ────────────────────────────────────────────────────────────────
+# Persona and ground rules for the model: JSON output per stage, real CPU-scale
+# code, Feynman-style explanations.
+# NOTE(review): presumably sent as the system message on every pipeline call —
+# the call sites are outside this view.
+SYSTEM_PROMPT = (
+    "You are an expert research engineer and educator who converts academic papers into "
+    "clear, educational, executable Python code. You produce structured JSON output for "
+    "each stage of the pipeline. When building toy implementations, you create REAL working code "
+    "(PyTorch, Transformer layers, actual training loops) at reduced scale that "
+    "runs on CPU. You prioritize faithful replication of the paper's architecture "
+    "and algorithms while making the code deeply educational with clear explanations, "
+    "using the Feynman technique to break down complex math into simple analogies, "
+    "verbose logging, and insightful visualizations."
+)
+# Stage 1 instructions: analyze_paper prepends this to the OCR-extracted paper text.
+# The keys listed here are the ones run_full_pipeline_stream and
+# generate_concept_image later read from the parsed analysis.
+ANALYSIS_PROMPT = """Analyze this research paper text and return a JSON object with:
+{
+  "title": "exact paper title",
+  "authors": ["author names"],
+  "research_field": "e.g. NLP, Computer Vision, RL",
+  "abstract_summary": "2-3 sentence plain English summary of the paper",
+  "feynman_analogy": "A brilliant, everyday analogy that maps perfectly to the paper's core key_insight (e.g., comparing attention mechanisms to a cocktail party)",
+  "feynman_core_concept": "Explain the paper's main idea as if teaching a bright 12-year-old, using the analogy above, in 3-5 sentences",
+  "key_insight": "the core novel contribution in one sentence",
+  "algorithms": [
+    {
+      "name": "algorithm name",
+      "purpose": "what it does",
+      "key_equations": ["important formulas in LaTeX notation"],
+      "pseudocode_steps": ["step1", "step2"]
+    }
+  ],
+  "architecture": {
+    "type": "e.g. Transformer, CNN, GAN",
+    "components": ["list of main components"],
+    "data_flow": "description of how data flows through the model"
+  },
+  "datasets_mentioned": ["dataset names"],
+  "implementation_requirements": {
+    "frameworks": ["PyTorch"],
+    "key_hyperparameters": {"param": "value"},
+    "estimated_complexity": "low/medium/high for toy version"
+  }
+}
+Return ONLY valid JSON, no markdown, no extra text."""
+# Stage 2 instructions: design_implementation prepends this to the JSON-serialized
+# analysis. run_full_pipeline_stream reads "model_architecture" and "training_config"
+# from the parsed reply for its progress output.
+DESIGN_PROMPT = """Based on this paper analysis, create a toy implementation design that runs on CPU.
+Return a JSON object with:
+{
+  "model_architecture": {
+    "type": "architecture type",
+    "embed_dim": 64,
+    "num_layers": 2,
+    "num_heads": 4,
+    "vocab_size": 1000,
+    "max_seq_len": 64,
+    "components": [
+      {
+        "name": "component name",
+        "class_name": "PythonClassName",
+        "description": "what this component does",
+        "key_params": {"param": "value"}
+      }
+    ]
+  },
+  "training_config": {
+    "optimizer": "Adam",
+    "learning_rate": 0.001,
+    "num_epochs": 5,
+    "batch_size": 16,
+    "loss_function": "CrossEntropyLoss",
+    "dataset_strategy": "synthetic generation approach"
+  },
+  "visualization_plan": [
+    "loss curve",
+    "attention heatmap",
+    "sample predictions"
+  ],
+  "estimated_cells": 15,
+  "code_structure": [
+    {"section": "imports", "description": "required libraries"},
+    {"section": "model", "description": "model architecture classes"},
+    {"section": "data", "description": "synthetic data generation"},
+    {"section": "training", "description": "training loop"},
+    {"section": "evaluation", "description": "testing and visualization"}
+  ]
+}
+Return ONLY valid JSON, no markdown, no extra text."""
+# Stage 3 template, filled in with str.format(analysis=..., design=...) by
+# generate_notebook_cells_stream. Literal braces in the cell example are doubled
+# ({{ }}) so that str.format leaves them in place.
+GENERATE_PROMPT_TEMPLATE = """You are generating a Jupyter notebook from a paper analysis and implementation design.
+Analysis: {analysis}
+Design: {design}
+Note: You are a 397B parameter model (Qwen 3.5) with 17B actively used parameters (MoE architecture).
+This means you have deep expertise and vast knowledge. Use it to produce genuinely educational content.
+Return a JSON array of notebook cells following this **exact 13-section structure**:
+1. **Title & Overview** (markdown) — Paper title, authors, a one-paragraph summary of the paper.
+2. **Table of Contents** (markdown) — Numbered list of all 13 sections. Each section name should be a clickable anchor link.
+3. **The Feynman Explanation** (markdown) — A step-by-step explanation of the WHOLE paper using the Feynman technique. Break down the core algorithms, math, and architecture into the absolute simplest terms possible. Expand heavily on the `feynman_analogy` and `feynman_core_concept` from the analysis. Use relatable, everyday analogies for each major step so a beginner can intuitively grasp how the system works before seeing the code.
+4. **Environment Setup** (code) — pip installs and imports. Include `torch`, `numpy`, `matplotlib`, and any other needed libraries.
+5. **Configuration & Hyperparameters** (code) — A single config dict or dataclass with all hyperparameters. Add comments explaining each.
+6. **Data Preparation** (code) — Synthetic dataset generation or loading. Must produce realistic dummy data matching the paper's domain.
+7. **Model Architecture** (code) — Full PyTorch model implementation. Use `nn.Module` subclasses with detailed docstrings about each component. Include shape comments.
+8. **Training Loop** (code) — Complete training loop with loss tracking, progress printing, and gradient clipping.
+9. **Training Execution** (code) — Run the training and display results.
+10. **Evaluation & Metrics** (code) — Run inference on test data and compute relevant metrics.
+11. **Visualizations** (code) — Matplotlib charts: loss curves, attention heatmaps or feature maps, sample predictions.
+12. **Key Takeaways** (markdown) — Bullet-point summary of what was learned, what would change at full scale, potential improvements.
+13. **References** (markdown) — Paper citation, related work links, library documentation links.
+Each cell in the JSON array must have:
+{{"cell_type": "code" or "markdown", "source": "cell content as a string"}}
+RULES:
+- All code must be executable on CPU
+- Use educational variable names and heavy commenting
+- Include print() statements showing tensor shapes and intermediate results
+- Follow the 13-section structure exactly
+- Minimum 15 cells total
+- The Feynman Explanation should be at least 300 words
+- Return ONLY the JSON array, no markdown fences"""
+# ── OCR extraction (NVIDIA NeMo Retriever OCR v1) ─────────────────────────
+def extract_text_from_images(base64_images):
+    """Extract text from paper page images using NVIDIA NeMo Retriever OCR API.
+    Sends page images to the dedicated OCR model for fast, accurate extraction.
+    Falls back to page-by-page if a batch request fails.
+    """
+    all_text = []
+    headers = {
+        "Authorization": f"Bearer {OCR_API_KEY}",
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+    }
+    total = len(base64_images)
+    print(f"  OCR: Processing {total} pages via NVIDIA NeMo Retriever...")
+    for page_idx, img_b64 in enumerate(base64_images):
+        print(f"    Page {page_idx + 1}/{total}...")
+        payload = {
+            "input": [
+                {
+                    "type": "image_url",
+                    "url": f"data:image/jpeg;base64,{img_b64}"
+                }
+            ],
+            "merge_levels": ["paragraph"]
+        }
+        try:
+            resp = requests.post(
+                OCR_API_URL,
+                headers=headers,
+                json=payload,
+                timeout=60,
+            )
+            resp.raise_for_status()
+            result = resp.json()
+            # Extract text from OCR response
+            page_text = _parse_ocr_response(result, page_idx + 1)
+            if page_text:
+                all_text.append(page_text)
+        except Exception as e:
+            print(f"    \u26a0 OCR failed for page {page_idx + 1}: {e}")
+            # Continue with remaining pages
+            continue
+    if not all_text:
+        raise RuntimeError("OCR failed: No text extracted from any page")
+    combined = "\n\n".join(all_text)
+    print(f"  OCR complete: {len(combined)} chars from {len(all_text)}/{total} pages")
+    return combined
+def _parse_ocr_response(response_json, page_num):
+    """Parse the NVIDIA OCR API response into clean text.
+    Response format: {"data": [{"text_detections": [{"text_prediction": {"text": ..., "confidence": ...}}]}]}
+    """
+    texts = []
+    try:
+        for item in response_json.get("data", []):
+            for detection in item.get("text_detections", []):
+                pred = detection.get("text_prediction", {})
+                text = pred.get("text", "").strip()
+                confidence = pred.get("confidence", 0)
+                # Only include text with reasonable confidence
+                if text and confidence > 0.3:
+                    texts.append(text)
+    except Exception as e:
+        print(f"    \u26a0 Error parsing OCR response for page {page_num}: {e}")
+        return ""
+    return "\n".join(texts)
+# ── LLM Call with Retry ───────────────────────────────────────────────────
+def call_with_retry(messages, max_tokens=4096, temperature=0.3, stream=False):
+    """Call the LLM API with retry logic for transient errors."""
+    last_error = None
+    for attempt in range(MAX_RETRIES):
+        try:
+            kwargs = dict(
+                model=MODEL,
+                messages=messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                timeout=300,
+            )
+            if stream:
+                kwargs["stream"] = True
+                return client.chat.completions.create(**kwargs)
+            else:
+                response = client.chat.completions.create(**kwargs)
+                return response.choices[0].message.content
+        except Exception as e:
+            error_str = str(e).lower()
+            if any(kw in error_str for kw in ["429", "rate", "500", "503", "overloaded", "unavailable"]):
+                last_error = e
+                wait = RETRY_DELAYS[min(attempt, len(RETRY_DELAYS) - 1)]
+                print(f"  ⚠ Transient error. Waiting {wait}s before retry {attempt + 1}/{MAX_RETRIES}...")
+                time.sleep(wait)
+            else:
+                raise
+    raise RuntimeError(f"Failed after {MAX_RETRIES} retries. Last error: {last_error}")
+# ── JSON Parsing ──────────────────────────────────────────────────────────
+def parse_llm_json(raw_text, step_name):
+    """Parse JSON from LLM response, with cleanup and one repair attempt."""
+    if raw_text is None:
+        print(f"  ⚠ LLM returned None for {step_name}")
+        return {}
+    text = raw_text.strip()
+    # Strip markdown code fences if present
+    if text.startswith("```"):
+        first_newline = text.index("\n")
+        text = text[first_newline + 1:]
+    if text.endswith("```"):
+        text = text[:-3]
+    text = text.strip()
+    # Try direct parse
+    try:
+        return json.loads(text)
+    except json.JSONDecodeError as e:
+        print(f"  ⚠ JSON parse failed in {step_name}. Attempting repair...")
+    # Attempt auto-repair via LLM
+    repair_prompt = (
+        f"The following text was supposed to be valid JSON but has a syntax error:\n\n"
+        f"{text[:6000]}\n\n"
+        f"Error: {e}\n\n"
+        f"Return ONLY the corrected valid JSON, nothing else."
+    )
+    repaired = call_with_retry(
+        messages=[
+            {"role": "system", "content": "You are a JSON repair tool. Return only valid JSON."},
+            {"role": "user", "content": repair_prompt},
+        ],
+        max_tokens=max(len(text) // 2, 4096),
+        temperature=0.1,
+    )
+    if repaired is None:
+        raise ValueError(f"Could not repair JSON from {step_name} — LLM returned None")
+    repaired = repaired.strip()
+    if repaired.startswith("```"):
+        repaired = repaired.split("\n", 1)[1]
+    if repaired.endswith("```"):
+        repaired = repaired[:-3]
+    try:
+        return json.loads(repaired.strip())
+    except json.JSONDecodeError:
+        # Last resort: try to extract JSON from the text
+        json_match = re.search(r'[\[{].*[\]}]', repaired.strip(), re.DOTALL)
+        if json_match:
+            return json.loads(json_match.group())
+        raise ValueError(f"Could not parse JSON from {step_name} even after repair.")
+# ── Pipeline Stages ───────────────────────────────────────────────────────
+def analyze_paper(raw_text):
+    """Stage 1: Analyze extracted text into structured JSON."""
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": f"{ANALYSIS_PROMPT}\n\n--- EXTRACTED PAPER TEXT ---\n\n{raw_text}"},
+    ]
+    raw = call_with_retry(messages, max_tokens=6144, temperature=0.2)
+    return parse_llm_json(raw, "paper_analysis")
+def design_implementation(analysis):
+    """Stage 2: Create implementation design from analysis."""
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": f"{DESIGN_PROMPT}\n\n--- PAPER ANALYSIS ---\n\n{json.dumps(analysis, indent=2)}"},
+    ]
+    raw = call_with_retry(messages, max_tokens=6144, temperature=0.2)
+    return parse_llm_json(raw, "implementation_design")
+def generate_notebook_cells_stream(analysis, design):
+    """
+    Stage 3: Generate notebook cells from analysis and design.
+    Yields tokens from the LLM for live streaming in the UI.
+    Finally yields the parsed cells list.
+    """
+    prompt = GENERATE_PROMPT_TEMPLATE.format(
+        analysis=json.dumps(analysis, indent=2),
+        design=json.dumps(design, indent=2),
+    )
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": prompt},
+    ]
+    # Use streaming mode
+    stream = call_with_retry(messages, max_tokens=65536, temperature=0.3, stream=True)
+    full_response = []
+    for chunk in stream:
+        if chunk.choices and chunk.choices[0].delta.content:
+            token = chunk.choices[0].delta.content
+            full_response.append(token)
+            yield ("token", token)
+    raw_text = "".join(full_response)
+    result = parse_llm_json(raw_text, "notebook_cells")
+    # Final logic to ensure we return a list of cells
+    cells = []
+    if isinstance(result, dict):
+        cells = result.get("cells", [{"cell_type": "markdown", "source": json.dumps(result, indent=2)}])
+    elif isinstance(result, list):
+        cells = result
+    else:
+        cells = [{"cell_type": "markdown", "source": raw_text}]
+    yield ("cells_final", cells)
+# ── Streaming Pipeline ─────────────────────────────────────────────────────
+def run_full_pipeline_stream(raw_text):
+    """
+    Orchestrates the full 3-stage pipeline (analyze → design → generate).
+    This is a generator: it returns nothing and reports everything through
+    yielded (event_type, data) tuples:
+        ("text",  str)       — display text for the code viewer
+        ("cells", list)      — final cells (only yielded once at end)
+        ("analysis", dict)   — analysis metadata
+        ("error", str)       — error message
+    Any exception raised inside the pipeline is converted into a single
+    ("error", str(e)) event, after which the generator ends. An empty
+    analysis also ends the run early with an "error" event.
+    Args:
+        raw_text: Extracted paper text passed to analyze_paper.
+    """
+    try:
+        # ── Stage 1: Analyze ──
+        yield ("text", "\n  Analyzing Paper\n")
+        yield ("text", "  " + "─" * 40 + "\n\n")
+        analysis = analyze_paper(raw_text)
+        if not analysis:
+            yield ("text", "  Analysis returned empty. The LLM may have failed.\n\n")
+            yield ("error", "Analysis returned empty result")
+            return
+        title = analysis.get("title", "Unknown Paper")
+        field = analysis.get("research_field", "")
+        insight = analysis.get("key_insight", "")
+        algos = [a.get("name", "") for a in analysis.get("algorithms", [])]
+        feynman_analogy = analysis.get("feynman_analogy", "")
+        feynman_concept = analysis.get("feynman_core_concept", "")
+        # Clean, minimal analysis output
+        yield ("text", f"  {title}\n")
+        yield ("text", f"  {field}\n\n")
+        # The Feynman Explanation — the star of the show
+        if feynman_analogy or feynman_concept:
+            yield ("text", "  ─── The Feynman Explanation ───\n\n")
+            if feynman_analogy:
+                yield ("text", f"  {feynman_analogy}\n\n")
+            if feynman_concept:
+                yield ("text", f"  {feynman_concept}\n\n")
+        if insight:
+            yield ("text", f"  Key Insight: {insight}\n\n")
+        yield ("text", "  Analysis complete.\n\n")
+        # Metadata event uses shortened keys ("field", "insight"), not the analysis' own key names.
+        yield ("analysis", {
+            "title": title,
+            "field": field,
+            "insight": insight,
+            "algorithms": algos,
+            "feynman_analogy": feynman_analogy,
+        })
+        # ── Stage 2: Design ──
+        yield ("text", "\n  Designing Implementation\n")
+        yield ("text", "  " + "─" * 40 + "\n\n")
+        design = design_implementation(analysis)
+        # An empty design is tolerated: stage 3 still runs with {}.
+        if not design:
+            design = {}
+        arch = design.get("model_architecture", {})
+        tc = design.get("training_config", {})
+        yield ("text", f"  Architecture: {arch.get('type', 'N/A')}\n")
+        # The fallbacks below only affect this status line, not the design passed to stage 3.
+        yield ("text", f"  Training: {tc.get('optimizer', 'Adam')}, lr={tc.get('learning_rate', 0.001)}, {tc.get('num_epochs', 10)} epochs\n")
+        yield ("text", "  Design complete.\n\n")
+        # ── Stage 3: Generate (Now with LIVE STREAMING) ──
+        yield ("text", "\n  Generating Notebook (Live Streaming)\n")
+        yield ("text", "  " + "─" * 40 + "\n\n")
+        cells = []
+        for event_type, data in generate_notebook_cells_stream(analysis, design):
+            if event_type == "token":
+                # Yield raw tokens to the code viewer for "ghost-writing" effect
+                yield ("text", data)
+            elif event_type == "cells_final":
+                cells = data
+        code_cells = sum(1 for c in cells if c.get("cell_type") == "code")
+        md_cells = sum(1 for c in cells if c.get("cell_type") == "markdown")
+        yield ("text", f"\n\n  ✅ Generation complete: {len(cells)} cells ({code_cells} code, {md_cells} markdown)\n")
+        yield ("text", "  Notebook ready for download.\n")
+        yield ("cells", cells)
+    except Exception as e:
+        # Top-level boundary: surface the failure to the consumer as an event instead of raising.
+        yield ("error", str(e))
+# ── Legacy compatibility ───────────────────────────────────────────────────
+# Keep old function signatures working for backward compatibility
+def extract_methodology(base64_images):
+    """Legacy wrapper: extracts text from images.
+    Delegates unchanged to extract_text_from_images, so it takes the same
+    argument, returns the same combined text and raises the same errors.
+    """
+    return extract_text_from_images(base64_images)
+# ── Visual Illustration (FLUX.1-schnell) ───────────────────────────────────
+# System prompt for Qwen to craft image generation prompts.
+# Used by generate_concept_image as the system message of the prompt-writing call.
+# NOTE(review): rule 3 asks for "labeled components" while rule 6 forbids text in
+# the image — confirm which one is intended.
+IMAGE_PROMPT_SYSTEM = """You are a world-class scientific illustrator and prompt engineer.
+Your job: given a structured analysis of a research paper, write ONE prompt for an
+AI image generator (FLUX) that will produce a clear, beautiful, academic-quality
+visual illustration of the paper's CORE CONCEPT.
+Rules:
+1. Focus on the MAIN IDEA — the central algorithm, architecture, or mechanism.
+2. Describe the visual layout precisely: shapes, arrows, labels, flow direction.
+3. Use academic illustration style: clean lines, labeled components, white background.
+4. Include spatial relationships: "on the left", "flowing into", "surrounded by".
+5. Mention color coding for different components.
+6. Do NOT include text/equations in the image — focus on visual metaphors.
+7. Keep it to ONE paragraph, 80-120 words.
+8. End with style keywords: "scientific diagram, educational poster, vector style,
+   clean layout, professional, high resolution"
+Return ONLY the prompt text, nothing else."""
+def generate_concept_image(analysis):
+    """
+    Generate a visual illustration of a paper's core concept.
+    Step 1: Qwen crafts a detailed, structured prompt from the analysis.
+    Step 2: FLUX.1-schnell generates the image.
+    Returns base64-encoded PNG string or None on failure.
+    """
+    if not FLUX_API_KEY:
+        raise RuntimeError("NVIDIA_FLUX_API_KEY not set")
+    # ── Step 1: Qwen → Image Prompt ──
+    analysis_summary = json.dumps({
+        "title": analysis.get("title", ""),
+        "research_field": analysis.get("research_field") or analysis.get("field", ""),
+        "key_insight": analysis.get("key_insight") or analysis.get("insight", ""),
+        "algorithms": analysis.get("algorithms", []),
+        "feynman_analogy": analysis.get("feynman_analogy", ""),
+        "feynman_core_concept": analysis.get("feynman_core_concept", ""),
+    }, indent=2)
+    prompt_messages = [
+        {"role": "system", "content": IMAGE_PROMPT_SYSTEM},
+        {"role": "user", "content": f"Create an image generation prompt for this paper:\n\n{analysis_summary}"},
+    ]
+    print("  🎨 Generating image prompt via Qwen...")
+    image_prompt = call_with_retry(prompt_messages, max_tokens=300, temperature=0.7)
+    if not image_prompt:
+        raise RuntimeError("Qwen returned empty image prompt")
+    # Add preamble for FLUX to ensure academic quality
+    full_prompt = (
+        "A detailed, clean scientific illustration for an academic paper. "
+        "Style: professional educational diagram, labeled components, "
+        "modern flat vector design, white background, high contrast, "
+        "color-coded sections, no text. "
+        f"{image_prompt.strip()}"
+    )
+    print(f"  📝 FLUX prompt ({len(full_prompt)} chars): {full_prompt[:100]}...")
+    # ── Step 2: FLUX.1-schnell → Image ──
+    print("  🖼️  Calling FLUX.1-schnell...")
+    headers = {
+        "Authorization": f"Bearer {FLUX_API_KEY}",
+        "Content-Type": "application/json",
+        "Accept": "application/json",
+    }
+    payload = {
+        "prompt": full_prompt,
+        "height": 1024,
+        "width": 1024,
+        "num_inference_steps": 4,
+        "guidance_scale": 0.0,
+    }
+    response = requests.post(FLUX_API_URL, headers=headers, json=payload, timeout=60)
+    if response.status_code != 200:
+        raise RuntimeError(f"FLUX API error {response.status_code}: {response.text[:200]}")
+    result = response.json()
+    # FLUX returns {"image": "base64..."} or {"artifacts": [{"base64": "..."}]}
+    image_b64 = None
+    if "image" in result:
+        image_b64 = result["image"]
+    elif "artifacts" in result and len(result["artifacts"]) > 0:
+        image_b64 = result["artifacts"][0].get("base64", "")
+    if not image_b64:
+        raise RuntimeError("FLUX returned no image data")
+    print(f"  ✅ Image generated ({len(image_b64)} chars base64)")
+    return image_b64

utils/notebook_builder.py ADDED Viewed

	@@ -0,0 +1,85 @@

+"""
+Pundit Feynman Notebook Builder
+Supports both structured JSON cells and legacy free-text → regex approach.
+"""
+import re
+import nbformat
+from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell
+def build_notebook_from_cells(cells_json, output_path):
+    """
+    Build a .ipynb from a list of structured cell dicts.
+    Each cell: {"cell_type": "code"|"markdown", "source": "..."}
+    """
+    nb = new_notebook()
+    nb.metadata["kernelspec"] = {
+        "display_name": "Python 3",
+        "language": "python",
+        "name": "python3",
+    }
+    nb.metadata["language_info"] = {
+        "name": "python",
+        "version": "3.9",
+    }
+    for cell_data in cells_json:
+        cell_type = cell_data.get("cell_type", "code")
+        source = cell_data.get("source", "")
+        if cell_type == "markdown":
+            nb.cells.append(new_markdown_cell(source))
+        elif cell_type == "code":
+            nb.cells.append(new_code_cell(source))
+        else:
+            # Default to code for unknown types
+            nb.cells.append(new_code_cell(source))
+    # Fallback: if no cells, add a placeholder
+    if not nb.cells:
+        nb.cells.append(new_markdown_cell("# No cells were generated"))
+    with open(output_path, "w", encoding="utf-8") as f:
+        nbformat.write(nb, f)
+    code_cells = sum(1 for c in nb.cells if c.cell_type == "code")
+    md_cells = sum(1 for c in nb.cells if c.cell_type == "markdown")
+    print(f"  📓 Notebook saved: {output_path} ({len(nb.cells)} cells: {code_cells} code, {md_cells} markdown)")
+    return output_path
+def build_notebook(full_text, output_path):
+    """
+    Legacy: Parses mixed markdown/code text into a Jupyter Notebook.
+    Separates ```python code blocks into Code cells, everything else into Markdown cells.
+    """
+    nb = new_notebook()
+    nb.metadata["kernelspec"] = {
+        "display_name": "Python 3",
+        "language": "python",
+        "name": "python3",
+    }
+    # Split on ```python ... ``` blocks
+    pattern = r"```python\s*\n(.*?)```"
+    parts = re.split(pattern, full_text, flags=re.DOTALL)
+    for i, part in enumerate(parts):
+        content = part.strip()
+        if not content:
+            continue
+        if i % 2 == 0:
+            nb.cells.append(new_markdown_cell(content))
+        else:
+            nb.cells.append(new_code_cell(content))
+    if not nb.cells:
+        nb.cells.append(new_markdown_cell(full_text))
+    with open(output_path, "w", encoding="utf-8") as f:
+        nbformat.write(nb, f)
+    print(f"  📓 Notebook saved: {output_path} ({len(nb.cells)} cells)")
+    return output_path

utils/pdf_processor.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import base64
+import fitz  # PyMuPDF
+def process_pdf_to_base64(pdf_path: str, dpi: int = 150) -> list[str]:
+    """
+    Converts each page of a PDF into a base64-encoded JPEG string.
+    Preserves full RGB color (important for color-coded graphs in papers).
+    """
+    try:
+        doc = fitz.open(pdf_path)
+        base64_images = []
+        for page in doc:
+            pix = page.get_pixmap(dpi=dpi)
+            img_bytes = pix.tobytes("jpeg")
+            img_b64 = base64.b64encode(img_bytes).decode("utf-8")
+            base64_images.append(img_b64)
+        doc.close()
+        print(f"Extracted {len(base64_images)} pages at {dpi} DPI (color preserved)")
+        return base64_images
+    except Exception as e:
+        print(f"Error processing PDF: {e}")
+        raise e