Yusufarsh commited on
Commit
80f8512
·
verified ·
1 Parent(s): 2e9eb8e

Upload 9 files

Browse files
server/__pycache__/api.cpython-312.pyc ADDED
Binary file (12.5 kB). View file
 
server/__pycache__/app.cpython-312.pyc ADDED
Binary file (40 kB). View file
 
server/__pycache__/llm_handler.cpython-312.pyc ADDED
Binary file (8.37 kB). View file
 
server/__pycache__/pdf_processor.cpython-312.pyc ADDED
Binary file (834 Bytes). View file
 
server/__pycache__/ppt_generator.cpython-312.pyc ADDED
Binary file (4.83 kB). View file
 
server/app.py ADDED
@@ -0,0 +1,978 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ReproAgent - Gradio Web Interface
3
+ Interactive demo for AI-powered ML paper reproduction.
4
+
5
+ Three tabs:
6
+ 1. Reproduce a Paper — Upload PDF or paste URL, agent works through it live
7
+ 2. Simulation Demo — Quick simulation with pre-loaded papers
8
+ 3. Benchmark — Compare reasoning vs random agents
9
+ """
10
+
11
+ import sys
12
+ import os
13
+ import re
14
+ import json
15
+ import time
16
+ import traceback
17
+ import uuid
18
+ from pptx import Presentation
19
+ from pptx.util import Inches, Pt
20
+ from pptx.dml.color import RGBColor
21
+ from pathlib import Path
22
+ from typing import Dict, Any, List, Tuple, Optional, Generator
23
+
24
+ # Ensure project root is importable
25
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
26
+
27
+ import gradio as gr
28
+ import numpy as np
29
+
30
+ from reproagent.environment import ReproAgentEnv
31
+ from reproagent.state import PaperState
32
+ from reproagent.models import LLMClient
33
+ from reproagent.papers import create_sample_papers
34
+ from agents.reasoning_agent import create_agent
35
+
36
+ # Modular Easy Mode Imports
37
+ from server.llm_handler import generate_summary_and_ppt_content
38
+ from server.pdf_processor import extract_text_from_pdf as extract_text_fitz
39
+ from server.ppt_generator import create_ppt
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Helpers
44
+ # ---------------------------------------------------------------------------
45
+
46
def safe_print(msg: str):
    """Print ``msg``, falling back to an ASCII rendering if stdout rejects it.

    Some consoles (notably Windows code pages) raise ``UnicodeEncodeError``
    for emoji or other non-encodable characters. In that case the message is
    re-emitted with every non-ASCII character replaced by ``?``.
    """
    try:
        print(msg)
    except UnicodeEncodeError:
        ascii_only = msg.encode("ascii", "replace").decode()
        print(ascii_only)
52
+
53
+
54
def _join_page_text(pages, max_pages: int) -> str:
    """Concatenate the extracted text of the first ``max_pages`` pages.

    Each page object only needs an ``extract_text()`` method; pages that
    yield no text are skipped. Every kept page is followed by a newline.
    """
    chunks = []
    for page in pages[:max_pages]:
        page_text = page.extract_text()
        if page_text:
            chunks.append(page_text + "\n")
    return "".join(chunks)


def extract_text_from_pdf(pdf_path: str, max_pages: int = 15) -> str:
    """Extract text from a PDF file using whichever PDF library is available.

    ``pdfplumber`` is tried first, then ``PyPDF2``. A backend that is not
    installed, fails to open the file, or produces only whitespace is skipped.

    Args:
        pdf_path: Path of the PDF on disk.
        max_pages: Number of leading pages to read (defaults to 15, the
            limit this function has always used).

    Returns:
        The extracted text, or ``""`` when no backend produced any text.
    """
    # Extraction is best-effort: any failure of one backend (missing
    # package, unreadable/corrupt file) simply moves on to the next one.
    try:
        import pdfplumber

        with pdfplumber.open(pdf_path) as pdf:
            text = _join_page_text(pdf.pages, max_pages)
        if text.strip():
            return text
    except Exception:
        pass

    try:
        import PyPDF2

        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # Pages are read lazily, so extract while the file is still open.
            text = _join_page_text(reader.pages, max_pages)
        if text.strip():
            return text
    except Exception:
        pass

    return ""
86
+
87
+
88
def extract_paper_info_regex(text: str) -> Dict[str, Any]:
    """Extract paper metadata from raw text using regular expressions only.

    Args:
        text: Plain text of the paper (e.g. as extracted from its PDF).

    Returns:
        A dict with the keys ``title`` (first non-empty line, max 200 chars),
        ``abstract`` (max 1000 chars), ``github_links`` (de-duplicated, in
        order of appearance), ``datasets`` (names from a fixed list that occur
        in the text), ``metrics`` (list of ``{"name", "value"}`` string pairs)
        and ``key_claims`` (always empty here).
    """
    info: Dict[str, Any] = {
        "title": "",
        "abstract": "",
        "github_links": [],
        "datasets": [],
        "metrics": [],
        "key_claims": [],
    }

    # Title: the first non-empty line of the document.
    for raw_line in text.split("\n"):
        candidate = raw_line.strip()
        if candidate:
            info["title"] = candidate[:200]
            break

    # Abstract: text after the word "abstract" up to the first blank line or
    # the start of the first section. The section terminators are anchored to
    # the beginning of a line so that numbers such as "91.2%" or the word
    # "introduction" inside the abstract do not cut it short.
    abs_match = re.search(
        r"(?i)abstract[:\s]*\n?(.*?)"
        r"(?:\n\s*\n|\n\s*(?:\d+\.?\s*)?introduction\b|\n\s*1[\.\s])",
        text,
        re.DOTALL,
    )
    if abs_match:
        info["abstract"] = abs_match.group(1).strip()[:1000]

    # GitHub links. The repo-name character class admits dots, so a URL that
    # ends a sentence captures the full stop; strip it before de-duplicating.
    links: List[str] = []
    for url in re.findall(r"https?://github\.com/[\w\-]+/[\w\-\.]+", text):
        url = url.rstrip(".")
        if url not in links:
            links.append(url)
    info["github_links"] = links

    # Datasets: match whole tokens only, so "VOC" is not found inside
    # "vocabulary" and "CIFAR-10" is not reported for "CIFAR-100".
    known_datasets = [
        "CIFAR-10", "CIFAR-100", "MNIST", "ImageNet", "COCO",
        "SST-2", "GLUE", "SQuAD", "WMT", "CelebA", "VOC",
    ]
    for ds in known_datasets:
        token = r"(?<![A-Za-z0-9])" + re.escape(ds) + r"(?![A-Za-z0-9])"
        if re.search(token, text, re.IGNORECASE):
            info["datasets"].append(ds)

    # Metrics: common ML metric names followed by a number. The short names
    # "acc" and "AP" get a leading word boundary so they do not match the
    # tail of unrelated words (e.g. "gap 0.5").
    metric_patterns = [
        r"(?i)\b(accuracy|acc)[\s:=]*(\d+\.?\d*)\s*%",
        r"(?i)\b(accuracy|acc)[\s:=]*(0\.\d+)",
        r"(?i)(f1[\s\-]?score)[\s:=]*(\d+\.?\d*)",
        r"(?i)(bleu)[\s:=]*(\d+\.?\d*)",
        r"(?i)(FID)[\s:=of ]*(\d+\.?\d*)",
        r"(?i)(perplexity|ppl)[\s:=]*(\d+\.?\d*)",
        r"(?i)(speedup|speed-up)[\s:of=]*(\d+\.?\d*)[x\s]",
        r"(?i)(MACs?|FLOPs?)[\s:=reduction of]*(\d+\.?\d*)%",
        r"(?i)(PSNR)[\s:=]*(\d+\.?\d*)",
        r"(?i)(SSIM)[\s:=]*(0\.\d+)",
        r"(?i)\b(mAP|AP)[\s:=]*(\d+\.?\d*)",
        r"(?i)(top-?1)[\s:=accuracy ]*(\d+\.?\d*)",
    ]
    for pat in metric_patterns:
        for m in re.finditer(pat, text):
            info["metrics"].append({"name": m.group(1), "value": m.group(2)})

    return info
152
+
153
+
154
def _as_str_list(value: Any) -> List[str]:
    """Coerce an LLM field that should be a list of strings into one.

    ``None`` and unsupported types become ``[]``; a bare string becomes a
    one-element list; ``None``/blank items are dropped and the rest are
    converted with ``str`` and stripped.
    """
    if isinstance(value, str):
        value = [value]
    elif not isinstance(value, (list, tuple)):
        return []
    return [str(item).strip() for item in value if item is not None and str(item).strip()]


def extract_paper_info_llm(text: str, llm: LLMClient) -> Dict[str, Any]:
    """Ask the LLM to extract paper metadata from the start of ``text``.

    Only the first 3000 characters of ``text`` are sent. The model's answer
    is obtained through ``llm.generate_structured`` and normalised so that
    callers always receive strings and lists of strings, whatever JSON
    shapes the model produced.

    Args:
        text: Plain text of the paper.
        llm: Client exposing ``generate_structured(prompt)``.

    Returns:
        A dict with ``title``, ``abstract``, ``github_links``, ``datasets``,
        ``metrics`` (zero or one ``{"name", "value"}`` entry), ``model_name``
        and ``key_claims``; or ``{}`` when the call fails, the result is not
        a dict, or the result carries an ``"error"`` key.
    """
    prompt = f"""You are an expert ML research assistant. Extract the following from this research paper text:

1. title - Full paper title
2. abstract - The abstract (first 500 chars)
3. github_links - Any GitHub repository URLs mentioned
4. datasets - Datasets used (e.g., CIFAR-10, ImageNet)
5. target_metric_name - Main evaluation metric name (e.g. FID, CLIP score, BLEU, accuracy). Extract this EXACTLY as written in the text. DO NOT default to accuracy.
6. target_metric_value - The numerical claim for this metric (e.g. 7.5, 0.95). Extract EXACTLY as written. DO NOT normalize or guess.
7. model_name - The primary model architecture
8. key_claims - List of 3-5 key claims from the paper

Respond ONLY with valid JSON.

Paper text (first 3000 chars):
{text[:3000]}
"""
    try:
        result = llm.generate_structured(prompt)
        safe_print(f"[DEBUG] LLM raw result: {json.dumps(result)[:500]}")

        if not isinstance(result, dict):
            safe_print(f"[WARN] LLM returned unexpected type: {type(result).__name__}")
            return {}
        if "error" in result:
            safe_print(f"[WARN] LLM returned error: {result.get('error')}")
            return {}

        # Strip sentence punctuation the model may have copied along with
        # each URL, then drop anything that ends up empty.
        gh_links = [
            re.sub(r'[.,;:)\]]+$', '', url).rstrip('.')
            for url in _as_str_list(result.get("github_links"))
        ]
        gh_links = [url for url in gh_links if url]

        metrics = []
        if result.get("target_metric_value"):
            metrics.append({
                "name": str(result.get("target_metric_name") or "accuracy"),
                "value": str(result.get("target_metric_value", "")),
            })

        return {
            # `or ""` guards against JSON nulls, which callers would
            # otherwise slice/join as if they were strings.
            "title": str(result.get("title") or ""),
            "abstract": str(result.get("abstract") or ""),
            "github_links": gh_links,
            "datasets": _as_str_list(result.get("datasets")),
            "metrics": metrics,
            "model_name": str(result.get("model_name") or ""),
            "key_claims": _as_str_list(result.get("key_claims")),
        }
    except Exception as e:
        # Best-effort: the caller falls back to regex extraction on {}.
        safe_print(f"[WARN] LLM extraction failed: {e}")
        traceback.print_exc()

    return {}
204
+
205
+
206
def run_easy_mode(pdf_file: Any) -> Tuple[str, str]:
    """Easy Mode: summarise an uploaded PDF and build a slide deck from it.

    Pipeline: extract the PDF text, ask the modular LLM handler for a
    description plus slide contents, then write a ``.pptx`` under
    ``data/tmp`` with a random file name.

    Args:
        pdf_file: Uploaded file; either an object with a ``name`` attribute
            holding its path, or something whose ``str()`` is the path.

    Returns:
        ``(description, ppt_path)``. On failure the first element is an
        ``"Error: ..."`` message and the second is ``""``.
    """
    if not pdf_file:
        return "Error: No file uploaded.", ""

    pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)
    safe_print(f"[EasyMode] Starting for {pdf_path}")

    safe_print("[EasyMode] Extracting text using fitz...")
    text = extract_text_fitz(pdf_path)
    if not text:
        return "Error: Could not extract text from PDF.", ""

    # 1. Use modular LLM handler for summary and ppt structure
    safe_print("[EasyMode] Calling Gemini via modular handler...")
    data = generate_summary_and_ppt_content(text)
    if not isinstance(data, dict):
        # Treat a missing/malformed handler result like an empty one so the
        # fallbacks below still produce a usable description and deck.
        data = {}

    paper_desc = data.get("description") or "Failed to generate a description."
    slides_data = data.get("ppt_slides") or [
        # Fallback if no slides were generated
        {"title": "Overview", "content": [paper_desc]}
    ]

    # 2. Use modular PPT generator
    safe_print("[EasyMode] Generating premium PPT...")
    ppt_path = Path("data/tmp") / f"summary_{uuid.uuid4().hex[:8]}.pptx"
    ppt_path.parent.mkdir(parents=True, exist_ok=True)

    safe_print(f"[EasyMode] Saving PPT to {ppt_path}...")
    create_ppt(slides_data, str(ppt_path))
    safe_print("[EasyMode] Done.")

    return paper_desc, str(ppt_path)
242
+
243
+
244
+ # ---------------------------------------------------------------------------
245
+ # Tab 1: Reproduce a Paper
246
+ # ---------------------------------------------------------------------------
247
+
248
def run_paper_reproduction(
    pdf_file,
    paper_url: str,
    use_llm: bool,
    max_steps: int,
    execution_mode: str,
    clone_dir: str,
) -> Generator:
    """Main reproduction pipeline, streamed to the UI step by step.

    Reads the paper (uploaded PDF takes precedence over the URL), extracts
    its metadata (LLM first when enabled, regex otherwise), seeds a
    ``ReproAgentEnv`` with that metadata and runs the reasoning agent for at
    most ``max_steps`` steps.

    Args:
        pdf_file: Uploaded PDF (path string or object with ``name``), or None.
        paper_url: URL of the paper; used only when ``pdf_file`` is None.
        use_llm: Whether to use the LLM for extraction and inside the agent.
        max_steps: Upper bound on agent steps.
        execution_mode: Passed to the environment as ``exec_mode``.
        clone_dir: Workspace directory; blank falls back to
            ``/tmp/reproagent``.

    Yields:
        ``(log_md, paper_info_md, metrics_json, state_json)`` tuples; the log
        is cumulative, the JSON strings are ``"{}"`` until data exists.
    """
    log_lines: List[str] = []

    def log(msg: str) -> str:
        """Append ``msg`` to the running log and return the whole log."""
        log_lines.append(msg)
        return "\n".join(log_lines)

    def num(source: Dict[str, Any], key: str) -> float:
        """Read a numeric info field, treating missing/None as 0."""
        return source.get(key, 0) or 0

    # API callers may send null instead of an empty string.
    url = (paper_url or "").strip()
    workspace = (clone_dir or "").strip() or "/tmp/reproagent"

    # --- Step 0: Input validation ---
    if pdf_file is None and not url:
        yield (log("**Please upload a PDF or paste a paper URL.**"), "", "{}", "{}")
        return

    yield (log("### Starting ReproAgent...\n"), "", "{}", "{}")
    time.sleep(0.3)

    # --- Step 1: Get paper text ---
    paper_text = ""

    if pdf_file is not None:
        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)
        yield (log(f"**Step 1/9: Reading PDF** `{Path(pdf_path).name}`..."), "", "{}", "{}")
        time.sleep(0.2)
        paper_text = extract_text_from_pdf(pdf_path)
        if not paper_text:
            yield (log("- Could not extract text from PDF. Is it a scanned image?"), "", "{}", "{}")
            return
        yield (log(f"- Extracted **{len(paper_text):,} characters** from PDF\n"), "", "{}", "{}")
    else:
        yield (log(f"**Step 1/9: Fetching paper** from `{url[:80]}`..."), "", "{}", "{}")
        time.sleep(0.3)
        try:
            import requests

            resp = requests.get(url, timeout=15)
            if resp.status_code != 200:
                yield (log(f"- Failed to fetch URL (status {resp.status_code})\n"), "", "{}", "{}")
                return

            # Some PDF URLs carry no ".pdf" suffix, so also trust the
            # server's declared content type.
            content_type = resp.headers.get("Content-Type", "").lower()
            if url.lower().endswith(".pdf") or "application/pdf" in content_type:
                # Unique name per request so concurrent users do not
                # overwrite each other's download; removed once parsed.
                tmp_path = Path("data") / f"tmp_paper_{uuid.uuid4().hex[:8]}.pdf"
                tmp_path.parent.mkdir(parents=True, exist_ok=True)
                try:
                    tmp_path.write_bytes(resp.content)
                    paper_text = extract_text_from_pdf(str(tmp_path))
                finally:
                    tmp_path.unlink(missing_ok=True)
            else:
                paper_text = resp.text[:10000]
            yield (log(f"- Fetched **{len(paper_text):,} characters**\n"), "", "{}", "{}")
        except Exception as e:
            yield (log(f"- Error fetching URL: {e}\n"), "", "{}", "{}")
            return

    # --- Step 2: Extract paper info ---
    yield (log("**Step 2/9: Analyzing paper content**..."), "", "{}", "{}")
    time.sleep(0.2)

    # Try LLM first, fallback to regex
    paper_info: Dict[str, Any] = {}
    if use_llm:
        try:
            # Enforce Groq for Medium/Advanced Mode
            llm_client = LLMClient(provider="groq")
            if llm_client.provider != "mock":
                yield (log(f"- Using **{llm_client.provider.upper()}** LLM for intelligent extraction"), "", "{}", "{}")
                paper_info = extract_paper_info_llm(paper_text, llm_client)
        except Exception as e:
            # Non-fatal: regex extraction below takes over.
            safe_print(f"[WARN] LLM client unavailable: {e}")
            paper_info = {}

    if not paper_info or not paper_info.get("title"):
        yield (log("- Using **regex** extraction (LLM unavailable or failed)"), "", "{}", "{}")
        paper_info = extract_paper_info_regex(paper_text)

    paper_title = paper_info.get("title") or "Unknown Paper"
    github_links = paper_info.get("github_links") or []
    datasets = paper_info.get("datasets") or []
    metrics = paper_info.get("metrics") or []
    model_name = paper_info.get("model_name") or "Unknown"
    key_claims = paper_info.get("key_claims") or []

    # Determine target metric (first extracted metric wins)
    target_metric = 0.0
    metric_name = "Unknown"
    if metrics:
        metric_name = metrics[0].get("name", "Unknown")
        try:
            target_metric = float(metrics[0].get("value", "0.0"))
        except (ValueError, TypeError):
            pass

    # Build paper info markdown
    datasets_cell = ', '.join(str(d) for d in datasets) if datasets else 'Not detected'
    links_cell = ', '.join(f'[link]({u})' for u in github_links) if github_links else 'None found'
    paper_info_md = f"""## Paper Information

| Field | Value |
|-------|-------|
| **Title** | {paper_title[:100]} |
| **Model** | {model_name} |
| **Dataset(s)** | {datasets_cell} |
| **Target Metric** | {target_metric:.3f} ({metric_name}) |
| **GitHub Links** | {links_cell} |

"""
    if key_claims:
        paper_info_md += "### Key Claims\n"
        for claim in key_claims[:5]:
            paper_info_md += f"- {claim}\n"

    yield (log(f"- Title: **{paper_title[:80]}**"), paper_info_md, "{}", "{}")
    time.sleep(0.2)
    yield (log(f"- Found **{len(github_links)}** GitHub link(s)"), paper_info_md, "{}", "{}")
    yield (log(f"- Target: **{target_metric:.3f}** ({metric_name})\n"), paper_info_md, "{}", "{}")

    # --- Step 3-9: Run agent loop via environment ---
    yield (log("**Step 3/9: Initializing reproduction environment**...\n"), paper_info_md, "{}", "{}")
    time.sleep(0.2)

    try:
        step_limit = int(max_steps)
        env = ReproAgentEnv(
            difficulty="easy",
            max_steps=step_limit,
            use_llm=use_llm,
            render_mode=None,
            exec_mode=execution_mode,
            workspace_dir=workspace,
        )
        # Override paper state with what we extracted
        obs, info = env.reset()
        env.state.paper = PaperState(
            title=paper_title,
            dataset=datasets[0] if datasets else "Unknown",
            model=model_name,
            target_metric=target_metric,
            metric_name=metric_name,
            github_links=github_links,
            key_claims=key_claims,
            parsed=True,
            confidence=0.85,
        )
        env.state.experiment.target_metric = target_metric
        env.state.experiment.gap = target_metric

        agent = create_agent(env, agent_type="reasoning", use_llm=use_llm)
        agent.reset()

    except Exception as e:
        yield (log(f"\n**Error initializing:** {e}"), paper_info_md, "{}", "{}")
        return

    yield (log("- Environment ready. Starting agent loop...\n"), paper_info_md, "{}", "{}")

    # Maps the env's action_type to a (pipeline step, human label) pair.
    step_labels = {
        "parse_pdf": ("Step 3/9", "Reading paper"),
        "extract_github": ("Step 4/9", "Finding GitHub repo"),
        "extract_metrics": ("Step 4/9", "Extracting metrics"),
        "validate_parsing": ("Step 4/9", "Validating parse"),
        "clone_repo": ("Step 5/9", "Cloning repository"),
        "read_readme": ("Step 5/9", "Reading README"),
        "analyze_code": ("Step 5/9", "Analyzing code structure"),
        "find_entry_point": ("Step 5/9", "Finding entry point"),
        "extract_deps": ("Step 5/9", "Extracting dependencies"),
        "create_venv": ("Step 6/9", "Creating environment"),
        "install_requirements": ("Step 6/9", "Installing dependencies"),
        "install_package": ("Step 6/9", "Installing package"),
        "download_data": ("Step 6/9", "Downloading data"),
        "verify_setup": ("Step 6/9", "Verifying setup"),
        "run_training": ("Step 7/9", "Running code"),
        "run_eval": ("Step 7/9", "Running evaluation"),
        "analyze_error": ("Step 7/9", "Debugging error"),
        "apply_fix": ("Step 7/9", "Applying fix"),
        "search_solution": ("Step 7/9", "Searching for solution"),
        "modify_code": ("Step 7/9", "Modifying code"),
        "test_fix": ("Step 7/9", "Testing fix"),
        "run_experiment": ("Step 8/9", "Tuning hyperparameters"),
        "modify_learning_rate": ("Step 8/9", "Adjusting learning rate"),
        "modify_batch_size": ("Step 8/9", "Adjusting batch size"),
        "modify_optimizer": ("Step 8/9", "Trying different optimizer"),
        "compare_results": ("Step 9/9", "Comparing results"),
    }
    phase_icons = {
        "parsing": "📄", "repo_analysis": "🔍", "setup": "📦",
        "execution": "🚀", "debugging": "🐛", "experimentation": "🧪",
        "comparison": "📊",
    }

    total_reward = 0.0
    step = 0
    terminated = False
    truncated = False
    # Last successfully rendered panels, re-sent if the loop aborts.
    current_metrics = "{}"
    state_json = "{}"

    while not (terminated or truncated) and step < step_limit:
        try:
            action = agent.select_action(obs, info)
            obs, reward, terminated, truncated, info = env.step(action)
        except Exception as e:
            # Surface the failure in the log instead of killing the stream;
            # the final summary below still reports the last known state.
            yield (log(f"\n**Agent loop aborted at step {step + 1}:** {e}"),
                   paper_info_md, current_metrics, state_json)
            break

        action_name = info.get("action_type", "unknown")
        label = step_labels.get(action_name, ("", action_name))
        total_reward += reward
        step += 1

        # Get latest logs from env
        latest_logs = info.get("logs", [])
        log_detail = latest_logs[-1] if latest_logs else ""

        phase_icon = phase_icons.get(info.get("phase", ""), "▶")

        current = num(info, "current_metric")
        metric_str = f" | metric: **{current:.3f}**" if current > 0 else ""
        reward_str = f" | reward: {reward:+.2f}" if abs(reward) > 0.01 else ""

        line = f"{phase_icon} `{label[0]}` **{label[1]}**{metric_str}{reward_str}"
        if log_detail:
            line += f"\n - {log_detail}"

        current_metrics = json.dumps({
            "step": step,
            "current_metric": round(current, 4),
            "target_metric": round(num(info, "target_metric"), 4),
            "gap": round(num(info, "gap"), 4),
            "total_reward": round(total_reward, 2),
            "phase": info.get("phase", ""),
            "success": info.get("success", False),
        }, indent=2)
        state_json = json.dumps(env.state.to_dict(), indent=2)

        yield (log(line), paper_info_md, current_metrics, state_json)
        time.sleep(0.15)

    # --- Final summary ---
    success = info.get("success", False)
    final_metric = num(info, "current_metric")
    gap = num(info, "gap")

    result_icon = "✅" if success else "⚠️"
    summary = f"""
---
### {result_icon} Reproduction {'Complete!' if success else 'Incomplete'}

| Metric | Value |
|--------|-------|
| Steps | {step} |
| Final Metric | {final_metric:.4f} |
| Target | {target_metric:.4f} |
| Gap | {gap:.4f} |
| Total Reward | {total_reward:.2f} |
| Success | {'Yes' if success else 'No'} |
"""
    if not success:
        summary += "\n*Try increasing max steps or enabling LLM for better results.*"

    yield (log(summary), paper_info_md,
           json.dumps({
               "final_metric": round(final_metric, 4),
               "target_metric": round(target_metric, 4),
               "gap": round(gap, 4),
               "steps": step,
               "total_reward": round(total_reward, 2),
               "success": success,
           }, indent=2),
           json.dumps(env.state.to_dict(), indent=2))
517
+
518
+
519
+ # ---------------------------------------------------------------------------
520
+ # Tab 2: Simulation Demo (preserved from original)
521
+ # ---------------------------------------------------------------------------
522
+
523
class SimulationRunner:
    """Runs simulation episodes with pre-loaded papers."""

    def __init__(self):
        # Both are (re)created on every run_episode call.
        self.env = None
        self.agent = None

    def run_episode(
        self,
        difficulty: str,
        agent_type: str,
        max_steps: int,
        use_llm: bool,
        progress=gr.Progress(),
    ) -> Tuple[str, str, str, str]:
        """Run one full episode and render its outcome.

        Args:
            difficulty: Difficulty passed to ``ReproAgentEnv``.
            agent_type: Agent kind passed to ``create_agent``.
            max_steps: Step budget (converted with ``int``).
            use_llm: Forwarded to both environment and agent.
            progress: Progress callback invoked with a fraction and ``desc``.

        Returns:
            ``(summary_md, step_log_md, metrics_json, state_json)``. On any
            exception the first element is an error message with traceback
            and the rest are ``""``, ``"{}"``, ``"{}"``.
        """
        try:
            step_limit = int(max_steps)
            self.env = ReproAgentEnv(
                difficulty=difficulty,
                max_steps=step_limit,
                use_llm=use_llm,
                render_mode=None,
            )
            self.agent = create_agent(self.env, agent_type=agent_type, use_llm=use_llm)

            obs, info = self.env.reset()
            self.agent.reset()

            progress(0, desc="Starting episode...")

            step = 0
            terminated = False
            truncated = False
            total_reward = 0.0
            step_logs: List[str] = []

            while not (terminated or truncated) and step < step_limit:
                # Use the same integer budget for the fraction and the label.
                progress((step + 1) / step_limit, desc=f"Step {step + 1}/{step_limit}")

                action = self.agent.select_action(obs, info)
                # Reasoning is requested before stepping, i.e. against the
                # state the action was chosen in.
                reasoning = self.agent.get_reasoning(self.env.state, action)
                obs, reward, terminated, truncated, info = self.env.step(action)

                action_name = info.get("action_type", "unknown")
                latest = info.get("logs", [])
                log_line = latest[-1] if latest else ""

                # Two trailing spaces before "\n" are the Markdown hard line
                # break that keeps each field on its own line.
                step_log = (
                    f"### Step {step + 1}\n"
                    f"**Phase:** `{info.get('phase', '?')}`  \n"
                    f"**Action:** {action_name}  \n"
                    f"**Reasoning:** {reasoning}  \n"
                    f"**Reward:** {reward:.2f}  \n"
                    f"**Metric:** {info.get('current_metric', 0):.3f}\n"
                )
                if log_line:
                    step_log += f"\n> {log_line}\n"

                step_logs.append(step_log)
                total_reward += reward
                step += 1
                time.sleep(0.05)

            progress(1.0, desc="Done!")

            # Summary
            current_metric = info.get("current_metric", 0)
            target_metric = info.get("target_metric", 0)
            gap = info.get("gap", 0)
            # Here success means the env terminated (vs. hitting truncation
            # or the step budget).
            success = terminated

            icon = "✅" if success else "❌"
            summary = f"""# {icon} Episode Summary

## Results

| Metric | Value |
|--------|-------|
| **Steps Taken** | {step} |
| **Total Reward** | {total_reward:.2f} |
| **Current Metric** | {current_metric:.3f} |
| **Target Metric** | {target_metric:.3f} |
| **Gap** | {gap:.3f} |
| **Success** | {'Yes' if success else 'No'} |

## Progress
Progress: {(current_metric / target_metric * 100) if target_metric > 0 else 0:.1f}%
"""
            if success:
                summary += "\n## 🎉 Reproduction Successful!"
            else:
                summary += f"\n## ⚠️ Reproduction Incomplete\nGap remaining: {gap:.3f}"

            metrics_json = json.dumps({
                "current_metric": current_metric,
                "target_metric": target_metric,
                "gap": gap,
                "success": success,
                "phase": info.get("phase", ""),
            }, indent=2)

            return (
                summary,
                "\n\n---\n\n".join(step_logs),
                metrics_json,
                json.dumps(self.env.state.to_dict(), indent=2),
            )

        except Exception as e:
            error_msg = f"**Error:** {e}\n\n```\n{traceback.format_exc()}\n```"
            return (error_msg, "", "{}", "{}")
633
+
634
+
635
+ # ---------------------------------------------------------------------------
636
+ # Tab 3: Benchmark
637
+ # ---------------------------------------------------------------------------
638
+
639
def run_benchmark(difficulty: str, num_episodes: int, progress=gr.Progress()):
    """Compare the reasoning agent against the random agent.

    Runs ``num_episodes`` episodes per agent type (LLM disabled, 30-step
    environments) and reports success rate, average reward, average final
    metric and average steps for each.

    Args:
        difficulty: Difficulty passed to ``ReproAgentEnv``.
        num_episodes: Episodes per agent (converted with ``int``; must be
            at least 1).
        progress: Progress callback invoked with a fraction and ``desc``.

    Returns:
        A Markdown report, or a Markdown error message if anything fails.
    """
    try:
        episodes = int(num_episodes)
        if episodes < 1:
            # Averages over zero episodes are undefined; say so directly.
            return "**Error:** number of episodes must be at least 1."

        agent_types = ("reasoning", "random")
        results = {name: [] for name in agent_types}
        total_runs = episodes * len(agent_types)

        for agent_idx, agent_type in enumerate(agent_types):
            for ep in range(episodes):
                # Offset by the runs of earlier agent types so the bar keeps
                # advancing to 100% instead of restarting for each agent.
                progress(
                    (agent_idx * episodes + ep + 1) / total_runs,
                    desc=f"{agent_type.title()} agent — episode {ep + 1}/{episodes}",
                )

                env = ReproAgentEnv(difficulty=difficulty, max_steps=30, use_llm=False)
                agent = create_agent(env, agent_type=agent_type, use_llm=False)

                obs, info = env.reset()
                agent.reset()

                total_reward = 0
                steps = 0
                terminated = False
                truncated = False

                while not (terminated or truncated):
                    action = agent.select_action(obs, info)
                    obs, reward, terminated, truncated, info = env.step(action)
                    total_reward += reward
                    steps += 1

                results[agent_type].append({
                    "episode": ep + 1,
                    "success": terminated,
                    "steps": steps,
                    "reward": total_reward,
                    "metric": info.get("current_metric", 0),
                })

        progress(1.0, desc="Done!")

        # Build comparison markdown
        def stats(data):
            """Return (success %, mean reward, mean metric, mean steps)."""
            success_rate = sum(1 for d in data if d["success"]) / len(data) * 100
            avg_reward = np.mean([d["reward"] for d in data])
            avg_metric = np.mean([d["metric"] for d in data])
            avg_steps = np.mean([d["steps"] for d in data])
            return success_rate, avg_reward, avg_metric, avg_steps

        r_stats = stats(results["reasoning"])
        rand_stats = stats(results["random"])

        # Decided on success rate only; a tie goes to the reasoning agent.
        winner = "Reasoning Agent" if r_stats[0] >= rand_stats[0] else "Random Agent"

        report = f"""# Benchmark Results

**Difficulty:** {difficulty} | **Episodes per agent:** {episodes}

| Metric | Reasoning Agent | Random Agent |
|--------|:-:|:-:|
| **Success Rate** | {r_stats[0]:.0f}% | {rand_stats[0]:.0f}% |
| **Avg Reward** | {r_stats[1]:.1f} | {rand_stats[1]:.1f} |
| **Avg Final Metric** | {r_stats[2]:.3f} | {rand_stats[2]:.3f} |
| **Avg Steps** | {r_stats[3]:.1f} | {rand_stats[3]:.1f} |

### Winner: **{winner}** 🏆
"""
        return report

    except Exception as e:
        return f"**Error:** {e}\n```\n{traceback.format_exc()}\n```"
708
+
709
+
710
+ # ---------------------------------------------------------------------------
711
+ # Build Gradio App
712
+ # ---------------------------------------------------------------------------
713
+
714
+ CUSTOM_CSS = """
715
+ /* Dark premium theme overrides */
716
+ .gradio-container {
717
+ max-width: 1200px !important;
718
+ font-family: 'Inter', 'Segoe UI', sans-serif !important;
719
+ }
720
+ .header-block {
721
+ text-align: center;
722
+ padding: 28px 20px 18px;
723
+ background: linear-gradient(135deg, #0f0c29 0%, #302b63 50%, #24243e 100%);
724
+ color: #fff;
725
+ border-radius: 14px;
726
+ margin-bottom: 18px;
727
+ border: 1px solid rgba(255,255,255,0.08);
728
+ }
729
+ .header-block h1 {
730
+ margin: 0 0 4px 0;
731
+ font-size: 2.2rem;
732
+ font-weight: 800;
733
+ background: linear-gradient(90deg, #a78bfa, #60a5fa, #34d399);
734
+ -webkit-background-clip: text;
735
+ -webkit-text-fill-color: transparent;
736
+ }
737
+ .header-block p {
738
+ margin: 4px 0 0;
739
+ opacity: 0.85;
740
+ font-size: 1.05rem;
741
+ }
742
+ .step-badge {
743
+ display: inline-block;
744
+ background: rgba(167,139,250,0.15);
745
+ border: 1px solid rgba(167,139,250,0.3);
746
+ border-radius: 6px;
747
+ padding: 2px 8px;
748
+ font-size: 0.85rem;
749
+ color: #a78bfa;
750
+ margin-right: 6px;
751
+ }
752
+ """
753
+
754
+
755
+ def create_demo():
756
+ """Create the full Gradio demo."""
757
+ try:
758
+ create_sample_papers()
759
+ except Exception:
760
+ pass
761
+
762
+ sim_runner = SimulationRunner()
763
+
764
+ with gr.Blocks(
765
+ title="ReproAgent - ML Paper Reproduction",
766
+ css=CUSTOM_CSS,
767
+ theme=gr.themes.Base(
768
+ primary_hue=gr.themes.colors.violet,
769
+ secondary_hue=gr.themes.colors.blue,
770
+ neutral_hue=gr.themes.colors.slate,
771
+ font=gr.themes.GoogleFont("Inter"),
772
+ ).set(
773
+ body_background_fill="#0f172a",
774
+ body_background_fill_dark="#0f172a",
775
+ block_background_fill="#1e293b",
776
+ block_background_fill_dark="#1e293b",
777
+ block_border_color="#334155",
778
+ block_label_text_color="#94a3b8",
779
+ block_title_text_color="#e2e8f0",
780
+ input_background_fill="#0f172a",
781
+ input_background_fill_dark="#0f172a",
782
+ button_primary_background_fill="linear-gradient(135deg, #7c3aed 0%, #2563eb 100%)",
783
+ button_primary_text_color="#ffffff",
784
+ ),
785
+ ) as demo:
786
+ # --- Header ---
787
+ gr.HTML("""
788
+ <div class="header-block">
789
+ <h1>ReproAgent</h1>
790
+ <p>AI Agent for Reproducing ML Research Papers</p>
791
+ <p style="font-size:0.85rem; opacity:0.6; margin-top:6px;">
792
+ Upload a PDF &rarr; Agent reads paper &rarr; Finds repo &rarr; Runs code &rarr; Debugs errors &rarr; Tunes hyperparameters &rarr; Compares results
793
+ </p>
794
+ </div>
795
+ """)
796
+
797
+ # --- API Endpoints (Hidden) ---
798
+ with gr.Group(visible=False):
799
+ easy_mode_input = gr.File(label="Easy Input")
800
+ easy_mode_output_text = gr.Textbox(label="Easy Text")
801
+ easy_mode_output_file = gr.File(label="Easy File")
802
+ easy_mode_btn = gr.Button("run_easy_mode")
803
+ easy_mode_btn.click(
804
+ fn=run_easy_mode,
805
+ inputs=[easy_mode_input],
806
+ outputs=[easy_mode_output_text, easy_mode_output_file],
807
+ api_name="run_easy_mode"
808
+ )
809
+
810
+ with gr.Tabs():
811
+ # ============================================================
812
+ # TAB 1 — Reproduce a Paper
813
+ # ============================================================
814
+ with gr.Tab("📄 Reproduce a Paper", id="tab_reproduce"):
815
+ gr.Markdown("### Provide a paper to reproduce")
816
+
817
+ with gr.Row():
818
+ with gr.Column(scale=1):
819
+ pdf_upload = gr.File(
820
+ label="Upload PDF",
821
+ type="filepath",
822
+ )
823
+ paper_url = gr.Textbox(
824
+ label="Or paste paper / arXiv URL",
825
+ placeholder="https://arxiv.org/abs/2301.xxxxx or https://arxiv.org/pdf/2301.xxxxx.pdf",
826
+ lines=1,
827
+ )
828
+
829
+ gr.Markdown("---")
830
+
831
+ with gr.Row():
832
+ use_llm_tab1 = gr.Checkbox(
833
+ value=True,
834
+ label="Use LLM (Groq)",
835
+ info="Uses Groq API for intelligent parsing",
836
+ )
837
+ exec_mode = gr.Radio(
838
+ choices=["Simulation", "Real Execution"],
839
+ value="Simulation",
840
+ label="Execution Mode",
841
+ info="Simulation is faster & safer",
842
+ )
843
+
844
+ with gr.Row():
845
+ max_steps_tab1 = gr.Slider(
846
+ minimum=10, maximum=100, value=30, step=5,
847
+ label="Max Steps",
848
+ )
849
+ clone_dir_tab1 = gr.Textbox(
850
+ label="Clone Directory (for Real Execution)",
851
+ placeholder="/tmp/reproagent",
852
+ value="/tmp/reproagent",
853
+ lines=1,
854
+ )
855
+
856
+ reproduce_btn = gr.Button(
857
+ "🚀 Start Reproduction",
858
+ variant="primary",
859
+ size="lg",
860
+ )
861
+
862
+ with gr.Column(scale=2):
863
+ with gr.Tabs():
864
+ with gr.Tab("📋 Agent Log"):
865
+ agent_log = gr.Markdown("*Upload a PDF or paste a URL to begin.*")
866
+
867
+ with gr.Tab("📄 Paper Info"):
868
+ paper_info_display = gr.Markdown("*Paper details will appear here.*")
869
+
870
+ with gr.Tab("📈 Metrics"):
871
+ metrics_display = gr.Code(language="json", label="Live Metrics")
872
+
873
+ with gr.Tab("🔍 State"):
874
+ state_display = gr.Code(language="json", label="Environment State")
875
+
876
+ reproduce_btn.click(
877
+ fn=run_paper_reproduction,
878
+ api_name="run_paper_reproduction",
879
+ inputs=[pdf_upload, paper_url, use_llm_tab1, max_steps_tab1, exec_mode, clone_dir_tab1],
880
+ outputs=[agent_log, paper_info_display, metrics_display, state_display],
881
+ )
882
+
883
+ # ============================================================
884
+ # TAB 2 — Simulation Demo
885
+ # ============================================================
886
+ with gr.Tab("🎮 Simulation Demo", id="tab_simulation"):
887
+ gr.Markdown(
888
+ "### Quick simulation with pre-loaded papers\n"
889
+ "Test the agent on built-in paper configurations without uploading anything."
890
+ )
891
+
892
+ with gr.Row():
893
+ with gr.Column(scale=1):
894
+ sim_difficulty = gr.Radio(
895
+ ["easy", "medium", "hard"],
896
+ value="easy",
897
+ label="Difficulty",
898
+ info="Easy: Clean repo | Medium: Needs debugging | Hard: No code",
899
+ )
900
+ sim_agent = gr.Radio(
901
+ ["reasoning", "random"],
902
+ value="reasoning",
903
+ label="Agent Type",
904
+ )
905
+ sim_steps = gr.Slider(10, 100, value=30, step=5, label="Max Steps")
906
+ sim_llm = gr.Checkbox(value=False, label="Use LLM")
907
+ sim_btn = gr.Button("🚀 Run Simulation", variant="primary", size="lg")
908
+
909
+ with gr.Column(scale=2):
910
+ with gr.Tabs():
911
+ with gr.Tab("📋 Summary"):
912
+ sim_summary = gr.Markdown("*Run a simulation to see results*")
913
+ with gr.Tab("📝 Step Log"):
914
+ sim_steplog = gr.Markdown("*Step logs appear here*")
915
+ with gr.Tab("📈 Metrics"):
916
+ sim_metrics = gr.Code(language="json", label="Metrics")
917
+ with gr.Tab("🔍 State"):
918
+ sim_state = gr.Code(language="json", label="State")
919
+
920
+ sim_btn.click(
921
+ fn=sim_runner.run_episode,
922
+ inputs=[sim_difficulty, sim_agent, sim_steps, sim_llm],
923
+ outputs=[sim_summary, sim_steplog, sim_metrics, sim_state],
924
+ )
925
+
926
+ # ============================================================
927
+ # TAB 3 — Benchmark
928
+ # ============================================================
929
+ with gr.Tab("📊 Benchmark", id="tab_benchmark"):
930
+ gr.Markdown(
931
+ "### Compare agents\n"
932
+ "Run multiple episodes and compare the Reasoning agent vs Random baseline."
933
+ )
934
+
935
+ with gr.Row():
936
+ bench_difficulty = gr.Radio(
937
+ ["easy", "medium", "hard"],
938
+ value="easy",
939
+ label="Difficulty",
940
+ )
941
+ bench_episodes = gr.Slider(
942
+ 2, 20, value=5, step=1,
943
+ label="Episodes per agent",
944
+ )
945
+ bench_btn = gr.Button("📊 Run Benchmark", variant="primary")
946
+
947
+ bench_result = gr.Markdown("*Click Run Benchmark to start*")
948
+
949
+ bench_btn.click(
950
+ fn=run_benchmark,
951
+ inputs=[bench_difficulty, bench_episodes],
952
+ outputs=[bench_result],
953
+ )
954
+
955
+ # Footer
956
+ gr.HTML("""
957
+ <div style="text-align:center; padding:16px; opacity:0.5; font-size:0.8rem; margin-top:12px;">
958
+ ReproAgent &mdash; AI Agent Hackathon 2024 &mdash;
959
+ Gymnasium / OpenEnv compatible &mdash;
960
+ Groq &bull; PyTorch &bull; Gradio
961
+ </div>
962
+ """)
963
+
964
+ return demo
965
+
966
+
967
+ # ---------------------------------------------------------------------------
968
+ # Entry point
969
+ # ---------------------------------------------------------------------------
970
+
971
if __name__ == "__main__":
    # Script entry point: build the Gradio app and serve it with the
    # launch settings collected in one place.
    launch_kwargs = dict(
        server_name="localhost",
        server_port=7860,
        share=True,
        show_error=True,
    )
    demo = create_demo()
    demo.launch(**launch_kwargs)
server/llm_handler.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import google.generativeai as genai
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
def get_gemini_client():
    """
    Configure the Gemini SDK and return a generative model handle.

    Reads the API key from the ``GEMINI_API_KEY`` environment variable.

    Returns:
        A ``genai.GenerativeModel`` for 'gemini-2.5-flash', or for
        'gemini-pro' if constructing the preferred model raises.

    Raises:
        ValueError: If ``GEMINI_API_KEY`` is missing or empty.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY not found in environment variables")
    genai.configure(api_key=api_key)
    # Prefer gemini-2.5-flash; fall back to gemini-pro if the preferred model
    # cannot be constructed.
    # NOTE(review): constructing a GenerativeModel may not validate the model
    # name, in which case this fallback never triggers -- confirm.
    try:
        return genai.GenerativeModel('gemini-2.5-flash')
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        return genai.GenerativeModel('gemini-pro')
18
+
19
def generate_summary_and_ppt_content(text: str):
    """
    Generate a summary and a PPT slide plan from research paper text.

    Only the first 30,000 characters of ``text`` are sent to the model.

    Args:
        text: Plain text of the research paper.

    Returns:
        The parsed JSON reply; the prompt asks for a dict with a
        "description" string and a "ppt_slides" list. If the reply cannot be
        read or parsed, a fallback dict with an error description and an
        empty "ppt_slides" list is returned instead.

    Raises:
        Whatever ``get_gemini_client()`` or ``generate_content`` raise; only
        reading and parsing the reply is guarded.
    """
    model = get_gemini_client()

    prompt = f"""
    Analyze the research paper and provide two things:
    1. A summary in a clean, structured format.
    2. A structured plan for an impressive PowerPoint presentation.

    STRICT FORMATTING RULES FOR THE SUMMARY:
    - Use clear section headings like: 1. Core Idea, 2. Background, etc.
    - Do NOT use emojis.
    - Do NOT use excessive bold formatting inside paragraphs.
    - Only bold the section titles.
    - Use bullet points (•) instead of long paragraphs.
    - Keep sentences short and clear.
    - Avoid decorative or marketing-style language.
    - Keep it concise but informative.
    - Do not use * at all.

    SUMMARY STRUCTURE:
    1. Core Idea
    2. Background / Problem
    3. Key Observation
    4. Method (How it works)
    5. Results
    6. Contributions
    7. Limitations (if any)

    Format your response as a valid JSON object. Ensure all strings (especially the 'description') are properly escaped for JSON (e.g., use \\n for newlines).

    JSON structure:
    {{
        "description": "The summary following the formatting rules above",
        "ppt_slides": [
            {{
                "title": "Slide Title",
                "content": ["Key point 1", "Key point 2", ...]
            }}
        ]
    }}

    Research Paper Text:
    {text[:30000]}
    """

    response = model.generate_content(prompt)

    try:
        # Strip an optional Markdown code fence. The opening and closing
        # fences are removed independently so a reply that lacks the closing
        # fence does not lose the last three characters of its JSON.
        content = response.text.strip()
        if content.startswith("```"):
            content = content.removeprefix("```json").removeprefix("```")
            content = content.removesuffix("```").strip()

        # strict=False tolerates raw control characters inside JSON strings.
        return json.loads(content, strict=False)
    except Exception as e:
        print(f"Error parsing LLM response: {e}")
        return {
            "description": "Error generating description. Please try again.",
            "ppt_slides": []
        }
86
+
87
def analyze_installation_error(error_log: str, repo_structure: str):
    """
    Use the LLM to analyze an installation error and suggest a fix.

    Only the last 2,000 characters of ``error_log`` are sent to the model.

    Args:
        error_log: Output of the failed installation.
        repo_structure: Textual description of the repository layout.

    Returns:
        The parsed JSON reply (the prompt asks for "diagnosis", "action",
        "command", "file_to_edit" and "new_content"), or ``None`` if the
        reply cannot be read or parsed.

    Raises:
        Whatever ``get_gemini_client()`` or ``generate_content`` raise; only
        reading and parsing the reply is guarded.
    """
    model = get_gemini_client()

    prompt = f"""
    You are an expert DevOps and ML Engineer. A Python environment installation failed with the following error:

    ERROR LOG:
    {error_log[-2000:]}

    REPOSITORY STRUCTURE:
    {repo_structure}

    Based on the error, provide a solution to fix the installation.
    Format your response as a JSON object:
    {{
        "diagnosis": "Short explanation of what went wrong",
        "action": "install_package" | "edit_requirements" | "change_python_version",
        "command": "The exact command to run to fix it (if any)",
        "file_to_edit": "path/to/file (if any)",
        "new_content": "New content for the file (if any)"
    }}
    """

    response = model.generate_content(prompt)
    try:
        # Remove the opening and closing code fences independently so a
        # missing closing fence does not truncate the JSON payload.
        content = response.text.strip()
        if content.startswith("```"):
            content = content.removeprefix("```json").removeprefix("```")
            content = content.removesuffix("```").strip()
        return json.loads(content, strict=False)
    except Exception as e:
        # Was a bare ``except:``; report the failure instead of hiding it.
        print(f"Error parsing LLM response: {e}")
        return None
123
+
124
def extract_execution_instructions(repo_structure: str, readme_text: str):
    """
    Ask the LLM how to run the repository's evaluation/test script.

    Only the first 5,000 characters of ``readme_text`` are sent to the model.

    Args:
        repo_structure: Textual description of the repository layout.
        readme_text: Contents of the repository README.

    Returns:
        The parsed JSON reply (the prompt asks for "command" and
        "explanation"). If the reply cannot be read or parsed, a fallback
        dict that suggests ``python main.py``.

    Raises:
        Whatever ``get_gemini_client()`` or ``generate_content`` raise; only
        reading and parsing the reply is guarded.
    """
    model = get_gemini_client()
    prompt = f"""
    Based on the repository structure and README, what is the exact command to run the evaluation or test script to verify the results?

    STRUCTURE:
    {repo_structure}

    README SNIPPET:
    {readme_text[:5000]}

    Return a JSON object:
    {{
        "command": "python eval.py ...",
        "explanation": "Why this command is selected"
    }}
    """
    response = model.generate_content(prompt)
    try:
        # Remove the opening and closing code fences independently so a
        # missing closing fence does not truncate the JSON payload.
        content = response.text.strip()
        if content.startswith("```"):
            content = content.removeprefix("```json").removeprefix("```")
            content = content.removesuffix("```").strip()
        return json.loads(content, strict=False)
    except Exception as e:
        print(f"Error parsing LLM response: {e}")
        # Single braces: outside an f-string, ``{{...}}`` is a set literal
        # containing a dict and raises "unhashable type: 'dict'".
        return {"command": "python main.py", "explanation": "Fallback to main.py"}
154
+
155
def extract_claimed_metrics(paper_text: str):
    """
    Extract the main results reported in the paper via the LLM.

    Only the first 20,000 characters of ``paper_text`` are sent to the model.

    Args:
        paper_text: Plain text of the research paper.

    Returns:
        The parsed JSON reply (the prompt asks for a "metrics" list of
        name/value/context entries), or ``{"metrics": []}`` if the reply
        cannot be read or parsed.

    Raises:
        Whatever ``get_gemini_client()`` or ``generate_content`` raise; only
        reading and parsing the reply is guarded.
    """
    model = get_gemini_client()
    prompt = f"""
    Extract the primary performance metrics (accuracy, F1, FID, etc.) reported in the following paper text.
    Focus on the main results table.

    TEXT:
    {paper_text[:20000]}

    Return a JSON object:
    {{
        "metrics": [
            {{"name": "Accuracy", "value": "94.2%", "context": "ImageNet validation"}},
            ...
        ]
    }}
    """
    response = model.generate_content(prompt)
    try:
        # Remove the opening and closing code fences independently so a
        # missing closing fence does not truncate the JSON payload.
        content = response.text.strip()
        if content.startswith("```"):
            content = content.removeprefix("```json").removeprefix("```")
            content = content.removesuffix("```").strip()
        return json.loads(content, strict=False)
    except Exception as e:
        print(f"Error parsing LLM response: {e}")
        # Single braces: outside an f-string, ``{{...}}`` is a set literal
        # containing a dict and raises "unhashable type: 'dict'".
        return {"metrics": []}
185
+
186
def extract_metrics_from_logs(logs: str):
    """
    Parse execution logs with the LLM to find the resulting metrics.

    Only the last 5,000 characters of ``logs`` are sent to the model.

    Args:
        logs: Output of the evaluation script run.

    Returns:
        The parsed JSON reply (the prompt asks for a "metrics" list of
        name/value entries), or ``{"metrics": []}`` if the reply cannot be
        read or parsed.

    Raises:
        Whatever ``get_gemini_client()`` or ``generate_content`` raise; only
        reading and parsing the reply is guarded.
    """
    model = get_gemini_client()
    prompt = f"""
    The following is the output log of a research paper's evaluation script.
    Identify and extract the final performance metrics achieved.

    LOGS:
    {logs[-5000:]}

    Return a JSON object:
    {{
        "metrics": [
            {{"name": "Accuracy", "value": "93.8%"}},
            ...
        ]
    }}
    """
    response = model.generate_content(prompt)
    try:
        # Remove the opening and closing code fences independently so a
        # missing closing fence does not truncate the JSON payload.
        content = response.text.strip()
        if content.startswith("```"):
            content = content.removeprefix("```json").removeprefix("```")
            content = content.removesuffix("```").strip()
        return json.loads(content, strict=False)
    except Exception as e:
        print(f"Error parsing LLM response: {e}")
        # Single braces: outside an f-string, ``{{...}}`` is a set literal
        # containing a dict and raises "unhashable type: 'dict'".
        return {"metrics": []}
server/pdf_processor.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+
3
def extract_text_from_pdf(file_path: str) -> str:
    """
    Extract the text of every page of a PDF file using PyMuPDF.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The concatenated text of all pages, in page order, or an empty
        string if the file cannot be opened or read (the error is printed).
    """
    try:
        # The context manager closes the document even when a page fails to
        # extract; the original leaked the handle in that case.
        with fitz.open(file_path) as doc:
            # join() avoids repeated string concatenation on large papers.
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return ""
server/ppt_generator.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pptx import Presentation
3
+ from pptx.util import Inches, Pt
4
+ from pptx.enum.text import PP_ALIGN
5
+ from pptx.dml.color import RGBColor
6
+
7
def create_ppt(slides_data, output_path):
    """
    Create a dark-themed PowerPoint presentation and save it to disk.

    The first entry of ``slides_data`` supplies only the title of the title
    slide; every following entry becomes one content slide with a heading
    and bulleted points.

    Args:
        slides_data: List of dicts with a "title" string and a "content"
            list of strings. ``None`` or an empty list yields a deck with
            only the title slide. A "content" value that is missing or
            ``None`` yields no bullets; a plain string is treated as a
            single bullet.
        output_path: Destination path of the .pptx file.

    Returns:
        ``output_path``, after the presentation has been saved.
    """
    # The slide data comes from LLM output, so tolerate a missing list.
    slides = list(slides_data or [])

    prs = Presentation()

    # Define colors
    bg_color = RGBColor(15, 23, 42)        # #0F172A (Deep Navy)
    accent_color = RGBColor(59, 130, 246)  # #3B82F6 (Bright Blue)
    text_color = RGBColor(248, 250, 252)   # #F8FAFC (Near White)

    def apply_slide_background(slide):
        # Fill the whole slide background with the solid theme color.
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = bg_color

    def add_header_bar(slide):
        # Add a decorative full-width accent bar at the top of the slide.
        shape = slide.shapes.add_shape(
            1,  # 1 is Rectangle
            Inches(0), Inches(0), prs.slide_width, Inches(0.1),
        )
        shape.fill.solid()
        shape.fill.fore_color.rgb = accent_color
        shape.line.fill.background()  # no outline

    # 1. Title Slide
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    apply_slide_background(slide)
    add_header_bar(slide)

    title = slide.shapes.title
    subtitle = slide.placeholders[1]

    default_title = "Research Presentation"
    title.text = (slides[0].get("title") or default_title) if slides else default_title
    subtitle.text = "Generated by RepoAgent · Easy Mode"

    # Style title
    title.text_frame.paragraphs[0].font.color.rgb = text_color
    title.text_frame.paragraphs[0].font.bold = True
    subtitle.text_frame.paragraphs[0].font.color.rgb = accent_color

    # 2. Content Slides
    for slide_info in slides[1:]:
        # Use a blank layout to have full control
        slide = prs.slides.add_slide(prs.slide_layouts[6])
        apply_slide_background(slide)
        add_header_bar(slide)

        # Title
        title_box = slide.shapes.add_textbox(
            Inches(0.5), Inches(0.4), prs.slide_width - Inches(1), Inches(1)
        )
        p = title_box.text_frame.paragraphs[0]
        p.text = slide_info.get("title") or "Key Point"
        p.font.bold = True
        p.font.size = Pt(32)
        p.font.color.rgb = accent_color

        # Content
        content_box = slide.shapes.add_textbox(
            Inches(0.5),
            Inches(1.5),
            prs.slide_width - Inches(1),
            prs.slide_height - Inches(2),
        )
        ctf = content_box.text_frame
        ctf.word_wrap = True

        # Normalize the bullet list: a null value means no bullets, and a
        # bare string is one bullet rather than one bullet per character.
        points = slide_info.get("content") or []
        if isinstance(points, str):
            points = [points]

        for idx, point in enumerate(points):
            # A new text frame already holds one empty paragraph; reuse it
            # for the first bullet.
            p = ctf.paragraphs[0] if idx == 0 else ctf.add_paragraph()
            p.text = f"• {point}"
            p.font.size = Pt(18)
            p.font.color.rgb = text_color
            p.space_after = Pt(10)

    # Save the presentation
    prs.save(output_path)
    return output_path