# paper-to-code / app.py
# Hugging Face Space by sammoftah — "Deploy Paper to Code", commit 46ba771 (verified).
"""
Paper to Code
Turn a method description into an implementation plan, PyTorch scaffold, and reproducibility checklist.
"""
import json
import os
import re
import sys
from textwrap import dedent
from typing import Dict
import gradio as gr
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from shared.components import create_footer, create_method_panel, create_premium_hero
try:
from huggingface_hub import InferenceClient
except Exception: # pragma: no cover - optional dependency
InferenceClient = None
SAMPLE_METHOD = """We introduce a retrieval-augmented classifier for support tickets.
Each ticket is embedded with a sentence-transformer, nearest historical cases are retrieved,
and a lightweight cross-encoder reranks them before the final label is produced.
The model reports confidence, top evidence snippets, and an abstain decision when evidence is weak."""
def _extract_keywords(text: str):
words = re.findall(r"[A-Za-z][A-Za-z\-]{3,}", text.lower())
stop = {"with", "that", "this", "from", "before", "after", "when", "model", "method", "using"}
counts = {}
for word in words:
if word not in stop:
counts[word] = counts.get(word, 0) + 1
return [word for word, _ in sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:8]]
def _hf_plan(method_text: str, target: str) -> Dict[str, str]:
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not token or InferenceClient is None:
return {}
prompt = f"""
You are an ML engineer converting papers into clean implementation plans.
Return JSON with keys: summary, modules, code, checklist.
Target artifact: {target}
Method text:
{method_text}
"""
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=token)
response = client.text_generation(prompt, max_new_tokens=900, temperature=0.25)
match = re.search(r"\{[\s\S]*\}", response)
if not match:
return {}
try:
parsed = json.loads(match.group(0))
except json.JSONDecodeError:
return {}
return {key: str(parsed.get(key, "")).strip() for key in ["summary", "modules", "code", "checklist"]}
def _fallback_plan(method_text: str, target: str) -> Dict[str, str]:
    """Build a deterministic, offline implementation plan.

    Used whenever the HF inference path is unavailable or fails. The plan is
    assembled from keyword signals in *method_text*; *target* selects the
    code scaffold (an sklearn evaluation harness for "Evaluation Harness",
    a PyTorch module for every other target).
    """
    signals = _extract_keywords(method_text)
    technique = ", ".join(signals[:5]) or "model pipeline"
    summary = (
        f"This looks like a {technique} workflow. The safest implementation path is to "
        "separate data preparation, model logic, inference, and evaluation so each claim can be tested."
    )
    modules = dedent(f"""
        1. `data.py` - parse examples, labels, and evidence fields.
        2. `model.py` - implement the core method as a small, testable module.
        3. `retrieve.py` - isolate nearest-neighbor or feature lookup logic if retrieval is involved.
        4. `train.py` - keep hyperparameters explicit and serializable.
        5. `evaluate.py` - report task metric, calibration, and failure cases.
        Detected method signals: {", ".join(signals) if signals else "none"}
    """).strip()
    if target == "Evaluation Harness":
        code = dedent('''
            from sklearn.metrics import accuracy_score, f1_score
            def evaluate(predictions, labels, confidences, abstain_threshold=0.55):
                keep = [score >= abstain_threshold for score in confidences]
                covered_preds = [pred for pred, ok in zip(predictions, keep) if ok]
                covered_labels = [label for label, ok in zip(labels, keep) if ok]
                return {
                    "coverage": sum(keep) / max(1, len(keep)),
                    "accuracy": accuracy_score(covered_labels, covered_preds) if covered_preds else 0.0,
                    "macro_f1": f1_score(covered_labels, covered_preds, average="macro") if covered_preds else 0.0,
                }
        ''').strip()
    else:
        code = dedent('''
            import torch
            from torch import nn
            class PaperModule(nn.Module):
                """Minimal scaffold generated from the method description."""
                def __init__(self, encoder: nn.Module, hidden_size: int, num_labels: int):
                    super().__init__()
                    self.encoder = encoder
                    self.classifier = nn.Sequential(
                        nn.Linear(hidden_size, hidden_size),
                        nn.GELU(),
                        nn.Dropout(0.1),
                        nn.Linear(hidden_size, num_labels),
                    )
                def forward(self, inputs, evidence=None):
                    encoded = self.encoder(**inputs)
                    pooled = encoded.last_hidden_state[:, 0]
                    logits = self.classifier(pooled)
                    confidence = torch.softmax(logits, dim=-1).max(dim=-1).values
                    return {"logits": logits, "confidence": confidence, "evidence": evidence}
        ''').strip()
    checklist = dedent("""
        - Define the exact dataset split and leakage checks.
        - Log every hyperparameter needed to reproduce the run.
        - Add one baseline that is simpler than the proposed method.
        - Report both aggregate metrics and 5-10 qualitative failures.
        - Save model card notes: intended use, limitations, and ethical risks.
    """).strip()
    return {"summary": summary, "modules": modules, "code": code, "checklist": checklist}
def generate_scaffold(method_text: str, target: str):
    """Gradio handler: turn method text into (summary, modules, code, checklist).

    Guards against empty or too-short input with a user-facing message. The
    HF plan is merged with the local fallback field by field: a model
    response that is valid JSON but leaves some keys blank (empty strings)
    previously produced a truthy dict that suppressed the fallback entirely,
    rendering blank panels; now each blank field falls back individually.
    """
    if not method_text or len(method_text.strip()) < 40:
        return "Paste at least a paragraph of method text.", "", "", ""
    remote = _hf_plan(method_text, target)
    local = _fallback_plan(method_text, target)
    # Prefer the remote value per key; empty/missing keys use the local plan.
    return tuple(remote.get(key) or local[key] for key in ("summary", "modules", "code", "checklist"))
# --- UI wiring -------------------------------------------------------------
# Two-column layout: method text and target selector on the left; the four
# generated artifacts (summary, plan, code, checklist) on the right.
with gr.Blocks(title="Paper to Code", theme=gr.themes.Soft()) as app:
    # Shared branding components imported from shared.components.
    create_premium_hero(
        "Paper to Code",
        "Convert a paper method into a reproducible engineering scaffold: modules, PyTorch code, evaluation harness, and checklist.",
        "🧪",
        badge="Research Engineering",
        highlights=["Method parsing", "PyTorch scaffold", "Reproducibility checklist"],
    )
    create_method_panel({
        "Technique": "Paper implementation planning with optional Hugging Face inference.",
        "What it proves": "You can translate research claims into testable software boundaries.",
        "HF capability": "Use HF-hosted LLMs when `HF_TOKEN` is available; fall back locally otherwise.",
    })
    with gr.Row():
        with gr.Column(scale=1):
            # Inputs: free-form method text plus the artifact type to generate.
            method_input = gr.Textbox(
                label="Paper method or abstract",
                value=SAMPLE_METHOD,
                lines=12,
                placeholder="Paste the method section, abstract, or algorithm summary...",
            )
            target = gr.Radio(
                ["PyTorch Module", "Training Loop", "Evaluation Harness", "Experiment Checklist"],
                value="PyTorch Module",
                label="Target artifact",
            )
            generate_btn = gr.Button("Generate Research Scaffold", variant="primary")
        with gr.Column(scale=1):
            # Outputs, in the same order generate_scaffold returns them.
            summary = gr.Markdown(label="Technique extraction")
            modules = gr.Markdown(label="Implementation plan")
            code = gr.Code(label="Code scaffold", language="python", lines=18)
            checklist = gr.Markdown(label="Reproducibility checklist")
    # Wire the button to the handler; outputs map positionally to its tuple.
    generate_btn.click(generate_scaffold, inputs=[method_input, target], outputs=[summary, modules, code, checklist])
    gr.Markdown("""
## Why This Is Useful
Many AI demos stop at summarizing papers. This Space shows the engineering step after reading: identify components, define test boundaries, scaffold code, and make reproducibility explicit.
""")
    create_footer("Paper to Code")

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard Hugging Face Spaces binding.
    app.launch(server_name="0.0.0.0", server_port=7860)