dunkindonuts123 committed on
Commit
4465cb6
·
0 Parent(s):

NLP java summarization experiment

Browse files
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.pkl filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .venv/
6
+ venv/
7
+ env/
8
+
9
+ # Secrets
10
+ .env
11
+ .env.*
12
+
13
+ # OS / IDE
14
+ .DS_Store
15
+ .idea/
16
+ .vscode/
17
+
18
+ # Jupyter
19
+ .ipynb_checkpoints/
20
+
21
+ # Generated experiment outputs
22
+ per_sample_results.csv
23
+ output.png
24
+ rouge_comparison.png
25
+
26
+ # App cache (rebuilt on first run)
27
+ cache/
aggregate_comparison.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Model,Tier,Type,ROUGE-1,ROUGE-2,ROUGE-L
2
+ CodeT5 ⚑,Code-specific fine-tuned,Abstractive,0.37223313234517386,0.15107941059535412,0.3433441352676735
3
+ TF-IDF,Corpus-fitted extractive,Extractive,0.11096833984563331,0.009263625959092267,0.09634245619789128
4
+ LexRank,Corpus-fitted extractive,Extractive,0.10392705338524205,0.007088415587196533,0.0920576916692153
5
+ SentenceTransformers,General-language pretrained,Extractive,0.10231463767555077,0.007377401336714412,0.09048400648023576
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI web app — Java README summarization comparison."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from contextlib import asynccontextmanager
7
+ from fastapi import FastAPI, File, HTTPException, Request, UploadFile
8
+ from fastapi.responses import HTMLResponse, JSONResponse
9
+ from fastapi.staticfiles import StaticFiles
10
+ from fastapi.templating import Jinja2Templates
11
+
12
+ from engine.pipeline import MODEL_CATALOG, SummarizationPipeline
13
+
14
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s %(name)s: %(message)s")
15
+ logger = logging.getLogger(__name__)
16
+
17
+ pipeline = SummarizationPipeline(top_n=5)
18
+ templates = Jinja2Templates(directory="templates")
19
+
20
+
21
@asynccontextmanager
async def lifespan(_: FastAPI):
    """Load the summarization pipeline once, before the app starts serving.

    A failed load is logged instead of propagated so the server still comes
    up; handlers then report the failure via ``pipeline.error``.
    """
    logger.info("Starting model load (first run may download datasets and weights) ...")
    try:
        pipeline.load()
    except Exception as load_error:
        # Deliberate best-effort: keep the process alive so the error is visible over HTTP.
        logger.error("Startup load failed: %s", load_error)
    yield
29
+
30
+
31
+ app = FastAPI(
32
+ title="Auto-README Java Summarizer",
33
+ description="Compare four summarization models on Java source files.",
34
+ lifespan=lifespan,
35
+ )
36
+ app.mount("/static", StaticFiles(directory="static"), name="static")
37
+
38
+
39
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render the landing page with the model catalog and pipeline status."""
    page_context = {
        "models": MODEL_CATALOG,
        "ready": pipeline.ready,
        "loading": pipeline.loading,
        "error": pipeline.error,
    }
    return templates.TemplateResponse(
        request=request,
        name="index.html",
        context=page_context,
    )
51
+
52
+
53
@app.get("/api/health")
async def health():
    """Report the pipeline's load status together with the model catalog."""
    status = dict(
        ready=pipeline.ready,
        loading=pipeline.loading,
        error=pipeline.error,
        models=MODEL_CATALOG,
    )
    return status
61
+
62
+
63
def _comparison_payload(result) -> dict:
    """Flatten a pipeline comparison result into a JSON-serializable dict."""
    return {
        "filename": result.filename,
        "char_count": result.char_count,
        "token_count": result.token_count,
        "statement_count": result.statement_count,
        "method_count": result.method_count,
        "top_n": result.top_n,
        "total_elapsed_ms": round(result.total_elapsed_ms, 1),
        "summaries": [
            {
                "model_id": s.model_id,
                "model": s.model,
                "tier": s.tier,
                "approach": s.approach,
                "accent": s.accent,
                "summary": s.summary,
                "elapsed_ms": round(s.elapsed_ms, 1),
                "methods": [
                    {"name": m.name, "summary": m.summary}
                    for m in s.methods
                ],
            }
            for s in result.summaries
        ],
    }


@app.post("/api/summarize")
async def summarize(file: UploadFile = File(...)):
    """Summarize an uploaded ``.java`` file with every model in the pipeline.

    Args:
        file: The uploaded Java source file; must be UTF-8 text.

    Returns:
        A JSON response with file statistics and one summary per model.

    Raises:
        HTTPException: 503 while the pipeline is loading or after it failed
            to load; 400 for a non-``.java`` name, non-UTF-8 bytes, empty
            content, or a ``ValueError`` from the pipeline; 500 for any
            other summarization failure.
    """
    if not pipeline.ready:
        if pipeline.loading:
            raise HTTPException(status_code=503, detail="Models are still loading. Try again shortly.")
        raise HTTPException(
            status_code=503,
            detail=pipeline.error or "Models failed to load. Check server logs.",
        )

    filename = file.filename or "upload.java"
    if not filename.lower().endswith(".java"):
        raise HTTPException(status_code=400, detail="Please upload a .java file.")

    raw = await file.read()
    try:
        # "utf-8-sig" decodes plain UTF-8 identically but also drops a leading
        # byte-order mark, which str.strip() would otherwise leave in the text.
        java_source = raw.decode("utf-8-sig")
    except UnicodeDecodeError as exc:
        raise HTTPException(status_code=400, detail="File must be UTF-8 encoded text.") from exc

    if not java_source.strip():
        raise HTTPException(status_code=400, detail="File is empty.")

    try:
        result = pipeline.compare(java_source, filename=filename)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        # Boundary handler: log the traceback, return a generic 500 to the client.
        logger.exception("Summarization failed")
        raise HTTPException(status_code=500, detail="Summarization failed.") from exc

    return JSONResponse(_comparison_payload(result))
engine/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
engine/models.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Summarization models — ported from readme_summarization_experiment.ipynb."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import re
7
+ from collections import Counter, defaultdict
8
+
9
+ import numpy as np
10
+ import torch
11
+ from huggingface_hub import hf_hub_download
12
+ from sentence_transformers import SentenceTransformer
13
+ from tokenizers import ByteLevelBPETokenizer
14
+ from transformers import AutoModelForSeq2SeqLM
15
+
16
+ from engine.preprocessing import tokenize
17
+
18
+
19
class TFIDFModel:
    """Extractive summarizer that ranks sentences by their TF-IDF score."""

    def __init__(self) -> None:
        self.idf: dict[str, float] = {}
        self.N = 0

    def fit(self, corpus: list[str]) -> TFIDFModel:
        """Compute smoothed IDF weights from ``corpus`` and return ``self``."""
        doc_count = len(corpus)
        doc_freq: Counter[str] = Counter()
        for sentence in corpus:
            # Each sentence counts at most once per term (document frequency).
            doc_freq.update(set(tokenize(sentence)))
        self.idf = {
            term: math.log((doc_count + 1) / (seen + 1)) + 1
            for term, seen in doc_freq.items()
        }
        self.N = doc_count
        return self

    def load_idf(self, idf: dict[str, float], n: int) -> TFIDFModel:
        """Install precomputed IDF weights and corpus size; return ``self``."""
        self.idf = idf
        self.N = n
        return self

    def _score(self, sentence: str) -> float:
        """Return the summed TF-IDF weight of ``sentence`` (0.0 if no tokens)."""
        tokens = tokenize(sentence)
        if not tokens:
            return 0.0
        total = len(tokens)
        # Terms missing from the fitted vocabulary fall back to an IDF of 1.0.
        return sum(
            count / total * self.idf.get(term, 1.0)
            for term, count in Counter(tokens).items()
        )

    def summarize(self, sentences: list[str], top_n: int = 1) -> list[str]:
        """Return the ``top_n`` highest-scoring sentences (``[""]`` if none given)."""
        if not sentences:
            return [""]
        ranked = list(sentences)
        ranked.sort(key=self._score, reverse=True)
        return ranked[:top_n]
56
+
57
+
58
class LexRankModel:
    """Graph-based extractive summarizer (LexRank).

    Sentences are nodes; edges carry the cosine similarity of their TF-IDF
    vectors when it reaches ``THRESHOLD``. PageRank over that graph ranks
    the sentences.
    """

    THRESHOLD = 0.1   # minimum cosine similarity for an edge
    DAMPING = 0.85    # PageRank damping factor
    MAX_ITER = 100    # power-iteration cap
    TOL = 1e-6        # L1 change below which iteration stops

    def __init__(self) -> None:
        self.idf: dict[str, float] = {}

    def fit(self, corpus: list[str]) -> LexRankModel:
        """Compute smoothed IDF weights from ``corpus`` and return ``self``."""
        n = len(corpus)
        df: dict[str, int] = defaultdict(int)
        for sent in corpus:
            for term in set(tokenize(sent)):
                df[term] += 1
        self.idf = {
            term: math.log((n + 1) / (freq + 1)) + 1
            for term, freq in df.items()
        }
        return self

    def load_idf(self, idf: dict[str, float]) -> LexRankModel:
        """Install precomputed IDF weights and return ``self``."""
        self.idf = idf
        return self

    def _tfidf_vec(self, sentence: str) -> dict[str, float]:
        """Return a sparse ``term -> tf * idf`` vector (empty if no tokens)."""
        tokens = tokenize(sentence)
        if not tokens:
            return {}
        tf = Counter(tokens)
        # Terms missing from the fitted vocabulary fall back to an IDF of 1.0.
        return {t: (tf[t] / len(tokens)) * self.idf.get(t, 1.0) for t in tf}

    @staticmethod
    def _cosine(a: dict[str, float], b: dict[str, float]) -> float:
        """Return the cosine similarity of two sparse vectors (0.0 if disjoint)."""
        common = set(a) & set(b)
        if not common:
            return 0.0
        dot = sum(a[t] * b[t] for t in common)
        norm_a = math.sqrt(sum(v ** 2 for v in a.values()))
        norm_b = math.sqrt(sum(v ** 2 for v in b.values()))
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return dot / (norm_a * norm_b)

    def _pagerank(self, matrix: np.ndarray) -> np.ndarray:
        """Run damped power iteration over a square similarity ``matrix``."""
        n = len(matrix)
        row_sums = matrix.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1  # isolated nodes: avoid division by zero
        p = matrix / row_sums
        scores = np.ones(n) / n
        for _ in range(self.MAX_ITER):
            new_scores = (1 - self.DAMPING) / n + self.DAMPING * p.T @ scores
            if np.abs(new_scores - scores).sum() < self.TOL:
                break
            scores = new_scores
        return scores

    def summarize(self, sentences: list[str], top_n: int = 1) -> list[str]:
        """Return the ``top_n`` most central sentences.

        Empty input yields ``[""]``, matching the other summarizers in this
        module. A single sentence is returned as-is. When no pair of
        sentences reaches ``THRESHOLD``, ranking falls back to the summed
        TF-IDF weight of each sentence.
        """
        if not sentences:
            # Same contract as TFIDFModel / SentenceTransformerModel.
            return [""]
        if len(sentences) == 1:
            return sentences[:top_n]
        vecs = [self._tfidf_vec(s) for s in sentences]
        n = len(sentences)
        sim = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                c = self._cosine(vecs[i], vecs[j])
                if c >= self.THRESHOLD:
                    sim[i, j] = sim[j, i] = c
        if sim.sum() == 0:
            # Graph has no edges, so PageRank would be uniform.
            scored = sorted(range(n), key=lambda i: sum(vecs[i].values()), reverse=True)
            return [sentences[i] for i in scored[:top_n]]
        scores = self._pagerank(sim)
        ranked = np.argsort(scores)[::-1]
        return [sentences[i] for i in ranked[:top_n]]
134
+
135
+
136
class SentenceTransformerModel:
    """Extractive summarizer that picks sentences nearest the embedding centroid."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2") -> None:
        self.model = SentenceTransformer(model_name)

    def summarize(self, sentences: list[str], top_n: int = 1) -> list[str]:
        """Return the ``top_n`` sentences most similar to the mean embedding.

        Returns ``[""]`` when ``sentences`` is empty.
        """
        if not sentences:
            return [""]
        vectors = self.model.encode(sentences, convert_to_numpy=True)
        center = vectors.mean(axis=0)
        lengths = np.linalg.norm(vectors, axis=1, keepdims=True)
        lengths[lengths == 0] = 1  # keep zero vectors from dividing by zero
        unit_vectors = vectors / lengths
        # Small epsilon guards against a zero-length centroid.
        unit_center = center / (np.linalg.norm(center) + 1e-9)
        similarity = unit_vectors @ unit_center
        best_first = np.argsort(similarity)[::-1]
        return [sentences[i] for i in best_first[:top_n]]
152
+
153
+
154
class CodeT5Model:
    """Abstractive summarizer backed by a CodeT5 checkpoint fine-tuned on Java."""

    MODEL_NAME = "Salesforce/codet5-base-codexglue-sum-java"
    VOCAB_REPO = "Salesforce/codet5-base"
    _SPECIAL_TOKENS = ("<pad>", "<s>", "</s>", "<unk>", "<mask>")

    def __init__(self) -> None:
        # The BPE vocabulary comes from the base repo; weights from the fine-tuned one.
        vocab_file = hf_hub_download(self.VOCAB_REPO, "vocab.json")
        merges_file = hf_hub_download(self.VOCAB_REPO, "merges.txt")
        self.tokenizer = ByteLevelBPETokenizer(vocab_file, merges_file)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.MODEL_NAME)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        self.model.eval()

    def _clean(self, text: str) -> str:
        """Strip special and sentinel tokens, then collapse whitespace."""
        cleaned = text
        for special in self._SPECIAL_TOKENS:
            cleaned = cleaned.replace(special, " ")
        cleaned = re.sub(r"<extra_id_\d+>", " ", cleaned)
        cleaned = re.sub(r"\s+", " ", cleaned)
        return cleaned.strip()

    def summarize(self, raw_code: str) -> str:
        """Generate a summary for ``raw_code``; blank input yields ``""``.

        Only the first 256 token ids of the encoded input are used.
        """
        if not raw_code or not raw_code.strip():
            return ""

        token_ids = self.tokenizer.encode(raw_code).ids[:256]
        batch = torch.tensor([token_ids], device=self.device)
        mask = torch.ones_like(batch)

        generation_args = dict(
            input_ids=batch,
            attention_mask=mask,
            max_new_tokens=48,
            num_beams=4,
            early_stopping=True,
            no_repeat_ngram_size=3,
        )
        with torch.no_grad():
            generated = self.model.generate(**generation_args)

        # Special tokens are kept by the decoder and removed in _clean instead.
        text = self.tokenizer.decode(generated[0].tolist(), skip_special_tokens=False)
        return self._clean(text)
engine/pipeline.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Load models once and run four-way Java summarization comparisons."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import pickle
7
+ import time
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+
11
+ from datasets import concatenate_datasets, load_dataset
12
+
13
+ from engine.models import CodeT5Model, LexRankModel, SentenceTransformerModel, TFIDFModel
14
+ from engine.preprocessing import split_code_statements, split_java_methods
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ CACHE_DIR = Path(__file__).resolve().parent.parent / "cache"
19
+ IDF_CACHE = CACHE_DIR / "idf_weights_train_val.pkl"
20
+ DATASET = "google/code_x_glue_ct_code_to_text"
21
+ FIT_SPLITS = ("train", "validation")
22
+
23
+
24
@dataclass
class MethodSummary:
    """Summary text produced for one extracted Java method."""

    # Method name as captured by the method splitter.
    name: str
    # Generated summary for that method.
    summary: str
28
+
29
+
30
@dataclass
class ModelSummary:
    """Output of a single model for one uploaded file."""

    # Catalog id of the model (e.g. "tfidf").
    model_id: str
    # Display name, tier, approach and accent color copied from the catalog entry.
    model: str
    tier: str
    approach: str
    accent: str
    # Combined summary text for the whole file.
    summary: str
    # Wall-clock time spent producing the summary, in milliseconds.
    elapsed_ms: float
    # Per-method summaries; empty for the extractive models.
    methods: list[MethodSummary]
40
+
41
+
42
@dataclass
class ComparisonResult:
    """Aggregate result of running every model on one Java source file."""

    # Name of the uploaded file.
    filename: str
    # Size statistics computed on the stripped source.
    char_count: int
    token_count: int
    statement_count: int
    method_count: int
    # Number of statements each extractive model was asked to return.
    top_n: int
    # One entry per model.
    summaries: list[ModelSummary]
    # Total wall-clock time for the comparison, in milliseconds.
    total_elapsed_ms: float
52
+
53
+
54
+ MODEL_CATALOG = [
55
+ {
56
+ "id": "tfidf",
57
+ "name": "TF-IDF",
58
+ "glyph": "TF",
59
+ "accent": "#2dd4bf",
60
+ "family": "Statistical · Bag-of-words",
61
+ "tier": "Corpus-fitted extractive",
62
+ "approach": "Extractive",
63
+ "input": "Statement fragments",
64
+ "checkpoint": None,
65
+ "description": "Scores each code statement by TF-IDF using IDF weights fitted on the CodeXGLUE Java train + validation corpus.",
66
+ "tagline": "Picks statements packed with rare, high-signal terms.",
67
+ "steps": [
68
+ "Fit inverse-document-frequency (IDF) weights over the Java train + validation corpus.",
69
+ "Tokenize each statement: split identifiers, lowercase, drop stopwords.",
70
+ "Score every statement as the sum of term-frequency x IDF.",
71
+ "Return the top-N highest-scoring statements as the summary.",
72
+ ],
73
+ "strengths": ["Fast and fully offline", "Interpretable scores", "No GPU required"],
74
+ "limitations": ["Output is code-like, not prose", "Ignores word order and context"],
75
+ "speed": "Instant",
76
+ },
77
+ {
78
+ "id": "lexrank",
79
+ "name": "LexRank",
80
+ "glyph": "LR",
81
+ "accent": "#38bdf8",
82
+ "family": "Graph · Centrality",
83
+ "tier": "Corpus-fitted extractive",
84
+ "approach": "Extractive",
85
+ "input": "Statement fragments",
86
+ "checkpoint": None,
87
+ "description": "Builds a similarity graph over statements and runs PageRank to pick the most central fragments.",
88
+ "tagline": "Selects statements most representative of the whole file.",
89
+ "steps": [
90
+ "Build a TF-IDF vector for each statement using shared corpus IDF.",
91
+ "Compute pairwise cosine similarity to form a statement graph.",
92
+ "Threshold weak edges, then run PageRank over the graph.",
93
+ "Return the most central (highest-ranked) statements.",
94
+ ],
95
+ "strengths": ["Captures redundancy / centrality", "Offline and interpretable", "Robust on longer files"],
96
+ "limitations": ["Needs several statements to rank", "Still extractive, not generative"],
97
+ "speed": "Fast",
98
+ },
99
+ {
100
+ "id": "sentence_transformers",
101
+ "name": "SentenceTransformers",
102
+ "glyph": "ST",
103
+ "accent": "#a78bfa",
104
+ "family": "Neural · Sentence embeddings",
105
+ "tier": "General-language pretrained",
106
+ "approach": "Extractive",
107
+ "input": "Statement fragments",
108
+ "checkpoint": "sentence-transformers/all-MiniLM-L6-v2",
109
+ "description": "Encodes statements with all-MiniLM-L6-v2 and selects those closest to the centroid embedding.",
110
+ "tagline": "Uses semantic meaning to find the most central statements.",
111
+ "steps": [
112
+ "Embed each statement with the all-MiniLM-L6-v2 transformer.",
113
+ "Average the embeddings into a single centroid vector.",
114
+ "Rank statements by cosine similarity to the centroid.",
115
+ "Return the statements closest to the semantic center.",
116
+ ],
117
+ "strengths": ["Understands English semantics", "Order-aware encoder", "No corpus fitting needed"],
118
+ "limitations": ["Pretrained on prose, not code", "Heavier than TF-IDF/LexRank"],
119
+ "speed": "Moderate",
120
+ },
121
+ {
122
+ "id": "codet5",
123
+ "name": "CodeT5",
124
+ "glyph": "T5",
125
+ "accent": "#f59e0b",
126
+ "family": "Transformer · Seq2seq",
127
+ "tier": "Code-specific fine-tuned",
128
+ "approach": "Abstractive",
129
+ "input": "Per-method Java source (256-token window each)",
130
+ "checkpoint": "Salesforce/codet5-base-codexglue-sum-java",
131
+ "description": "Generates natural-language summaries from raw Java source using a CodeT5 checkpoint fine-tuned on CodeXGLUE.",
132
+ "tagline": "Writes a fresh English sentence describing the code.",
133
+ "steps": [
134
+ "Split the file into individual Java methods.",
135
+ "Byte-level BPE tokenize each method (first 256 tokens).",
136
+ "Decode with beam search — one English sentence per method, same as evaluation.",
137
+ "Show each method summary separately in the results view.",
138
+ ],
139
+ "strengths": ["True natural-language output", "Fine-tuned on Java code-comment pairs", "Best quality summaries"],
140
+ "limitations": ["Slow on CPU", "256-token input limit", "Can hallucinate details"],
141
+ "speed": "Slowest",
142
+ },
143
+ ]
144
+
145
+
146
class SummarizationPipeline:
    """Owns the four summarizers and runs them side by side on Java source.

    Call :meth:`load` once (it is a no-op while loading or once ready), then
    :meth:`compare` for each file. ``ready``, ``loading`` and ``error``
    expose the load state to callers.
    """

    def __init__(self, top_n: int = 5) -> None:
        self.top_n = top_n
        self.ready = False
        self.loading = False
        self.error: str | None = None
        self.tfidf: TFIDFModel | None = None
        self.lexrank: LexRankModel | None = None
        self.st_model: SentenceTransformerModel | None = None
        self.codet5: CodeT5Model | None = None

    def load(self) -> None:
        """Load or build IDF weights and instantiate every model.

        Raises:
            Exception: Any load failure is recorded in ``self.error``,
                logged, and re-raised.
        """
        if self.ready or self.loading:
            return
        self.loading = True
        self.error = None
        try:
            idf, n = self._load_or_build_idf()
            logger.info("Fitting TF-IDF and LexRank from cached IDF (%d terms)", len(idf))
            self.tfidf = TFIDFModel().load_idf(idf, n)
            self.lexrank = LexRankModel().load_idf(idf)

            logger.info("Loading SentenceTransformers ...")
            self.st_model = SentenceTransformerModel()

            logger.info("Loading CodeT5 ...")
            self.codet5 = CodeT5Model()

            self.ready = True
            logger.info("Pipeline ready.")
        except Exception as exc:
            self.error = str(exc)
            logger.exception("Failed to load summarization pipeline")
            raise
        finally:
            self.loading = False

    @staticmethod
    def _read_idf_cache() -> tuple[dict[str, float], int] | None:
        """Return ``(idf, N)`` from the cache file, or ``None`` if unusable."""
        if not IDF_CACHE.exists():
            return None
        logger.info("Loading IDF cache from %s", IDF_CACHE)
        try:
            # NOTE(security): pickle can execute code on load. This is only
            # acceptable because the cache is a local file written by
            # _write_idf_cache; never point IDF_CACHE at untrusted data.
            with IDF_CACHE.open("rb") as f:
                payload = pickle.load(f)
            return payload["idf"], payload["N"]
        except (
            OSError,
            EOFError,
            pickle.UnpicklingError,
            AttributeError,
            ImportError,
            IndexError,
            KeyError,
            TypeError,
        ) as exc:
            # A truncated or stale cache must not block startup; rebuild instead.
            logger.warning("Ignoring unreadable IDF cache %s (%s); rebuilding.", IDF_CACHE, exc)
            return None

    @staticmethod
    def _write_idf_cache(idf: dict[str, float], n: int) -> None:
        """Persist IDF weights atomically (temp file, then rename)."""
        tmp_path = IDF_CACHE.with_name(IDF_CACHE.name + ".tmp")
        with tmp_path.open("wb") as f:
            pickle.dump({"idf": idf, "N": n}, f)
        # Renaming within the same directory means a crash mid-write cannot
        # leave a half-written file at IDF_CACHE.
        tmp_path.replace(IDF_CACHE)

    def _load_or_build_idf(self) -> tuple[dict[str, float], int]:
        """Return ``(idf, N)`` from the cache, fitting on the dataset if needed."""
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        cached = self._read_idf_cache()
        if cached is not None:
            return cached

        logger.info("Building IDF from %s (%s) ...", DATASET, " + ".join(FIT_SPLITS))
        dataset = load_dataset(DATASET, "java")
        fit_data = concatenate_datasets([dataset[split] for split in FIT_SPLITS])
        fit_corpus: list[str] = []
        for row in fit_data:
            fit_corpus.extend(split_code_statements(row["code"]))

        tfidf = TFIDFModel().fit(fit_corpus)
        self._write_idf_cache(tfidf.idf, tfidf.N)
        logger.info(
            "Cached IDF weights (%d terms, %d statements from %d methods)",
            len(tfidf.idf),
            len(fit_corpus),
            len(fit_data),
        )
        return tfidf.idf, tfidf.N

    def compare(self, java_source: str, filename: str = "upload.java") -> ComparisonResult:
        """Run every model on ``java_source`` and collect timed results.

        Args:
            java_source: Full text of a Java file.
            filename: Name reported back in the result.

        Returns:
            A :class:`ComparisonResult` with one :class:`ModelSummary` per
            model; only the CodeT5 entry carries per-method summaries.

        Raises:
            RuntimeError: If the pipeline has not finished loading.
            ValueError: If ``java_source`` is empty after stripping.
        """
        if not self.ready:
            raise RuntimeError("Pipeline is not ready. Call load() first.")

        source = java_source.strip()
        if not source:
            raise ValueError("Java source is empty.")

        statements = split_code_statements(source)
        java_methods = split_java_methods(source)
        summaries: list[ModelSummary] = []
        t_total = time.perf_counter()
        catalog_by_id = {m["id"]: m for m in MODEL_CATALOG}

        # Extractive models all rank the same statement fragments.
        extractive_models = (
            ("tfidf", self.tfidf),
            ("lexrank", self.lexrank),
            ("sentence_transformers", self.st_model),
        )
        for model_id, model in extractive_models:
            meta = catalog_by_id[model_id]
            t0 = time.perf_counter()
            text = " ".join(model.summarize(statements, self.top_n))
            summaries.append(ModelSummary(
                model_id=model_id,
                model=meta["name"],
                tier=meta["tier"],
                approach=meta["approach"],
                accent=meta["accent"],
                summary=text,
                elapsed_ms=(time.perf_counter() - t0) * 1000,
                methods=[],
            ))

        # CodeT5 summarizes each extracted method separately.
        codet5_meta = catalog_by_id["codet5"]
        t0 = time.perf_counter()
        codet5_methods = [
            MethodSummary(
                name=method["name"],
                summary=self.codet5.summarize(method["code"]),
            )
            for method in java_methods
        ]
        codet5_combined = "\n".join(
            m.summary.strip() for m in codet5_methods if m.summary.strip()
        )
        summaries.append(ModelSummary(
            model_id="codet5",
            model=codet5_meta["name"],
            tier=codet5_meta["tier"],
            approach=codet5_meta["approach"],
            accent=codet5_meta["accent"],
            summary=codet5_combined,
            elapsed_ms=(time.perf_counter() - t0) * 1000,
            methods=codet5_methods,
        ))

        return ComparisonResult(
            filename=filename,
            char_count=len(source),
            token_count=len(source.split()),
            statement_count=len(statements),
            method_count=len(java_methods),
            top_n=self.top_n,
            summaries=summaries,
            total_elapsed_ms=(time.perf_counter() - t_total) * 1000,
        )
engine/preprocessing.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ MIN_FRAGMENT_TOKENS = 3
6
+
7
+ STOPWORDS = {
8
+ "public", "private", "protected", "void", "new", "null", "int", "string",
9
+ "static", "final", "return", "class", "this", "super", "true", "false",
10
+ "boolean", "long", "double", "float", "byte", "char", "short", "object",
11
+ "list", "map", "set", "if", "else", "for", "while", "do", "try", "catch",
12
+ "finally", "throw", "throws", "import", "package", "extends", "implements",
13
+ "instanceof", "interface", "abstract", "synchronized", "volatile",
14
+ "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "of",
15
+ "is", "are", "was", "were", "be", "been", "being", "have", "has", "had",
16
+ "do", "does", "did", "will", "would", "could", "should", "may", "might",
17
+ "it", "its", "with", "for", "not", "by", "from", "as", "that", "this",
18
+ "which", "who", "when", "where", "how", "all", "each", "both", "more",
19
+ "s", "e",
20
+ }
21
+
22
# Matches, in priority order: an acronym followed by a capitalized word,
# a (capitalized) lowercase word, a run of capitals, or a run of digits.
_SUBWORD_RE = re.compile(r"[A-Z]+(?=[A-Z][a-z])|[A-Z]?[a-z]+|[A-Z]+|\d+")


def split_identifier(token: str) -> list[str]:
    """Break an identifier into lowercase sub-words.

    The token is first split on underscores and whitespace; each piece is
    then split on camelCase, acronym, and digit boundaries.
    """
    return [
        piece.lower()
        for chunk in re.split(r"[_\s]+", token)
        for piece in _SUBWORD_RE.findall(chunk)
        if piece
    ]
30
+
31
+
32
def tokenize(text: str) -> list[str]:
    """Split text into lowercase identifier sub-words, omitting stopwords."""
    kept: list[str] = []
    for word in text.split():
        kept.extend(
            piece
            for piece in split_identifier(word)
            if piece and piece not in STOPWORDS
        )
    return kept
40
+
41
+
42
# Heuristic match for the start of a Java method declaration, up to and
# including the opening parenthesis. Group 1 captures the method name.
_METHOD_HEAD = re.compile(
    r"(?m)^[ \t]*"
    r"(?:@(?:\w+\.)*\w+(?:\([^)]*\))?\s+)*"
    r"(?:(?:public|private|protected)\s+)?"
    r"(?:static\s+)?(?:final\s+)?(?:synchronized\s+)?"
    r"(?:<[^>]+>\s+)?"
    r"[\w\[\]<>,.\s?]+\s+"
    r"(\w+)\s*"
    r"\(",
)

# Keywords the head pattern can capture as a "name" but that never name a method.
_NON_METHOD_NAMES = frozenset({
    "if", "for", "while", "switch", "catch", "do", "try", "else", "return", "new",
})


def _skip_quoted_literal(source: str, start: int) -> int:
    """Return the index just past the string/char literal opening at ``start``."""
    quote = source[start]
    idx = start + 1
    size = len(source)
    while idx < size:
        ch = source[idx]
        if ch == "\\":
            idx += 2  # skip the escaped character
            continue
        # An unterminated literal is cut off at the line end so one stray
        # quote cannot swallow the rest of the file.
        if ch == quote or ch == "\n":
            return idx + 1
        idx += 1
    return size


def _find_body_end(source: str, brace_start: int) -> int | None:
    """Return the index just past the brace matching ``source[brace_start]``.

    Braces inside comments, string literals, char literals and text blocks
    are ignored. Returns ``None`` when the braces never balance.
    """
    depth = 0
    idx = brace_start
    size = len(source)
    while idx < size:
        if source.startswith("//", idx):
            newline = source.find("\n", idx)
            if newline == -1:
                return None
            idx = newline + 1
            continue
        if source.startswith("/*", idx):
            close = source.find("*/", idx + 2)
            if close == -1:
                return None
            idx = close + 2
            continue
        if source.startswith('"""', idx):
            close = source.find('"""', idx + 3)
            if close == -1:
                return None
            idx = close + 3
            continue
        ch = source[idx]
        if ch == '"' or ch == "'":
            idx = _skip_quoted_literal(source, idx)
            continue
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return idx + 1
        idx += 1
    return None


def split_java_methods(code: str) -> list[dict[str, str]]:
    """Extract top-level method bodies from Java source.

    Args:
        code: Java source text.

    Returns:
        A list of ``{"name", "code"}`` dicts in source order. Empty input
        yields ``[]``; if no method is found, a single entry named
        ``"(entire file)"`` holds the whole stripped source.
    """
    source = code.strip()
    if not source:
        return []

    spans: list[tuple[int, int, str]] = []

    for match in _METHOD_HEAD.finditer(source):
        name = match.group(1)
        if name in _NON_METHOD_NAMES:
            continue

        start = match.start()
        brace_start = source.find("{", match.end())
        if brace_start == -1:
            continue

        # A ';' before the body (outside a trailing // comment) means this
        # was a call or an abstract declaration, not a method definition.
        header = source[match.end():brace_start]
        if ";" in header.split("//", 1)[0]:
            continue

        end = _find_body_end(source, brace_start)
        if end is None:
            continue  # unbalanced braces: skip this candidate

        # Keep only outermost spans; matches nested in an accepted method are dropped.
        if any(start < existing_end and end > existing_start for existing_start, existing_end, _ in spans):
            continue

        spans.append((start, end, name))

    spans.sort(key=lambda item: item[0])

    methods = [
        {"name": name, "code": source[start:end].strip()}
        for start, end, name in spans
    ]

    if methods:
        return methods

    return [{"name": "(entire file)", "code": source}]
109
+
110
+
111
def format_method_summaries(parts: list[tuple[str, str]]) -> str:
    """Render ``(name, summary)`` pairs as one bullet line per method.

    A summary that is blank after stripping is shown as ``(no output)``.
    """
    return "\n".join(
        f"• {name}: {text.strip() or '(no output)'}"
        for name, text in parts
    )
120
+
121
+
122
def split_code_statements(code: str) -> list[str]:
    """Split source into whitespace-normalized statement fragments.

    The code is cut at ``;``, ``{``, ``}`` and newlines. A fragment with
    fewer than ``MIN_FRAGMENT_TOKENS`` words is appended to the previous
    fragment. If nothing survives, the whole normalized input is returned
    as a single fragment.
    """
    fragments: list[str] = []
    for piece in re.split(r"[;{}\n]", code):
        normalized = re.sub(r"\s+", " ", piece).strip()
        if not normalized:
            continue
        if fragments and len(normalized.split()) < MIN_FRAGMENT_TOKENS:
            fragments[-1] += " " + normalized
        else:
            fragments.append(normalized)

    if fragments:
        return fragments
    return [re.sub(r"\s+", " ", code).strip()]
java_summarization_experiment.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.110.0
2
+ uvicorn[standard]>=0.27.0
3
+ python-multipart>=0.0.9
4
+ jinja2>=3.1.0
5
+ datasets>=2.18.0
6
+ sentence-transformers>=2.6.0
7
+ transformers>=4.38.0
8
+ torch>=2.1.0
9
+ numpy>=1.24.0
10
+ huggingface-hub>=0.21.0
11
+ tokenizers>=0.15.0
samples/UserService.java ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package com.example.service;
2
+
3
+ import java.util.HashMap;
4
+ import java.util.Map;
5
+ import java.util.Optional;
6
+
7
+ /**
8
+ * Manages user accounts: lookup, registration, and password updates.
9
+ */
10
public class UserService {

    /** Maps a normalized (trimmed, lower-cased) email to its user id. */
    private final Map<String, String> usersByEmail = new HashMap<>();

    /** Returns the lookup key for an email: trimmed and lower-cased. */
    private static String normalize(String email) {
        return email.trim().toLowerCase();
    }

    /**
     * Looks up the user id registered for an email.
     *
     * @param email the email to look up; may be null or blank
     * @return the user id, or empty if the email is null, blank, or unknown
     */
    public Optional<String> findUserIdByEmail(String email) {
        boolean usable = email != null && !email.isBlank();
        if (!usable) {
            return Optional.empty();
        }
        String userId = usersByEmail.get(normalize(email));
        return Optional.ofNullable(userId);
    }

    /**
     * Registers a user under an email that is not yet taken.
     *
     * @return true if registered; false for null/blank arguments or a duplicate email
     */
    public boolean registerUser(String email, String userId) {
        if (email == null || userId == null) {
            return false;
        }
        if (email.isBlank() || userId.isBlank()) {
            return false;
        }
        String key = normalize(email);
        if (usersByEmail.containsKey(key)) {
            return false;
        }
        usersByEmail.put(key, userId);
        return true;
    }

    /**
     * Checks that a password update is valid for a registered email.
     *
     * @return true if the email is registered and the hash is non-blank; false otherwise
     */
    public boolean updatePassword(String email, String newPasswordHash) {
        if (email == null || newPasswordHash == null || newPasswordHash.isBlank()) {
            return false;
        }
        // In a real app this would persist to a credential store.
        return usersByEmail.containsKey(normalize(email));
    }
}
static/css/style.css ADDED
@@ -0,0 +1,825 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --bg: #07090f;
3
+ --bg-2: #0c1018;
4
+ --surface: rgba(20, 27, 41, 0.72);
5
+ --surface-solid: #141b29;
6
+ --surface-2: rgba(33, 43, 64, 0.6);
7
+ --border: rgba(120, 140, 180, 0.16);
8
+ --border-strong: rgba(120, 140, 180, 0.32);
9
+ --text: #eef3fb;
10
+ --muted: #93a4c2;
11
+ --faint: #64748b;
12
+ --accent: #5b9cff;
13
+ --accent-2: #8b5cff;
14
+ --extractive: #2dd4bf;
15
+ --abstractive: #f59e0b;
16
+ --good: #34d399;
17
+ --warn: #fbbf24;
18
+ --error: #f87171;
19
+ --radius: 16px;
20
+ --radius-sm: 10px;
21
+ --shadow: 0 24px 60px rgba(0, 0, 0, 0.45);
22
+ --shadow-sm: 0 8px 24px rgba(0, 0, 0, 0.3);
23
+ --font: "Inter", system-ui, -apple-system, "Segoe UI", sans-serif;
24
+ --mono: "JetBrains Mono", ui-monospace, "SF Mono", monospace;
25
+ }
26
+
27
+ * { box-sizing: border-box; }
28
+
29
+ html { scroll-behavior: smooth; scroll-padding-top: 90px; }
30
+
31
+ body {
32
+ margin: 0;
33
+ font-family: var(--font);
34
+ background: var(--bg);
35
+ color: var(--text);
36
+ line-height: 1.65;
37
+ overflow-x: hidden;
38
+ -webkit-font-smoothing: antialiased;
39
+ }
40
+
41
+ a { color: inherit; }
42
+
43
+ code {
44
+ font-family: var(--mono);
45
+ font-size: 0.85em;
46
+ background: rgba(120, 140, 180, 0.14);
47
+ padding: 0.12em 0.4em;
48
+ border-radius: 5px;
49
+ border: 1px solid var(--border);
50
+ }
51
+
52
+ .container { width: min(1180px, 92vw); margin: 0 auto; }
53
+
54
+ /* ---------- Animated background ---------- */
55
+ .bg-aurora {
56
+ position: fixed;
57
+ inset: 0;
58
+ z-index: -1;
59
+ overflow: hidden;
60
+ background: radial-gradient(120% 80% at 50% -10%, #11182b 0%, var(--bg) 60%);
61
+ }
62
+
63
+ .blob {
64
+ position: absolute;
65
+ border-radius: 50%;
66
+ filter: blur(80px);
67
+ opacity: 0.5;
68
+ animation: drift 22s ease-in-out infinite;
69
+ }
70
+
71
+ .blob-1 { width: 460px; height: 460px; background: #2b5cff; top: -120px; left: -80px; }
72
+ .blob-2 { width: 520px; height: 520px; background: #7c3aed; top: 10%; right: -160px; animation-delay: -6s; }
73
+ .blob-3 { width: 420px; height: 420px; background: #0ea5a4; bottom: -160px; left: 30%; animation-delay: -12s; }
74
+
75
+ @keyframes drift {
76
+ 0%, 100% { transform: translate(0, 0) scale(1); }
77
+ 33% { transform: translate(40px, 50px) scale(1.08); }
78
+ 66% { transform: translate(-30px, 20px) scale(0.95); }
79
+ }
80
+
81
/* Faint 46px grid drawn over the aurora background and faded out towards
   the edges by a radial mask. */
.grid-overlay {
  position: absolute;
  inset: 0;
  background-image:
    linear-gradient(rgba(120, 140, 180, 0.05) 1px, transparent 1px),
    linear-gradient(90deg, rgba(120, 140, 180, 0.05) 1px, transparent 1px);
  background-size: 46px 46px;
  /* Prefixed declaration first: older WebKit- and Blink-based browsers only
     recognise -webkit-mask-image and would otherwise show the grid unmasked. */
  -webkit-mask-image: radial-gradient(circle at 50% 30%, black, transparent 80%);
  mask-image: radial-gradient(circle at 50% 30%, black, transparent 80%);
}
90
+
91
+ /* ---------- Header ---------- */
92
+ .site-header {
93
+ position: sticky;
94
+ top: 0;
95
+ z-index: 50;
96
+ border-bottom: 1px solid transparent;
97
+ transition: background 0.3s, border-color 0.3s, backdrop-filter 0.3s;
98
+ }
99
+
100
/* Applied by the scroll handler once the page has scrolled: gives the sticky
   header a translucent, blurred backdrop and a visible bottom border. */
.site-header.scrolled {
  background: rgba(7, 9, 15, 0.78);
  /* Older Safari releases only understand the prefixed property. */
  -webkit-backdrop-filter: blur(16px);
  backdrop-filter: blur(16px);
  border-bottom-color: var(--border);
}
105
+
106
+ .header-inner {
107
+ display: flex;
108
+ align-items: center;
109
+ gap: 1.5rem;
110
+ padding: 0.9rem 0;
111
+ }
112
+
113
+ .brand {
114
+ display: flex;
115
+ align-items: center;
116
+ gap: 0.75rem;
117
+ flex: 1;
118
+ text-decoration: none;
119
+ }
120
+
121
+ .brand-icon {
122
+ display: grid;
123
+ place-items: center;
124
+ width: 42px;
125
+ height: 42px;
126
+ border-radius: 11px;
127
+ font-family: var(--mono);
128
+ font-weight: 700;
129
+ font-size: 0.85rem;
130
+ color: white;
131
+ background: linear-gradient(135deg, var(--accent), var(--accent-2));
132
+ box-shadow: 0 6px 20px rgba(91, 156, 255, 0.4);
133
+ }
134
+
135
+ .brand h1 { margin: 0; font-size: 1.1rem; letter-spacing: -0.01em; }
136
+ .brand p { margin: 0; color: var(--muted); font-size: 0.8rem; }
137
+
138
+ .nav { display: flex; gap: 0.4rem; }
139
+
140
+ .nav-link {
141
+ position: relative;
142
+ color: var(--muted);
143
+ text-decoration: none;
144
+ font-size: 0.9rem;
145
+ font-weight: 500;
146
+ padding: 0.4rem 0.85rem;
147
+ border-radius: 8px;
148
+ transition: color 0.2s, background 0.2s;
149
+ }
150
+
151
+ .nav-link:hover { color: var(--text); background: var(--surface-2); }
152
+ .nav-link.active { color: var(--text); }
153
+ .nav-link.active::after {
154
+ content: "";
155
+ position: absolute;
156
+ left: 0.85rem;
157
+ right: 0.85rem;
158
+ bottom: 0.1rem;
159
+ height: 2px;
160
+ border-radius: 2px;
161
+ background: linear-gradient(90deg, var(--accent), var(--accent-2));
162
+ }
163
+
164
+ .status-badge {
165
+ display: inline-flex;
166
+ align-items: center;
167
+ gap: 0.5rem;
168
+ font-size: 0.8rem;
169
+ font-weight: 500;
170
+ padding: 0.4rem 0.8rem;
171
+ border-radius: 999px;
172
+ border: 1px solid var(--border);
173
+ color: var(--muted);
174
+ background: var(--surface);
175
+ }
176
+
177
+ .status-dot {
178
+ width: 8px;
179
+ height: 8px;
180
+ border-radius: 50%;
181
+ background: var(--faint);
182
+ }
183
+
184
+ .status-badge.ready { color: var(--good); border-color: rgba(52, 211, 153, 0.4); }
185
+ .status-badge.ready .status-dot { background: var(--good); box-shadow: 0 0 10px var(--good); }
186
+ .status-badge.loading { color: var(--abstractive); border-color: rgba(245, 158, 11, 0.4); }
187
+ .status-badge.loading .status-dot { background: var(--abstractive); animation: pulse 1.2s ease-in-out infinite; }
188
+ .status-badge.error { color: var(--error); border-color: rgba(248, 113, 113, 0.4); }
189
+ .status-badge.error .status-dot { background: var(--error); }
190
+
191
+ @keyframes pulse {
192
+ 0%, 100% { opacity: 1; transform: scale(1); }
193
+ 50% { opacity: 0.4; transform: scale(0.7); }
194
+ }
195
+
196
+ /* ---------- Hero ---------- */
197
+ .hero { padding: 5rem 0 3rem; }
198
+
199
+ .hero-content { max-width: 760px; }
200
+
201
+ .eyebrow {
202
+ display: inline-block;
203
+ font-size: 0.72rem;
204
+ letter-spacing: 0.22em;
205
+ font-weight: 600;
206
+ color: var(--accent);
207
+ padding: 0.35rem 0.8rem;
208
+ border: 1px solid var(--border-strong);
209
+ border-radius: 999px;
210
+ background: var(--surface);
211
+ margin-bottom: 1.5rem;
212
+ }
213
+
214
+ .hero-title {
215
+ margin: 0 0 1.25rem;
216
+ font-size: clamp(2.3rem, 6vw, 4rem);
217
+ line-height: 1.05;
218
+ letter-spacing: -0.03em;
219
+ font-weight: 800;
220
+ }
221
+
222
+ .gradient-text {
223
+ display: block;
224
+ background: linear-gradient(110deg, #5b9cff, #8b5cff 45%, #2dd4bf);
225
+ -webkit-background-clip: text;
226
+ background-clip: text;
227
+ color: transparent;
228
+ }
229
+
230
+ .hero-sub {
231
+ font-size: 1.1rem;
232
+ color: var(--muted);
233
+ max-width: 60ch;
234
+ margin: 0 0 2rem;
235
+ }
236
+
237
+ .hero-actions { display: flex; gap: 1rem; flex-wrap: wrap; margin-bottom: 2.75rem; }
238
+
239
+ .hero-stats { display: flex; gap: 2.5rem; }
240
+
241
+ .stat { display: flex; flex-direction: column; }
242
+ .stat-num {
243
+ font-size: 1.8rem;
244
+ font-weight: 800;
245
+ background: linear-gradient(135deg, var(--text), var(--muted));
246
+ -webkit-background-clip: text;
247
+ background-clip: text;
248
+ color: transparent;
249
+ }
250
+ .stat-label { font-size: 0.82rem; color: var(--faint); text-transform: uppercase; letter-spacing: 0.1em; }
251
+
252
+ /* ---------- Buttons ---------- */
253
+ .btn {
254
+ display: inline-flex;
255
+ align-items: center;
256
+ gap: 0.5rem;
257
+ padding: 0.75rem 1.5rem;
258
+ border-radius: 10px;
259
+ border: 1px solid transparent;
260
+ font-size: 0.95rem;
261
+ font-weight: 600;
262
+ font-family: inherit;
263
+ cursor: pointer;
264
+ text-decoration: none;
265
+ transition: transform 0.18s, box-shadow 0.18s, background 0.18s, border-color 0.18s;
266
+ }
267
+
268
+ .btn.primary {
269
+ background: linear-gradient(135deg, var(--accent), var(--accent-2));
270
+ color: white;
271
+ box-shadow: 0 10px 30px rgba(91, 156, 255, 0.35);
272
+ }
273
+ .btn.primary:hover:not(:disabled) { transform: translateY(-2px); box-shadow: 0 16px 40px rgba(91, 156, 255, 0.5); }
274
+ .btn.primary:disabled { opacity: 0.45; cursor: not-allowed; box-shadow: none; }
275
+
276
+ .btn.ghost {
277
+ background: var(--surface);
278
+ border-color: var(--border-strong);
279
+ color: var(--text);
280
+ }
281
+ .btn.ghost:hover { background: var(--surface-2); transform: translateY(-2px); }
282
+
283
+ /* ---------- Sections ---------- */
284
+ .section { padding: 4rem 0; }
285
+
286
+ .section-head { max-width: 720px; margin-bottom: 2.75rem; }
287
+
288
+ .section-kicker {
289
+ font-size: 0.78rem;
290
+ font-weight: 600;
291
+ letter-spacing: 0.16em;
292
+ color: var(--accent);
293
+ text-transform: uppercase;
294
+ }
295
+
296
+ .section-head h2 {
297
+ margin: 0.6rem 0 0.75rem;
298
+ font-size: clamp(1.7rem, 3.5vw, 2.4rem);
299
+ letter-spacing: -0.02em;
300
+ font-weight: 700;
301
+ }
302
+
303
+ .section-lead { color: var(--muted); font-size: 1.05rem; margin: 0; }
304
+
305
+ /* ---------- Pipeline diagram ---------- */
306
/* Horizontal pipeline diagram container; scrolls sideways when the stages
   do not fit. */
.pipeline {
  display: flex;
  align-items: stretch;
  gap: 0.5rem;
  padding: 2rem 1.5rem;
  margin-bottom: 2.5rem;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  box-shadow: var(--shadow-sm);
  /* Older Safari releases only understand the prefixed property. */
  -webkit-backdrop-filter: blur(12px);
  backdrop-filter: blur(12px);
  overflow-x: auto;
}
319
+
320
+ .pipe-stage { display: flex; align-items: center; justify-content: center; }
321
+ .pipe-stage.models-fan { flex-direction: column; gap: 0.6rem; }
322
+
323
+ .pipe-node {
324
+ position: relative;
325
+ text-align: center;
326
+ padding: 1rem 1.1rem;
327
+ border-radius: var(--radius-sm);
328
+ border: 1px solid var(--border-strong);
329
+ background: var(--surface-solid);
330
+ min-width: 130px;
331
+ transition: transform 0.2s, border-color 0.2s, box-shadow 0.2s;
332
+ }
333
+ .pipe-node:hover { transform: translateY(-4px); box-shadow: var(--shadow-sm); }
334
+ .pipe-node h4 { margin: 0.5rem 0 0.2rem; font-size: 0.95rem; }
335
+ .pipe-node p { margin: 0; font-size: 0.78rem; color: var(--muted); }
336
+ .pipe-icon { font-size: 1.4rem; }
337
+
338
+ .pipe-node.input { border-color: rgba(91, 156, 255, 0.5); }
339
+ .pipe-node.process { border-color: rgba(147, 164, 194, 0.5); }
340
+ .pipe-node.output { border-color: rgba(52, 211, 153, 0.5); }
341
+
342
+ .pipe-node.model {
343
+ display: flex;
344
+ align-items: center;
345
+ gap: 0.7rem;
346
+ text-align: left;
347
+ padding: 0.6rem 0.9rem;
348
+ min-width: 200px;
349
+ }
350
+ .pipe-node.model h4 { margin: 0; }
351
+ .pipe-node.model.extractive { border-left: 3px solid var(--extractive); }
352
+ .pipe-node.model.abstractive { border-left: 3px solid var(--abstractive); }
353
+
354
+ .model-glyph {
355
+ display: grid;
356
+ place-items: center;
357
+ width: 34px;
358
+ height: 34px;
359
+ border-radius: 9px;
360
+ font-family: var(--mono);
361
+ font-weight: 700;
362
+ font-size: 0.8rem;
363
+ color: var(--c, var(--accent));
364
+ background: color-mix(in srgb, var(--c, var(--accent)) 16%, transparent);
365
+ border: 1px solid color-mix(in srgb, var(--c, var(--accent)) 40%, transparent);
366
+ flex-shrink: 0;
367
+ }
368
+
369
+ .pipe-connector {
370
+ display: flex;
371
+ align-items: center;
372
+ min-width: 36px;
373
+ flex-shrink: 0;
374
+ }
375
+ .pipe-connector span {
376
+ flex: 1;
377
+ height: 2px;
378
+ background: linear-gradient(90deg, transparent, var(--border-strong), transparent);
379
+ position: relative;
380
+ }
381
+ .pipe-connector span::after {
382
+ content: "›";
383
+ position: absolute;
384
+ right: -2px;
385
+ top: 50%;
386
+ transform: translateY(-50%);
387
+ color: var(--muted);
388
+ font-size: 1.1rem;
389
+ }
390
+
391
+ /* ---------- Arch cards ---------- */
392
+ .arch-grid {
393
+ display: grid;
394
+ grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
395
+ gap: 1.2rem;
396
+ }
397
+
398
/* Glass-style card used in the architecture grid. */
.arch-card {
  position: relative;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  padding: 1.6rem;
  /* Older Safari releases only understand the prefixed property. */
  -webkit-backdrop-filter: blur(12px);
  backdrop-filter: blur(12px);
  transition: transform 0.2s, border-color 0.2s, box-shadow 0.2s;
}
407
+ .arch-card:hover { transform: translateY(-4px); border-color: var(--border-strong); box-shadow: var(--shadow-sm); }
408
+
409
+ .arch-card-icon {
410
+ display: grid;
411
+ place-items: center;
412
+ width: 40px;
413
+ height: 40px;
414
+ border-radius: 10px;
415
+ font-size: 1.1rem;
416
+ font-weight: 700;
417
+ margin-bottom: 1rem;
418
+ color: var(--accent);
419
+ background: rgba(91, 156, 255, 0.12);
420
+ border: 1px solid rgba(91, 156, 255, 0.3);
421
+ }
422
+
423
+ .arch-card h3 { margin: 0 0 0.75rem; font-size: 1.1rem; }
424
+ .arch-card ul { margin: 0; padding-left: 1.1rem; color: var(--muted); font-size: 0.92rem; }
425
+ .arch-card li { margin-bottom: 0.4rem; }
426
+
427
+ /* ---------- Model cards ---------- */
428
+ .model-cards {
429
+ display: grid;
430
+ grid-template-columns: repeat(auto-fit, minmax(330px, 1fr));
431
+ gap: 1.4rem;
432
+ }
433
+
434
/* Expandable model card; overflow is clipped so the coloured top strip
   (::before) follows the rounded corners. */
.model-card {
  position: relative;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  padding: 1.5rem;
  /* Older Safari releases only understand the prefixed property. */
  -webkit-backdrop-filter: blur(12px);
  backdrop-filter: blur(12px);
  overflow: hidden;
  transition: transform 0.2s, border-color 0.2s, box-shadow 0.2s;
}
444
+ .model-card::before {
445
+ content: "";
446
+ position: absolute;
447
+ inset: 0 0 auto 0;
448
+ height: 3px;
449
+ background: var(--accent);
450
+ }
451
+ .model-card:hover {
452
+ transform: translateY(-5px);
453
+ border-color: color-mix(in srgb, var(--accent) 45%, transparent);
454
+ box-shadow: 0 20px 50px color-mix(in srgb, var(--accent) 18%, transparent);
455
+ }
456
+ .model-card.open { border-color: color-mix(in srgb, var(--accent) 50%, transparent); }
457
+
458
+ .model-card-head {
459
+ display: flex;
460
+ align-items: center;
461
+ gap: 0.9rem;
462
+ width: 100%;
463
+ background: none;
464
+ border: none;
465
+ padding: 0;
466
+ cursor: pointer;
467
+ color: inherit;
468
+ font-family: inherit;
469
+ text-align: left;
470
+ }
471
+
472
+ .model-card-head .model-glyph { width: 46px; height: 46px; font-size: 1rem; }
473
+ .model-card-titles { flex: 1; }
474
+ .model-card-titles h3 { margin: 0; font-size: 1.25rem; letter-spacing: -0.01em; }
475
+ .model-family { font-size: 0.8rem; color: var(--faint); }
476
+
477
+ .model-chevron {
478
+ font-size: 1.3rem;
479
+ color: var(--muted);
480
+ transition: transform 0.3s;
481
+ line-height: 1;
482
+ }
483
+ .model-card.open .model-chevron { transform: rotate(180deg); }
484
+
485
+ .model-card-meta { display: flex; flex-wrap: wrap; gap: 0.45rem; margin: 1rem 0 0.85rem; }
486
+
487
+ .model-tagline { margin: 0; color: var(--text); font-size: 0.95rem; font-weight: 500; }
488
+
489
+ .tag {
490
+ display: inline-flex;
491
+ align-items: center;
492
+ font-size: 0.72rem;
493
+ font-weight: 600;
494
+ padding: 0.25rem 0.6rem;
495
+ border-radius: 999px;
496
+ border: 1px solid var(--border);
497
+ background: var(--surface-2);
498
+ color: var(--muted);
499
+ white-space: nowrap;
500
+ }
501
+ .tag-extractive { color: var(--extractive); border-color: rgba(45, 212, 191, 0.4); background: rgba(45, 212, 191, 0.08); }
502
+ .tag-abstractive { color: var(--abstractive); border-color: rgba(245, 158, 11, 0.4); background: rgba(245, 158, 11, 0.08); }
503
+
504
+ /* expandable body */
505
+ .model-card-body {
506
+ display: grid;
507
+ grid-template-rows: 0fr;
508
+ transition: grid-template-rows 0.35s ease, opacity 0.3s, margin 0.3s;
509
+ opacity: 0;
510
+ margin-top: 0;
511
+ }
512
+ .model-card-body > * { overflow: hidden; min-height: 0; }
513
+ .model-card.open .model-card-body {
514
+ grid-template-rows: 1fr;
515
+ opacity: 1;
516
+ margin-top: 1.25rem;
517
+ }
518
+
519
+ .model-desc { margin: 0 0 1.2rem; color: var(--muted); font-size: 0.92rem; }
520
+
521
+ .model-section { margin-bottom: 1.2rem; }
522
+ .model-section h4 {
523
+ margin: 0 0 0.7rem;
524
+ font-size: 0.78rem;
525
+ letter-spacing: 0.08em;
526
+ text-transform: uppercase;
527
+ color: var(--muted);
528
+ }
529
+ .model-section h4.good { color: var(--good); }
530
+ .model-section h4.warn { color: var(--warn); }
531
+
532
+ .model-steps { list-style: none; margin: 0; padding: 0; display: flex; flex-direction: column; gap: 0.6rem; }
533
+ .model-steps li { display: flex; gap: 0.7rem; align-items: flex-start; font-size: 0.9rem; color: var(--text); }
534
+ .step-num {
535
+ display: grid;
536
+ place-items: center;
537
+ width: 22px;
538
+ height: 22px;
539
+ flex-shrink: 0;
540
+ border-radius: 6px;
541
+ font-size: 0.72rem;
542
+ font-weight: 700;
543
+ font-family: var(--mono);
544
+ color: var(--accent);
545
+ background: color-mix(in srgb, var(--accent) 14%, transparent);
546
+ border: 1px solid color-mix(in srgb, var(--accent) 30%, transparent);
547
+ }
548
+
549
+ .model-cols { display: grid; grid-template-columns: 1fr 1fr; gap: 1.2rem; }
550
+
551
+ .pill-list { list-style: none; margin: 0; padding: 0; display: flex; flex-wrap: wrap; gap: 0.4rem; }
552
+ .pill {
553
+ font-size: 0.78rem;
554
+ padding: 0.28rem 0.6rem;
555
+ border-radius: 7px;
556
+ border: 1px solid var(--border);
557
+ background: var(--surface-2);
558
+ color: var(--muted);
559
+ }
560
+ .pill.good { border-color: rgba(52, 211, 153, 0.3); }
561
+ .pill.warn { border-color: rgba(251, 191, 36, 0.3); }
562
+
563
+ .model-facts {
564
+ margin: 1.2rem 0 0;
565
+ padding-top: 1.1rem;
566
+ border-top: 1px solid var(--border);
567
+ display: flex;
568
+ flex-direction: column;
569
+ gap: 0.6rem;
570
+ }
571
+ .model-facts > div { display: flex; gap: 0.75rem; align-items: baseline; }
572
+ .model-facts dt { font-size: 0.78rem; color: var(--faint); min-width: 84px; text-transform: uppercase; letter-spacing: 0.06em; }
573
+ .model-facts dd { margin: 0; font-size: 0.88rem; color: var(--text); word-break: break-word; }
574
+
575
+ /* ---------- Try-it ---------- */
576
/* Panel wrapping the upload form, loader and results. */
.try-panel {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  padding: 1.75rem;
  /* Older Safari releases only understand the prefixed property. */
  -webkit-backdrop-filter: blur(12px);
  backdrop-filter: blur(12px);
  box-shadow: var(--shadow-sm);
}
584
+
585
+ .upload-zone { display: flex; flex-direction: column; gap: 1.1rem; }
586
+
587
+ .upload-inner {
588
+ border: 2px dashed var(--border-strong);
589
+ border-radius: var(--radius);
590
+ padding: 3rem 1.5rem;
591
+ text-align: center;
592
+ cursor: pointer;
593
+ transition: border-color 0.2s, background 0.2s, transform 0.2s;
594
+ }
595
+ .upload-inner:hover, .upload-inner.dragover {
596
+ border-color: var(--accent);
597
+ background: rgba(91, 156, 255, 0.06);
598
+ }
599
+ .upload-inner.dragover { transform: scale(1.01); }
600
+
601
+ .upload-icon {
602
+ display: inline-grid;
603
+ place-items: center;
604
+ width: 56px;
605
+ height: 56px;
606
+ margin-bottom: 0.75rem;
607
+ font-size: 1.6rem;
608
+ border-radius: 50%;
609
+ color: var(--accent);
610
+ background: rgba(91, 156, 255, 0.1);
611
+ border: 1px solid rgba(91, 156, 255, 0.3);
612
+ }
613
+ .upload-title { margin: 0 0 0.3rem; font-size: 1.05rem; font-weight: 600; }
614
+
615
+ .upload-bar { display: flex; align-items: center; justify-content: space-between; gap: 1rem; flex-wrap: wrap; }
616
+
617
+ .file-meta {
618
+ display: inline-flex;
619
+ align-items: center;
620
+ gap: 0.5rem;
621
+ padding: 0.55rem 0.9rem;
622
+ background: var(--surface-2);
623
+ border: 1px solid var(--border);
624
+ border-radius: 9px;
625
+ font-size: 0.88rem;
626
+ font-family: var(--mono);
627
+ }
628
+ .file-meta::before { content: "📄"; }
629
+
630
+ /* loader */
631
+ .loading { margin-top: 1.5rem; color: var(--muted); text-align: center; }
632
+ .loader-track {
633
+ height: 4px;
634
+ width: 100%;
635
+ border-radius: 4px;
636
+ background: var(--surface-2);
637
+ overflow: hidden;
638
+ margin-bottom: 0.85rem;
639
+ }
640
+ .loader-bar {
641
+ height: 100%;
642
+ width: 40%;
643
+ border-radius: 4px;
644
+ background: linear-gradient(90deg, var(--accent), var(--accent-2));
645
+ animation: indeterminate 1.3s ease-in-out infinite;
646
+ }
647
+ @keyframes indeterminate {
648
+ 0% { margin-left: -40%; }
649
+ 100% { margin-left: 100%; }
650
+ }
651
+
652
+ .alert { padding: 0.9rem 1.1rem; border-radius: 10px; margin-top: 1.25rem; font-size: 0.92rem; }
653
+ .alert.error { background: rgba(248, 113, 113, 0.12); border: 1px solid rgba(248, 113, 113, 0.4); color: var(--error); }
654
+
655
+ /* results */
656
+ .results { margin-top: 1.75rem; }
657
+ .results-meta {
658
+ display: flex;
659
+ flex-wrap: wrap;
660
+ gap: 0.6rem;
661
+ margin-bottom: 1.25rem;
662
+ }
663
+ .results-meta .chip {
664
+ display: inline-flex;
665
+ align-items: center;
666
+ gap: 0.4rem;
667
+ padding: 0.45rem 0.85rem;
668
+ background: var(--surface-2);
669
+ border: 1px solid var(--border);
670
+ border-radius: 9px;
671
+ font-size: 0.85rem;
672
+ }
673
+ .results-meta .chip strong { color: var(--muted); font-weight: 600; }
674
+
675
+ .results-grid {
676
+ display: grid;
677
+ grid-template-columns: repeat(auto-fit, minmax(290px, 1fr));
678
+ gap: 1.2rem;
679
+ }
680
+
681
+ .result-card {
682
+ position: relative;
683
+ background: var(--surface-solid);
684
+ border: 1px solid var(--border);
685
+ border-radius: var(--radius);
686
+ padding: 1.35rem;
687
+ display: flex;
688
+ flex-direction: column;
689
+ gap: 0.9rem;
690
+ transition: transform 0.2s, box-shadow 0.2s;
691
+ animation: cardIn 0.45s ease both;
692
+ }
693
/* Coloured strip along the top edge of a result card. --accent is set inline
   per card by the renderer and otherwise inherits the :root default, so no
   fallback argument is needed (the previous self-referential fallback was a
   no-op). */
.result-card::before {
  content: "";
  position: absolute;
  inset: 0 0 auto 0;
  height: 3px;
  border-radius: var(--radius) var(--radius) 0 0;
  background: var(--accent);
}
701
+ .result-card:hover { transform: translateY(-4px); box-shadow: var(--shadow-sm); }
702
+
703
+ @keyframes cardIn {
704
+ from { opacity: 0; transform: translateY(14px); }
705
+ to { opacity: 1; transform: translateY(0); }
706
+ }
707
+
708
+ .result-header { display: flex; justify-content: space-between; align-items: flex-start; gap: 0.5rem; }
709
+ .result-header-left { display: flex; align-items: center; gap: 0.7rem; }
710
+ .result-header h3 { margin: 0; font-size: 1.05rem; }
711
+ .result-tier { font-size: 0.75rem; color: var(--faint); }
712
+
713
+ .result-body {
714
+ font-size: 0.92rem;
715
+ line-height: 1.6;
716
+ white-space: pre-wrap;
717
+ word-break: break-word;
718
+ flex: 1;
719
+ color: var(--text);
720
+ font-family: var(--mono);
721
+ background: var(--bg-2);
722
+ border: 1px solid var(--border);
723
+ border-radius: var(--radius-sm);
724
+ padding: 0.85rem;
725
+ max-height: 240px;
726
+ overflow-y: auto;
727
+ }
728
+ .result-body.empty { color: var(--faint); font-style: italic; }
729
+ .result-body.method-list {
730
+ font-family: var(--font);
731
+ display: flex;
732
+ flex-direction: column;
733
+ gap: 0.65rem;
734
+ max-height: 320px;
735
+ }
736
+ .method-summary {
737
+ border-left: 2px solid var(--accent, var(--border));
738
+ padding-left: 0.65rem;
739
+ }
740
+ .method-summary.empty-method .method-text {
741
+ color: var(--faint);
742
+ font-style: italic;
743
+ }
744
+ .method-name {
745
+ font-size: 0.78rem;
746
+ font-weight: 600;
747
+ color: var(--accent, var(--muted));
748
+ font-family: var(--mono);
749
+ margin-bottom: 0.2rem;
750
+ }
751
+ .method-text {
752
+ font-size: 0.88rem;
753
+ line-height: 1.5;
754
+ color: var(--text);
755
+ }
756
+
757
+ .result-footer { display: flex; justify-content: space-between; align-items: center; }
758
+ .result-latency {
759
+ font-size: 0.78rem;
760
+ color: var(--muted);
761
+ font-family: var(--mono);
762
+ display: inline-flex;
763
+ align-items: center;
764
+ gap: 0.35rem;
765
+ }
766
+ .result-badge {
767
+ font-size: 0.7rem;
768
+ font-weight: 600;
769
+ padding: 0.2rem 0.55rem;
770
+ border-radius: 999px;
771
+ }
772
+ .result-badge.fastest { color: var(--good); background: rgba(52, 211, 153, 0.12); border: 1px solid rgba(52, 211, 153, 0.35); }
773
+ .result-badge.longest { color: var(--accent); background: rgba(91, 156, 255, 0.12); border: 1px solid rgba(91, 156, 255, 0.35); }
774
+
775
+ .copy-btn {
776
+ background: var(--surface-2);
777
+ border: 1px solid var(--border);
778
+ color: var(--muted);
779
+ font-size: 0.75rem;
780
+ padding: 0.3rem 0.6rem;
781
+ border-radius: 7px;
782
+ cursor: pointer;
783
+ transition: color 0.2s, border-color 0.2s;
784
+ font-family: inherit;
785
+ }
786
+ .copy-btn:hover { color: var(--text); border-color: var(--border-strong); }
787
+ .copy-btn.copied { color: var(--good); border-color: rgba(52, 211, 153, 0.4); }
788
+
789
+ /* ---------- Footer ---------- */
790
+ .site-footer {
791
+ margin-top: 3rem;
792
+ padding: 2.5rem 0;
793
+ text-align: center;
794
+ color: var(--faint);
795
+ font-size: 0.85rem;
796
+ border-top: 1px solid var(--border);
797
+ }
798
+
799
+ /* ---------- Reveal animation ---------- */
800
+ .reveal { opacity: 0; transform: translateY(24px); transition: opacity 0.6s ease, transform 0.6s ease; }
801
+ .reveal.in { opacity: 1; transform: translateY(0); }
802
+
803
+ .hidden { display: none !important; }
804
+
805
+ /* ---------- Responsive ---------- */
806
+ @media (max-width: 860px) {
807
+ .nav { display: none; }
808
+ .pipeline { flex-direction: column; align-items: stretch; }
809
+ .pipe-connector { min-height: 28px; justify-content: center; }
810
+ .pipe-connector span { width: 2px; height: auto; min-height: 24px; background: linear-gradient(180deg, transparent, var(--border-strong), transparent); }
811
+ .pipe-connector span::after { content: "⌄"; right: 50%; transform: translateX(50%); top: auto; bottom: -4px; }
812
+ .pipe-stage.models-fan { flex-direction: column; }
813
+ }
814
+
815
+ @media (max-width: 560px) {
816
+ .hero-stats { gap: 1.5rem; }
817
+ .model-cols { grid-template-columns: 1fr; }
818
+ .upload-bar { flex-direction: column; align-items: stretch; }
819
+ .btn.primary { justify-content: center; }
820
+ }
821
+
822
/* Honour the user's reduced-motion preference. */
@media (prefers-reduced-motion: reduce) {
  /* Anchor navigation should jump rather than glide. */
  html { scroll-behavior: auto; }
  *, *::before, *::after {
    animation-duration: 0.001s !important;
    /* Shortening the duration alone leaves `infinite` animations (drift,
       pulse, indeterminate) looping at high speed; run each one once. */
    animation-iteration-count: 1 !important;
    transition-duration: 0.001s !important;
  }
  /* Scroll-reveal content is shown immediately instead of sliding in. */
  .reveal { opacity: 1; transform: none; }
}
static/js/app.js ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const docBody = document.body;
2
+ window.APP_STATE = {
3
+ ready: docBody.dataset.appReady === "true",
4
+ loading: docBody.dataset.appLoading === "true",
5
+ error: docBody.dataset.appError || null,
6
+ };
7
+
8
+ const form = document.getElementById("upload-form");
9
+ const fileInput = document.getElementById("file-input");
10
+ const dropZone = document.getElementById("drop-zone");
11
+ const fileMeta = document.getElementById("file-meta");
12
+ const submitBtn = document.getElementById("submit-btn");
13
+ const loadingEl = document.getElementById("loading");
14
+ const errorEl = document.getElementById("error");
15
+ const resultsEl = document.getElementById("results");
16
+ const resultsMeta = document.getElementById("results-meta");
17
+ const resultsGrid = document.getElementById("results-grid");
18
+ const statusBadge = document.getElementById("status-badge");
19
+ const header = document.querySelector(".site-header");
20
+
21
+ let selectedFile = null;
22
+
23
+ /* ---------- Header shadow on scroll ---------- */
24
/**
 * Mark the sticky header as "scrolled" once the page has moved more than
 * 10px from the top, and clear the mark again when back at the top.
 */
function onScroll() {
  const pastThreshold = window.scrollY > 10;
  header.classList.toggle("scrolled", pastThreshold);
}
28
+ window.addEventListener("scroll", onScroll, { passive: true });
29
+ onScroll();
30
+
31
+ /* ---------- Scroll reveal ---------- */
32
// Add the "in" class to each `.reveal` element the first time at least 12%
// of it is visible, then stop observing it so the transition plays only once.
const revealObserver = new IntersectionObserver(
  (entries) => {
    for (const { isIntersecting, target } of entries) {
      if (!isIntersecting) continue;
      target.classList.add("in");
      revealObserver.unobserve(target);
    }
  },
  { threshold: 0.12 }
);
for (const revealEl of document.querySelectorAll(".reveal")) {
  revealObserver.observe(revealEl);
}
44
+
45
+ /* ---------- Active nav link on scroll ---------- */
46
// Nav links and the page sections their `href` selectors resolve to
// (links whose target is not found are ignored).
const navLinks = [...document.querySelectorAll(".nav-link")];
const sections = [];
for (const link of navLinks) {
  const target = document.querySelector(link.getAttribute("href"));
  if (target) sections.push(target);
}

// The root margin leaves only a thin band just above the viewport's middle,
// so the link for whichever section crosses that band is marked active.
const navObserver = new IntersectionObserver(
  (entries) => {
    for (const entry of entries) {
      if (!entry.isIntersecting) continue;
      const activeHref = `#${entry.target.getAttribute("id")}`;
      for (const link of navLinks) {
        link.classList.toggle("active", link.getAttribute("href") === activeHref);
      }
    }
  },
  { rootMargin: "-45% 0px -50% 0px" }
);
for (const section of sections) {
  navObserver.observe(section);
}
65
+
66
+ /* ---------- Expandable model cards ---------- */
67
// Clicking a card header toggles the card's "open" class and mirrors the new
// state into aria-expanded on the header button.
for (const head of document.querySelectorAll(".model-card-head")) {
  head.addEventListener("click", () => {
    const isOpen = head.closest(".model-card").classList.toggle("open");
    head.setAttribute("aria-expanded", isOpen ? "true" : "false");
  });
}
74
+
75
+ /* ---------- Upload helpers ---------- */
76
/**
 * Show `message` in the error alert, or clear and hide the alert when
 * `message` is empty/falsy.
 *
 * @param {string} message - Text to display; falsy hides the alert.
 */
function setError(message) {
  const hasMessage = Boolean(message);
  errorEl.textContent = hasMessage ? message : "";
  errorEl.classList.toggle("hidden", !hasMessage);
}
85
+
86
/**
 * Remember the chosen file, show its name and size (in KB, one decimal) in
 * the file chip, and enable the submit button only if the models are ready.
 *
 * @param {File} file - File picked via the input or dropped on the zone.
 */
function updateFileMeta(file) {
  const sizeKb = (file.size / 1024).toFixed(1);
  selectedFile = file;
  fileMeta.textContent = `${file.name} · ${sizeKb} KB`;
  fileMeta.classList.remove("hidden");
  submitBtn.disabled = !window.APP_STATE.ready;
}
92
+
93
// Clicking the drop zone opens the native file picker.
dropZone.addEventListener("click", () => {
  fileInput.click();
});

// preventDefault on dragover is what allows the later "drop" event to fire.
dropZone.addEventListener("dragover", (event) => {
  event.preventDefault();
  dropZone.classList.add("dragover");
});

dropZone.addEventListener("dragleave", () => {
  dropZone.classList.remove("dragover");
});

// Only the first dropped file is used.
dropZone.addEventListener("drop", (event) => {
  event.preventDefault();
  dropZone.classList.remove("dragover");
  const dropped = event.dataTransfer.files[0];
  if (dropped) updateFileMeta(dropped);
});

fileInput.addEventListener("change", () => {
  const chosen = fileInput.files[0];
  if (chosen) updateFileMeta(chosen);
});
110
+
111
+ /* ---------- Submit ---------- */
112
// Upload the selected file to /api/summarize and render the response.
// While the request is in flight the loader is shown and the submit button
// is disabled; both are restored in `finally` whatever the outcome.
form.addEventListener("submit", async (e) => {
  e.preventDefault();
  if (!selectedFile) return;

  if (!window.APP_STATE.ready) {
    setError("Models are not ready yet. Please wait for startup to finish.");
    return;
  }

  setError("");
  resultsEl.classList.add("hidden");
  loadingEl.classList.remove("hidden");
  submitBtn.disabled = true;

  const payload = new FormData();
  payload.append("file", selectedFile);

  try {
    const res = await fetch("/api/summarize", { method: "POST", body: payload });

    // Error responses produced outside the app (proxy 413/502 pages, empty
    // bodies) are not JSON; treat an unparsable body as "no payload" instead
    // of showing the user a JSON parse error.
    let data = null;
    try {
      data = await res.json();
    } catch {
      data = null;
    }

    if (!res.ok || data === null) {
      const detail = data && data.detail;
      let message = "Summarization failed.";
      if (typeof detail === "string" && detail) {
        message = detail;
      } else if (Array.isArray(detail) && detail.length) {
        // Validation errors arrive as a list of objects; show their messages
        // rather than "[object Object]".
        message = detail
          .map((item) => (item && item.msg ? String(item.msg) : String(item)))
          .join("; ");
      }
      throw new Error(message);
    }

    renderResults(data);
    resultsEl.classList.remove("hidden");
    resultsEl.scrollIntoView({ behavior: "smooth", block: "nearest" });
  } catch (err) {
    setError(err.message);
  } finally {
    loadingEl.classList.add("hidden");
    submitBtn.disabled = !window.APP_STATE.ready;
  }
});
143
+
144
+ /* ---------- Render results ---------- */
145
/**
 * Render a summarization response: a row of metadata chips plus one card per
 * model summary, then wire up each card's Copy button.
 *
 * Every server-supplied value is coerced to a string and HTML-escaped before
 * it is interpolated into markup, including values placed in attributes.
 *
 * @param {object} data - Parsed response. Reads `filename`, `method_count`,
 *   `statement_count`, `token_count`, `total_elapsed_ms` and `summaries`
 *   (array of per-model results with `model_id`, `model`, `tier`,
 *   `approach`, `accent`, `summary`, `elapsed_ms` and optional `methods`).
 */
function renderResults(data) {
  // Coerce first so a missing or non-string field cannot throw in escapeHtml.
  const esc = (value) => escapeHtml(String(value == null ? "" : value));
  const hasText = (s) => Boolean(s.summary && s.summary.trim());

  resultsMeta.innerHTML = [
    ["File", data.filename],
    ["Methods", data.method_count],
    ["Statements", data.statement_count],
    ["Tokens", data.token_count],
    ["Total time", `${data.total_elapsed_ms} ms`],
  ]
    .map(([label, value]) => `<span class="chip"><strong>${label}:</strong> ${esc(value)}</span>`)
    .join("");

  const summaries = Array.isArray(data.summaries) ? data.summaries : [];

  // Both reductions are seeded with null: reduce() without an initial value
  // throws on an empty array. Ties keep the earliest entry.
  const fastest = summaries.reduce(
    (best, s) => (best === null || s.elapsed_ms < best.elapsed_ms ? s : best),
    null
  );
  const longest = summaries
    .filter(hasText)
    .reduce(
      (best, s) => (best === null || s.summary.length > best.summary.length ? s : best),
      null
    );

  resultsGrid.innerHTML = summaries
    .map((s, i) => {
      const accent = esc(s.accent || "#5b9cff");
      // Used as a class-name suffix, so restrict it to class-safe characters.
      const approachClass = String(s.approach || "")
        .toLowerCase()
        .replace(/[^a-z0-9_-]/g, "");
      const isEmpty = !hasText(s);

      const badges = [];
      if (s === fastest) badges.push(`<span class="result-badge fastest">⚡ Fastest</span>`);
      if (s === longest) badges.push(`<span class="result-badge longest">Most detail</span>`);

      // CodeT5 results carry per-method summaries that are rendered as a list.
      const isCodet5 = s.model_id === "codet5" && s.methods && s.methods.length > 0;
      let bodyHtml;
      let bodyClass;
      if (isEmpty) {
        bodyHtml = "(no output produced)";
        bodyClass = "empty";
      } else if (isCodet5) {
        bodyHtml = renderMethodSummaries(s.methods);
        bodyClass = "method-list";
      } else {
        bodyHtml = esc(s.summary);
        bodyClass = "";
      }

      return `
        <article class="result-card" style="--accent:${accent}; animation-delay:${i * 70}ms">
          <div class="result-header">
            <div class="result-header-left">
              <span class="model-glyph" style="--c:${accent}">${esc(glyphFor(s.model_id, s.model))}</span>
              <div>
                <h3>${esc(s.model)}</h3>
                <div class="result-tier">${esc(s.tier)}</div>
              </div>
            </div>
            <span class="tag tag-${approachClass}">${esc(s.approach)}</span>
          </div>
          <div class="result-body ${bodyClass}">${bodyHtml}</div>
          <div class="result-footer">
            <span class="result-latency">⏱ ${esc(s.elapsed_ms)} ms ${badges.join(" ")}</span>
            <button class="copy-btn" type="button" ${isEmpty ? "disabled" : ""}>Copy</button>
          </div>
        </article>
      `;
    })
    .join("");

  // Temporarily relabel a copy button, then restore it after 1.6s.
  const flash = (btn, label, cssClass) => {
    btn.textContent = label;
    if (cssClass) btn.classList.add(cssClass);
    setTimeout(() => {
      btn.textContent = "Copy";
      if (cssClass) btn.classList.remove(cssClass);
    }, 1600);
  };

  // One copy button is emitted per card, so button index == summary index.
  resultsGrid.querySelectorAll(".copy-btn").forEach((btn, idx) => {
    btn.addEventListener("click", () => {
      const text = summaries[idx].summary || "";
      const clipboard = navigator.clipboard;
      // The async Clipboard API is unavailable outside secure contexts and
      // writeText() may reject (e.g. permission denied); report both.
      if (!clipboard || typeof clipboard.writeText !== "function") {
        flash(btn, "Copy failed");
        return;
      }
      clipboard.writeText(text).then(
        () => flash(btn, "Copied", "copied"),
        () => flash(btn, "Copy failed")
      );
    });
  });
}
216
+
217
/**
 * Build the HTML for a list of per-method summaries.
 *
 * @param {Array<{name: string, summary: string}>} methods - Method entries.
 * @returns {string} One `.method-summary` block per entry, concatenated;
 *   entries with a blank summary get the `empty-method` modifier and the
 *   placeholder text "(no output)". Returns "" for an empty list.
 */
function renderMethodSummaries(methods) {
  if (!methods.length) return "";

  const toBlock = (method) => {
    const trimmed = (method.summary || "").trim();
    const modifier = trimmed ? "" : " empty-method";
    const shownText = trimmed || "(no output)";
    return `
      <div class="method-summary${modifier}">
        <div class="method-name">${escapeHtml(method.name)}</div>
        <div class="method-text">${escapeHtml(shownText)}</div>
      </div>
    `;
  };

  return methods.map(toBlock).join("");
}
233
+
234
/**
 * Return the two-letter glyph shown in a model's badge.
 *
 * @param {string} id - Model identifier (e.g. "tfidf", "codet5").
 * @param {string} [name] - Display name used when `id` has no fixed glyph.
 * @returns {string} The fixed glyph for known ids; otherwise the first two
 *   characters of `name` upper-cased, or "?" when `name` is empty/missing.
 */
function glyphFor(id, name) {
  const glyphs = {
    tfidf: "TF",
    lexrank: "LR",
    sentence_transformers: "ST",
    codet5: "T5",
  };
  // Own-property check: a plain `glyphs[id]` lookup would also find inherited
  // members, so an id such as "constructor" would return a function.
  if (Object.prototype.hasOwnProperty.call(glyphs, id)) return glyphs[id];
  // String() guards against a non-string name (e.g. a number) lacking slice().
  return String(name || "?").slice(0, 2).toUpperCase();
}
243
+
244
/**
 * Escape text for safe interpolation into HTML content or a quoted attribute.
 *
 * @param {*} text - Value to escape. `null`/`undefined` become ""; any other
 *   non-string value is converted with String() first.
 * @returns {string} `text` with `&`, `<`, `>`, `"` and `'` replaced by
 *   character references.
 */
function escapeHtml(text) {
  // `&` must be replaced first so the entities added below are not re-escaped.
  return String(text == null ? "" : text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
251
+
252
+ /* ---------- Health polling ---------- */
253
/**
 * Update the header status badge.
 *
 * @param {string} state - Modifier class placed after "status-badge"
 *   (the callers in this file pass "ready", "loading" or "error").
 * @param {string} text - Label written into the badge's `.status-text` node.
 */
function setStatus(state, text) {
  const label = statusBadge.querySelector(".status-text");
  // className is overwritten wholesale, dropping any previous state class.
  statusBadge.className = `status-badge ${state}`;
  label.textContent = text;
}
257
+
258
/**
 * Fetch /api/health, copy its `ready` / `loading` / `error` fields into
 * window.APP_STATE and update the status badge accordingly.
 *
 * Re-schedules itself every 3s while the server is neither ready nor
 * reporting an error, and after 5s if the request or its handling throws.
 * Polling stops once the server is ready or reports an error.
 */
async function pollHealth() {
  try {
    const response = await fetch("/api/health");
    const { ready, loading, error } = await response.json();
    Object.assign(window.APP_STATE, { ready, loading, error });

    if (ready) {
      setStatus("ready", "Models ready");
      // A file may have been chosen while the models were still loading.
      if (selectedFile) submitBtn.disabled = false;
    } else if (loading) {
      setStatus("loading", "Loading models…");
    } else {
      setStatus("error", error ? "Load failed" : "Not ready");
      if (error) setError(error);
    }

    const keepPolling = !(ready || error);
    if (keepPolling) setTimeout(pollHealth, 3000);
  } catch {
    // Best-effort: any failure just retries on a slower cadence.
    setTimeout(pollHealth, 5000);
  }
}
281
+
282
// Start health polling unless the server already rendered the page as ready;
// in the ready case, enable submit if a file has already been selected.
if (window.APP_STATE.ready) {
  if (selectedFile) submitBtn.disabled = false;
} else {
  pollHealth();
}
templates/index.html ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Auto-README — Java Summarizer</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
9
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet" />
10
+ <link rel="stylesheet" href="/static/css/style.css" />
11
+ </head>
12
+ <body
13
+ data-app-ready="{{ 'true' if ready else 'false' }}"
14
+ data-app-loading="{{ 'true' if loading else 'false' }}"
15
+ data-app-error="{{ error|e if error else '' }}"
16
+ >
17
+ <div class="bg-aurora" aria-hidden="true">
18
+ <span class="blob blob-1"></span>
19
+ <span class="blob blob-2"></span>
20
+ <span class="blob blob-3"></span>
21
+ <span class="grid-overlay"></span>
22
+ </div>
23
+
24
+ <header class="site-header">
25
+ <div class="container header-inner">
26
+ <a class="brand" href="#top">
27
+ <span class="brand-icon">&lt;/&gt;</span>
28
+ <div>
29
+ <h1>Code Summarization</h1>
30
+ <p>Java code summarization comparison</p>
31
+ </div>
32
+ </a>
33
+ <nav class="nav" id="primary-nav">
34
+ <a href="#architecture" class="nav-link">Architecture</a>
35
+ <a href="#models" class="nav-link">Models</a>
36
+ <a href="#try-it" class="nav-link">Try it</a>
37
+ </nav>
38
+ <div id="status-badge" class="status-badge {% if ready %}ready{% elif loading %}loading{% else %}error{% endif %}">
39
+ <span class="status-dot"></span>
40
+ <span class="status-text">{% if ready %}Models ready{% elif loading %}Loading models…{% else %}Not ready{% endif %}</span>
41
+ </div>
42
+ </div>
43
+ </header>
44
+
45
+ <main id="top">
46
+ <section class="hero container">
47
+ <div class="hero-content reveal">
48
+ <span class="eyebrow">NLP · CODE SUMMARIZATION</span>
49
+ <h2 class="hero-title">
50
+ Four ways to summarize
51
+ <span class="gradient-text">Java source code</span>
52
+ </h2>
53
+ <p class="hero-sub">
54
+ Upload a <code>.java</code> file and watch corpus-fitted extractive baselines,
55
+ a semantic embedding model, and a fine-tuned transformer each generate a
56
+ Java code summary, compared live side by side.
57
+ </p>
58
+ <div class="hero-actions">
59
+ <a href="#try-it" class="btn primary">Try it now</a>
60
+ <a href="#architecture" class="btn ghost">See how it works</a>
61
+ </div>
62
+ <div class="hero-stats">
63
+ <div class="stat"><span class="stat-num">4</span><span class="stat-label">models</span></div>
64
+ <div class="stat"><span class="stat-num">live</span><span class="stat-label">inference</span></div>
65
+ </div>
66
+ </div>
67
+ </section>
68
+
69
+ <section id="architecture" class="container section">
70
+ <div class="section-head reveal">
71
+ <span class="section-kicker">01 — Architecture</span>
72
+ <h2>How a file becomes four summaries</h2>
73
+ <p class="section-lead">
74
+ Extractive models summarize the whole file from split statements. CodeT5 runs
75
+ once per Java method, the same setup used in the CodeXGLUE evaluation.
76
+ </p>
77
+ </div>
78
+
79
+ <div class="pipeline reveal">
80
+ <div class="pipe-stage" data-stage="1">
81
+ <div class="pipe-node input">
82
+ <span class="pipe-icon">{ }</span>
83
+ <h4>Java upload</h4>
84
+ <p>A single <code>.java</code> file</p>
85
+ </div>
86
+ </div>
87
+
88
+ <div class="pipe-connector"><span></span></div>
89
+
90
+ <div class="pipe-stage" data-stage="2">
91
+ <div class="pipe-node process">
92
+ <span class="pipe-icon">⛓</span>
93
+ <h4>Preprocess</h4>
94
+ <p>Split statements; CodeT5 splits by method</p>
95
+ </div>
96
+ </div>
97
+
98
+ <div class="pipe-connector fan"><span></span></div>
99
+
100
+ <div class="pipe-stage models-fan" data-stage="3">
101
+ <div class="pipe-node model extractive">
102
+ <span class="model-glyph" style="--c:#2dd4bf">TF</span>
103
+ <h4>TF-IDF</h4>
104
+ <p>Term scoring</p>
105
+ </div>
106
+ <div class="pipe-node model extractive">
107
+ <span class="model-glyph" style="--c:#38bdf8">LR</span>
108
+ <h4>LexRank</h4>
109
+ <p>Graph centrality</p>
110
+ </div>
111
+ <div class="pipe-node model extractive">
112
+ <span class="model-glyph" style="--c:#a78bfa">ST</span>
113
+ <h4>Sentence-T</h4>
114
+ <p>Embeddings</p>
115
+ </div>
116
+ <div class="pipe-node model abstractive">
117
+ <span class="model-glyph" style="--c:#f59e0b">T5</span>
118
+ <h4>CodeT5</h4>
119
+ <p>Generation</p>
120
+ </div>
121
+ </div>
122
+
123
+ <div class="pipe-connector fan-in"><span></span></div>
124
+
125
+ <div class="pipe-stage" data-stage="4">
126
+ <div class="pipe-node output">
127
+ <span class="pipe-icon">▦</span>
128
+ <h4>Summary</h4>
129
+ <p>Four summaries</p>
130
+ </div>
131
+ </div>
132
+ </div>
133
+
134
+ <div class="arch-grid">
135
+ <article class="arch-card reveal">
136
+ <div class="arch-card-icon">①</div>
137
+ <h3>Preprocessing</h3>
138
+ <ul>
139
+ <li>Split on <code>;</code> <code>{</code> <code>}</code> and newlines</li>
140
+ <li>Merge tiny fragments (&lt; 3 tokens)</li>
141
+ <li>CamelCase / snake_case identifier splitting</li>
142
+ <li>Java keyword + English stopword filtering</li>
143
+ </ul>
144
+ </article>
145
+ <article class="arch-card reveal">
146
+ <div class="arch-card-icon">②</div>
147
+ <h3>Corpus fitting</h3>
148
+ <ul>
149
+ <li>TF-IDF &amp; LexRank IDF from CodeXGLUE Java train + validation</li>
150
+ <li>Weights cached to <code>cache/idf_weights_train_val.pkl</code></li>
151
+ <li>Neural models use frozen pretrained checkpoints</li>
152
+ <li>One-time load, then served from memory</li>
153
+ </ul>
154
+ </article>
155
+ <article class="arch-card reveal">
156
+ <div class="arch-card-icon">③</div>
157
+ <h3>Output</h3>
158
+ <ul>
159
+ <li>Extractive models return top-N statements from the whole file</li>
160
+ <li>CodeT5 generates one English sentence per method (evaluation setup)</li>
161
+ <li>Per-model latency tracked for each run</li>
162
+ <li>Results compared in a single view</li>
163
+ </ul>
164
+ </article>
165
+ </div>
166
+ </section>
167
+
168
+ <section id="models" class="container section">
169
+ <div class="section-head reveal">
170
+ <span class="section-kicker">02 — Models</span>
171
+ <h2>The four summarizers</h2>
172
+ <p class="section-lead">
173
+ Each model represents a different tier of prior knowledge. Click a card to expand
174
+ its step-by-step algorithm, strengths, and limitations.
175
+ </p>
176
+ </div>
177
+
178
+ <div class="model-cards">
179
+ {% for m in models %}
180
+ <article class="model-card reveal {{ m.approach|lower }}" style="--accent: {{ m.accent }}" data-model="{{ m.id }}">
181
+ <button class="model-card-head" aria-expanded="false">
182
+ <span class="model-glyph" style="--c: {{ m.accent }}">{{ m.glyph }}</span>
183
+ <div class="model-card-titles">
184
+ <h3>{{ m.name }}</h3>
185
+ <span class="model-family">{{ m.family }}</span>
186
+ </div>
187
+ <span class="model-chevron" aria-hidden="true">⌄</span>
188
+ </button>
189
+
190
+ <div class="model-card-meta">
191
+ <span class="tag tag-{{ m.approach|lower }}">{{ m.approach }}</span>
192
+ <span class="tag tag-tier">{{ m.tier }}</span>
193
+ <span class="tag tag-speed">{{ m.speed }}</span>
194
+ </div>
195
+
196
+ <p class="model-tagline">{{ m.tagline }}</p>
197
+
198
+ <div class="model-card-body">
199
+ <p class="model-desc">{{ m.description }}</p>
200
+
201
+ <div class="model-section">
202
+ <h4>How it works</h4>
203
+ <ol class="model-steps">
204
+ {% for step in m.steps %}
205
+ <li><span class="step-num">{{ loop.index }}</span><span>{{ step }}</span></li>
206
+ {% endfor %}
207
+ </ol>
208
+ </div>
209
+
210
+ <div class="model-cols">
211
+ <div class="model-section">
212
+ <h4 class="good">Strengths</h4>
213
+ <ul class="pill-list">
214
+ {% for s in m.strengths %}<li class="pill good">{{ s }}</li>{% endfor %}
215
+ </ul>
216
+ </div>
217
+ <div class="model-section">
218
+ <h4 class="warn">Limitations</h4>
219
+ <ul class="pill-list">
220
+ {% for l in m.limitations %}<li class="pill warn">{{ l }}</li>{% endfor %}
221
+ </ul>
222
+ </div>
223
+ </div>
224
+
225
+ <dl class="model-facts">
226
+ <div><dt>Input</dt><dd>{{ m.input }}</dd></div>
227
+
228
+ </dl>
229
+ </div>
230
+ </article>
231
+ {% endfor %}
232
+ </div>
233
+ </section>
234
+
235
+ <section id="try-it" class="container section">
236
+ <div class="section-head reveal">
237
+ <span class="section-kicker">03 — Try it</span>
238
+ <h2>Summarize your Java file</h2>
239
+ <p class="section-lead">Upload a <code>.java</code> file. Extractive models use the whole file; CodeT5 summarizes each method separately.</p>
240
+ </div>
241
+
242
+ <div class="try-panel reveal">
243
+ <form id="upload-form" class="upload-zone" enctype="multipart/form-data">
244
+ <input type="file" id="file-input" name="file" accept=".java" hidden />
245
+ <div class="upload-inner" id="drop-zone">
246
+ <div class="upload-icon">↑</div>
247
+ <p class="upload-title">Drop a <code>.java</code> file here</p>
248
+ <p class="muted">or click to browse · UTF-8 text · multi-method classes supported</p>
249
+ </div>
250
+ <div class="upload-bar">
251
+ <div id="file-meta" class="file-meta hidden"></div>
252
+ <button type="submit" id="submit-btn" class="btn primary" disabled>
253
+ <span class="btn-label">Generate summaries</span>
254
+ </button>
255
+ </div>
256
+ </form>
257
+
258
+ <div id="loading" class="loading hidden">
259
+ <div class="loader-track">
260
+ <div class="loader-bar"></div>
261
+ </div>
262
+ <p>Running all four models… CodeT5 runs once per method and may take longer on CPU.</p>
263
+ </div>
264
+
265
+ <div id="error" class="alert error hidden"></div>
266
+
267
+ <div id="results" class="results hidden">
268
+ <div class="results-meta" id="results-meta"></div>
269
+ <div class="results-grid" id="results-grid"></div>
270
+ </div>
271
+ </div>
272
+ </section>
273
+ </main>
274
+
275
+ <footer class="site-footer">
276
+
277
+ </footer>
278
+
279
+ <script src="/static/js/app.js"></script>
280
+ </body>
281
+ </html>