Molbap HF Staff commited on
Commit
46a9b7a
·
verified ·
1 Parent(s): c987363

Add FastAPI app + static UI

Browse files
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Runtime image for the FastAPI app + static UI; HF Spaces expects port 7860.
FROM python:3.11-slim

WORKDIR /app

# Quiet, container-friendly Python and Hugging Face defaults.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    HF_HUB_DISABLE_PROGRESS_BARS=1 \
    TRANSFORMERS_VERBOSITY=error

# Install dependencies before copying code so edits don't bust the pip layer cache.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY app /app/app
COPY static /app/static

EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/__pycache__/detector.cpython-312.pyc ADDED
Binary file (24.1 kB). View file
 
app/__pycache__/graph.cpython-312.pyc ADDED
Binary file (5.11 kB). View file
 
app/__pycache__/main.cpython-312.pyc ADDED
Binary file (3.77 kB). View file
 
app/detector.py ADDED
@@ -0,0 +1,538 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import json
3
+ import math
4
+ import os
5
+ import re
6
+ from dataclasses import dataclass
7
+ from functools import cache
8
+ from pathlib import Path
9
+
10
+ import numpy as np
11
+ from huggingface_hub import snapshot_download
12
+ from safetensors.numpy import load_file as safetensors_load
13
+ import torch
14
+ from transformers import AutoModel, AutoTokenizer
15
+
16
+ import transformers
17
+
18
+ MODELS_ROOT = Path(transformers.__file__).resolve().parent / "models"
19
+
20
+ EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B"
21
+ BATCH_SIZE = 16
22
+ MAX_LENGTH = 4096
23
+ HYBRID_ALPHA = 0.7
24
+ HUB_DATASET_DEFAULT = "hf-internal-testing/transformers_code_embeddings"
25
+
26
+
27
@dataclass
class Match:
    """One similarity hit against the prebuilt code index."""

    identifier: str  # index key, "<relative_path>:<definition>"
    relative_path: str  # file path relative to transformers' models/ directory
    match_name: str  # matched function/class/method name
    score: float  # hybrid similarity score (higher is better)
33
+
34
+
35
+ def _sanitize_for_embedding(code: str, model_hint: str | None, symbol_hint: str | None) -> str:
36
+ base = "\n".join(
37
+ line
38
+ for line in re.sub(r"#.*", "", re.sub(r'(\"\"\"|\'\'\')(?:.|\n)*?\1', "", code)).splitlines()
39
+ if not re.match(r"\s*(from|import)\s+", line)
40
+ )
41
+ variants = set()
42
+ if model_hint:
43
+ variants.add(model_hint)
44
+ variants.add(model_hint.replace("_", ""))
45
+ variants.add(re.sub(r"\d+", "", model_hint))
46
+ if symbol_hint:
47
+ match = re.match(r"^([A-Z][a-z0-9]+)", symbol_hint) or re.match(r"^([A-Za-z0-9]+)", symbol_hint)
48
+ prefix = match.group(1) if match else ""
49
+ if prefix:
50
+ variants.add(prefix)
51
+ variants.add(prefix.replace("_", ""))
52
+ variants.add(re.sub(r"\d+", "", prefix))
53
+ variants |= {variant.lower() for variant in list(variants)}
54
+ sanitized = base
55
+ for variant in sorted({x for x in variants if len(x) >= 3}, key=len, reverse=True):
56
+ sanitized = re.sub(re.escape(variant), "Model", sanitized, flags=re.IGNORECASE)
57
+ return sanitized
58
+
59
+
60
+ def _compute_idf(tokens_map: dict[str, list[str]]) -> tuple[dict[str, float], float]:
61
+ doc_count = len(tokens_map)
62
+ if doc_count == 0:
63
+ return {}, 1.0
64
+ df: dict[str, int] = {}
65
+ for tokens in tokens_map.values():
66
+ for token in set(tokens):
67
+ df[token] = df.get(token, 0) + 1
68
+ idf = {token: math.log((doc_count + 1) / (count + 1)) + 1.0 for token, count in df.items()}
69
+ default_idf = math.log((doc_count + 1) / 1) + 1.0
70
+ return idf, default_idf
71
+
72
+
73
+ def _weighted_jaccard(
74
+ query_tokens: set[str], candidate_tokens: set[str], idf_map: dict[str, float], default_idf: float
75
+ ) -> float:
76
+ if not query_tokens or not candidate_tokens:
77
+ return 0.0
78
+ intersection = query_tokens & candidate_tokens
79
+ if not intersection:
80
+ return 0.0
81
+ union = query_tokens | candidate_tokens
82
+ union_weight = sum(idf_map.get(token, default_idf) for token in union)
83
+ if union_weight <= 0:
84
+ return 0.0
85
+ intersection_weight = sum(idf_map.get(token, default_idf) for token in intersection)
86
+ return intersection_weight / union_weight
87
+
88
+
89
@cache
def _load_definition_line_map(relative_path: str) -> dict[str, int]:
    """Map top-level definition names (and "Class.method") to 1-based line numbers.

    Reads the file under MODELS_ROOT. Returns {} when the file is missing,
    unreadable, or not valid Python. Memoized per relative path.
    """
    file_path = MODELS_ROOT / relative_path
    try:
        source = file_path.read_text(encoding="utf-8")
    except OSError:  # covers FileNotFoundError too — it is an OSError subclass
        return {}

    try:
        tree = ast.parse(source)
    except SyntaxError:
        return {}

    line_map: dict[str, int] = {}
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            line_map[node.name] = node.lineno
        if isinstance(node, ast.ClassDef):
            # Also index each method under "ClassName.method".
            for child in node.body:
                if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    line_map[f"{node.name}.{child.name}"] = child.lineno
    return line_map
112
+
113
+
114
def _resolve_definition_location(relative_path: str, definition: str) -> tuple[str, int | None]:
    """Return (absolute file path, line number or None) for a definition in a models file."""
    absolute_path = (MODELS_ROOT / relative_path).resolve()
    line_number = _load_definition_line_map(relative_path).get(definition)
    return str(absolute_path), line_number
118
+
119
+
120
class CodeSimilarityAnalyzer:
    """Rank user code against a prebuilt embedding index of transformers' models/ code.

    The index (a safetensors embedding matrix plus JSON identifier/token maps)
    is resolved from $INDEX_DIR, the CWD, the repo root, or a Hub dataset
    snapshot — preferring the requested granularity ("method") and falling back
    to the coarser "definition" index when the method-level files are missing.

    Fixes vs. the original: inference now always runs under ``torch.no_grad()``
    (the CUDA path previously ran under autocast only, accumulating autograd
    graphs), and ``_topk`` no longer crashes when the candidate pool exceeds
    the index size.
    """

    def __init__(self, hub_dataset: str, precision: str = "float32", granularity: str = "method"):
        self.hub_dataset = hub_dataset
        self.precision = precision
        self.requested_granularity = granularity
        # May be downgraded to "definition" by ensure_local_index().
        self.index_granularity = granularity
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.dtype = torch.float16 if self.device.type == "cuda" else torch.float32
        self.tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL, trust_remote_code=True)
        self.model = AutoModel.from_pretrained(
            EMBEDDING_MODEL, trust_remote_code=True, torch_dtype=self.dtype, device_map=None
        ).to(self.device)
        self.model.eval()
        self.index_dir: Path | None = None
        self.index_origin: str | None = None  # "env" | "cwd" | "repo" | "hub"
        self.missing_files: tuple[str, ...] = ()  # files absent for the requested granularity
        self._index_cache: dict[str, object] | None = None

    def _embedding_filename(self, granularity: str | None = None) -> str:
        """Safetensors filename for the given (or current) granularity/precision."""
        granularity = granularity or self.index_granularity
        suffix = ""
        if granularity == "method":
            suffix += "_methods"
        if self.precision == "int8":
            suffix += "_int8"
        if not suffix:
            return "embeddings.safetensors"
        return f"embeddings{suffix}.safetensors"

    def _index_map_filename(self, granularity: str | None = None) -> str:
        """JSON file mapping row index -> identifier."""
        granularity = granularity or self.index_granularity
        if granularity == "method":
            return "code_index_map_methods.json"
        return "code_index_map.json"

    def _tokens_filename(self, granularity: str | None = None) -> str:
        """JSON file mapping identifier -> token list (for the Jaccard re-rank)."""
        granularity = granularity or self.index_granularity
        if granularity == "method":
            return "code_index_tokens_methods.json"
        return "code_index_tokens.json"

    def _resolve_index_path(self, filename: str) -> Path:
        """Resolve a filename inside the chosen index directory (CWD when unset)."""
        if self.index_dir is None:
            return Path(filename)
        return self.index_dir / filename

    def _required_index_files(self, granularity: str | None = None) -> tuple[str, ...]:
        """All three files an index directory must contain to be usable."""
        return (
            self._embedding_filename(granularity),
            self._index_map_filename(granularity),
            self._tokens_filename(granularity),
        )

    def ensure_local_index(self) -> None:
        """Locate a usable index directory, downloading a Hub snapshot as last resort.

        Search order: already-resolved dir, $INDEX_DIR, CWD, repo root, Hub.
        When the requested "method" index is nowhere complete, fall back to the
        "definition" index. Raises FileNotFoundError when neither exists.
        """
        required_files = self._required_index_files(self.requested_granularity)
        if self.index_dir is not None and all((self.index_dir / fname).exists() for fname in required_files):
            return

        # Renamed from `missing_files` so it cannot be confused with the
        # self.missing_files attribute.
        def _list_missing(directory: Path, granularity: str) -> list[str]:
            return [fname for fname in self._required_index_files(granularity) if not (directory / fname).exists()]

        candidates: list[tuple[str, Path]] = []
        env_dir = os.getenv("INDEX_DIR")
        if env_dir:
            candidates.append(("env", Path(env_dir)))
        candidates.append(("cwd", Path.cwd()))
        candidates.append(("repo", Path(__file__).resolve().parent.parent))

        missing_preferred: list[str] = []
        for origin, candidate in candidates:
            missing_preferred = _list_missing(candidate, self.requested_granularity)
            if not missing_preferred:
                self.index_dir = candidate
                self.index_origin = origin
                self.index_granularity = self.requested_granularity
                self.missing_files = ()
                self._index_cache = None
                return

        # Remember any local directory that at least has the definition-level index.
        fallback_dir: Path | None = None
        fallback_origin: str | None = None
        fallback_missing: list[str] = []
        if self.requested_granularity == "method":
            for origin, candidate in candidates:
                fallback_missing = _list_missing(candidate, "definition")
                if not fallback_missing:
                    fallback_dir = candidate
                    fallback_origin = origin
                    break

        snapshot_dir = Path(snapshot_download(repo_id=self.hub_dataset, repo_type="dataset"))
        hub_missing_preferred = _list_missing(snapshot_dir, self.requested_granularity)
        hub_missing_fallback: list[str] = []
        if self.requested_granularity == "method":
            hub_missing_fallback = _list_missing(snapshot_dir, "definition")

        if not hub_missing_preferred:
            self.index_dir = snapshot_dir
            self.index_origin = "hub"
            self.index_granularity = self.requested_granularity
            self.missing_files = ()
            self._index_cache = None
            return

        if self.requested_granularity == "method" and not hub_missing_fallback:
            # Hub has only the definition-level index: use it, record what's missing.
            self.index_dir = snapshot_dir
            self.index_origin = "hub"
            self.index_granularity = "definition"
            self.missing_files = tuple(hub_missing_preferred)
            self._index_cache = None
            return

        if fallback_dir is not None:
            self.index_dir = fallback_dir
            self.index_origin = fallback_origin
            self.index_granularity = "definition"
            self.missing_files = tuple(missing_preferred)
            self._index_cache = None
            return

        missing_detail = ", ".join(hub_missing_preferred or missing_preferred)
        raise FileNotFoundError(
            "Missing expected files for requested granularity; unable to fall back to definition index. "
            f"Missing: {missing_detail}"
        )

    def _load_index(self) -> dict[str, object]:
        """Load and memoize the embedding matrix, identifier map, token map, and IDF stats."""
        if self._index_cache is not None:
            return self._index_cache
        self.ensure_local_index()
        embedding_path = self._resolve_index_path(self._embedding_filename())
        base = safetensors_load(str(embedding_path))
        base_embeddings = base["embeddings"]
        # Per-dimension dequantization scales are only present for int8 indexes.
        scales = base.get("scale") if self.precision == "int8" else None
        with open(self._resolve_index_path(self._index_map_filename()), "r", encoding="utf-8") as file:
            identifier_map = {int(key): value for key, value in json.load(file).items()}
        with open(self._resolve_index_path(self._tokens_filename()), "r", encoding="utf-8") as file:
            tokens_map = json.load(file)
        idf_map, default_idf = _compute_idf(tokens_map)
        self._index_cache = {
            "embeddings": base_embeddings,
            "scales": scales,
            "identifier_map": identifier_map,
            "tokens_map": tokens_map,
            "idf_map": idf_map,
            "default_idf": default_idf,
        }
        return self._index_cache

    def _encode_batch(self, texts: list[str]) -> np.ndarray:
        """Embed one batch: mean-pool over non-padding tokens, L2-normalize, float32."""
        encoded = self.tokenizer(texts, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
        encoded = {key: value.to(self.device) for key, value in encoded.items()}
        # FIX: always run inference under no_grad. The previous code used
        # autocast *instead of* no_grad on CUDA, so every forward pass built an
        # autograd graph and GPU memory grew with each request.
        with torch.no_grad():
            if self.device.type == "cuda":
                with torch.autocast(device_type="cuda", dtype=self.dtype):
                    output = self.model(**encoded)
            else:
                output = self.model(**encoded)
        if hasattr(output, "last_hidden_state"):
            embeddings = output.last_hidden_state
            mask = encoded["attention_mask"].unsqueeze(-1)
            # Masked mean; clamp prevents divide-by-zero on all-padding rows.
            embeddings = (embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1e-9)
        elif hasattr(output, "pooler_output"):
            embeddings = output.pooler_output
        else:
            embeddings = output[0].mean(dim=1)
        embeddings = torch.nn.functional.normalize(embeddings.float(), p=2, dim=1)
        return embeddings.cpu().numpy().astype("float32")

    def encode(self, texts: list[str]) -> np.ndarray:
        """Embed `texts` in BATCH_SIZE chunks; returns an (n, dim) float32 array."""
        if not texts:
            return np.zeros((0, 0), dtype="float32")
        output = []
        for i in range(0, len(texts), BATCH_SIZE):
            output.append(self._encode_batch(texts[i : i + BATCH_SIZE]))
            if self.device.type == "cuda":
                torch.cuda.empty_cache()
        return np.vstack(output) if output else np.zeros((0, 0), dtype="float32")

    def _topk(
        self,
        query_embedding_row: np.ndarray,
        base_embeddings: np.ndarray,
        scales: np.ndarray | None,
        identifier_map: dict[int, str],
        k: int,
        pool_size: int | None = None,
    ) -> list[tuple[str, float]]:
        """Return up to `k` (identifier, similarity) pairs for one query embedding."""
        if self.precision == "int8":
            if scales is None:
                raise ValueError("Missing int8 scales for int8 search.")
            # Fold the dequantization scales into the query once instead of
            # dequantizing the whole index.
            weighted_query = (query_embedding_row * scales).astype("float32")
            similarities = weighted_query @ base_embeddings.T.astype("float32")
        else:
            similarities = query_embedding_row @ base_embeddings.T
        pool = k + 32 if pool_size is None else max(k, pool_size)
        # FIX: np.argpartition requires kth < n; small indexes previously raised
        # ValueError. Fall back to a full sort when the pool covers the index.
        if pool >= similarities.shape[0]:
            indices = np.argsort(-similarities)
        else:
            indices = np.argpartition(-similarities, pool)[:pool]
            indices = indices[np.argsort(-similarities[indices])]
        output: list[tuple[str, float]] = []
        for match_id in indices:
            output.append((identifier_map[int(match_id)], float(similarities[match_id])))
            if len(output) >= k:
                break
        return output

    def _combine_hybrid(
        self,
        candidates: list[tuple[str, float]],
        query_tokens: set[str],
        tokens_map: dict[str, list[str]],
        idf_map: dict[str, float],
        default_idf: float,
        k: int,
    ) -> tuple[list[tuple[str, float]], dict[str, float], dict[str, float]]:
        """Re-rank embedding candidates with HYBRID_ALPHA·embedding + (1-α)·Jaccard.

        Returns (top-k hybrid pairs, per-identifier embedding scores,
        per-identifier Jaccard scores).
        """
        embedding_scores: dict[str, float] = {}
        jaccard_scores: dict[str, float] = {}
        hybrid_scores = []
        for identifier, embedding_score in candidates:
            tokens = set(tokens_map.get(identifier, []))
            jaccard_score = _weighted_jaccard(query_tokens, tokens, idf_map, default_idf)
            embedding_scores[identifier] = embedding_score
            jaccard_scores[identifier] = jaccard_score
            # Negative cosine scores are clipped so a bad embedding match cannot
            # drag a good token overlap below zero.
            hybrid = HYBRID_ALPHA * max(0.0, embedding_score) + (1.0 - HYBRID_ALPHA) * jaccard_score
            hybrid_scores.append((identifier, hybrid))
        hybrid_scores.sort(key=lambda item: item[1], reverse=True)
        return hybrid_scores[:k], embedding_scores, jaccard_scores

    @staticmethod
    def _identifier_tokens(sanitized: str) -> list[str]:
        """Sorted unique identifier-like tokens of a sanitized snippet."""
        return sorted(set(re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", sanitized)))

    @staticmethod
    def _node_source(code: str, lines: list[str], node: ast.AST) -> str | None:
        """Source text of `node`; falls back to line slicing when ast cannot segment it."""
        segment = ast.get_source_segment(code, node)
        if segment is None and hasattr(node, "lineno") and hasattr(node, "end_lineno"):
            segment = "\n".join(lines[max(0, node.lineno - 1) : node.end_lineno])
        return segment

    def _extract_definitions_from_code(
        self,
        code: str,
        model_hint: str | None,
        granularity: str,
    ) -> tuple[dict[str, str], dict[str, str], dict[str, list[str]], dict[str, str]]:
        """Split user code into query units that match the index granularity.

        Returns (raw source, sanitized source, token lists, kind) dicts keyed by
        identifier ("name" or "Class.method"). Raises SyntaxError on bad code.
        """
        definitions_raw: dict[str, str] = {}
        definitions_sanitized: dict[str, str] = {}
        definitions_tokens: dict[str, list[str]] = {}
        definitions_kind: dict[str, str] = {}
        lines = code.splitlines()
        tree = ast.parse(code)

        def record(identifier: str, raw: str, sanitize_input: str, kind: str) -> None:
            # The symbol hint is the class name for methods, else the name itself.
            sanitized = _sanitize_for_embedding(sanitize_input, model_hint, identifier.split(".", 1)[0])
            definitions_raw[identifier] = raw
            definitions_sanitized[identifier] = sanitized
            definitions_tokens[identifier] = self._identifier_tokens(sanitized)
            definitions_kind[identifier] = kind

        for node in ast.iter_child_nodes(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and granularity in ("definition", "method"):
                segment = self._node_source(code, lines, node)
                if segment:
                    record(node.name, segment, segment, "function")
                continue

            if isinstance(node, ast.ClassDef):
                class_segment = self._node_source(code, lines, node)
                class_header = class_segment.splitlines()[0].strip() if class_segment else ""
                class_docstring = ast.get_docstring(node)
                class_context = class_header
                if class_docstring:
                    # Prepend the docstring's first line so methods carry class intent.
                    first_line = class_docstring.strip().splitlines()[0]
                    class_context = f'{class_header}\n"""{first_line}"""' if class_header else first_line

                if granularity == "definition":
                    if class_segment:
                        record(node.name, class_segment, class_segment, "class")
                    continue

                for child in node.body:
                    if not isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                        continue
                    segment = self._node_source(code, lines, child)
                    if not segment:
                        continue
                    combined = f"{class_context}\n{segment}" if class_context else segment
                    record(f"{node.name}.{child.name}", segment, combined, "method")
        return definitions_raw, definitions_sanitized, definitions_tokens, definitions_kind

    def analyze_code(
        self,
        code: str,
        top_k_per_item: int = 5,
        use_jaccard: bool = False,
        model_hint: str | None = None,
    ) -> dict[str, dict[str, object]]:
        """Rank every definition in `code` against the index.

        Returns {"results": per-definition match lists, "overall": per-file
        aggregate scores sorted descending}. Raises SyntaxError when `code`
        does not parse, FileNotFoundError when no index can be resolved.
        """
        index_data = self._load_index()
        base_embeddings = index_data["embeddings"]
        scales = index_data["scales"]
        identifier_map = index_data["identifier_map"]
        tokens_map = index_data["tokens_map"]
        idf_map = index_data["idf_map"]
        default_idf = index_data["default_idf"]

        definitions_raw, definitions_sanitized, _, definitions_kind = self._extract_definitions_from_code(
            code, model_hint, self.index_granularity
        )
        query_identifiers = list(definitions_raw.keys())
        query_sources_sanitized = [definitions_sanitized[key] for key in query_identifiers]
        query_tokens_list = [
            set(re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", source)) for source in query_sources_sanitized
        ]

        query_embeddings = self.encode(query_sources_sanitized)

        output = {}
        for i, query_identifier in enumerate(query_identifiers):
            # Retrieve a wider embedding pool, then re-rank with the hybrid score.
            pool_size = max(top_k_per_item * 5, top_k_per_item + 32)
            candidates = self._topk(
                query_embeddings[i],
                base_embeddings,
                scales,
                identifier_map,
                pool_size,
                pool_size=pool_size,
            )
            embedding_top, embedding_scores, jaccard_scores = self._combine_hybrid(
                candidates,
                query_tokens_list[i],
                tokens_map,
                idf_map,
                default_idf,
                top_k_per_item,
            )
            entry: dict[str, object] = {
                "kind": definitions_kind.get(query_identifier, "function"),
                "embedding": [],
            }
            for identifier, score in embedding_top:
                # Index identifiers look like "<relative_path>:<definition>".
                if ":" not in identifier:
                    continue
                relative_path, match_name = identifier.split(":", 1)
                full_path, line = _resolve_definition_location(relative_path, match_name)
                entry["embedding"].append(
                    {
                        "identifier": identifier,
                        "relative_path": relative_path,
                        "match_name": match_name,
                        "score": score,
                        "embedding_score": embedding_scores.get(identifier),
                        "jaccard_score": jaccard_scores.get(identifier),
                        "full_path": full_path,
                        "line": line,
                    }
                )
            if use_jaccard:
                entry["jaccard"] = []
            output[query_identifier] = entry

        # Aggregate per-file scores across all query definitions.
        aggregate_scores: dict[str, float] = {}
        for data in output.values():
            for match in data.get("embedding", []):
                relative_path = match.get("relative_path")
                score = match.get("score")
                if relative_path is None or score is None:
                    continue
                aggregate_scores[relative_path] = aggregate_scores.get(relative_path, 0.0) + float(score)

        overall = sorted(
            (
                {"relative_path": relative_path, "score": score}
                for relative_path, score in aggregate_scores.items()
            ),
            key=lambda item: item["score"],
            reverse=True,
        )

        return {
            "results": output,
            "overall": overall,
        }

    def index_status(self) -> dict[str, object]:
        """Describe which index is in use — surfaced to the UI as `index_info`."""
        return {
            "requested_granularity": self.requested_granularity,
            "resolved_granularity": self.index_granularity,
            "precision": self.precision,
            "hub_dataset": self.hub_dataset,
            "index_dir": str(self.index_dir) if self.index_dir else None,
            "index_origin": self.index_origin,
            "missing_files": list(self.missing_files),
            "embedding_model": EMBEDDING_MODEL,
        }
535
+
536
+
537
def get_default_hub_dataset() -> str:
    """Resolve the embeddings dataset repo, honoring the HUB_DATASET env override."""
    override = os.getenv("HUB_DATASET")
    return override if override is not None else HUB_DATASET_DEFAULT
app/graph.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ from dataclasses import dataclass
3
+
4
+
5
@dataclass
class Graph:
    """Node/edge lists for the class-method-call visualization."""

    nodes: list[dict[str, str]]  # each: {"id", "label", "type"}
    edges: list[dict[str, str]]  # each: {"source", "target", "type"}
9
+
10
+
11
+ def _call_target_name(node: ast.AST) -> str | None:
12
+ if isinstance(node, ast.Name):
13
+ return node.id
14
+ if isinstance(node, ast.Attribute) and isinstance(node.attr, str):
15
+ return node.attr
16
+ return None
17
+
18
+
19
def build_graph(code: str) -> Graph:
    """Build a containment + call graph for top-level functions and classes.

    Nodes: classes, their methods ("Class.method"), top-level functions.
    Edges: "contains" (class -> method) and "calls" (function/method ->
    function, self.method, or OtherClass.method). Raises SyntaxError when
    `code` does not parse.
    """
    tree = ast.parse(code)
    functions: dict[str, ast.AST] = {}
    classes: dict[str, list[str]] = {}
    # Keep the ClassDef nodes from the first pass; the original re-scanned
    # tree.body for every class (accidentally quadratic).
    class_nodes: dict[str, ast.ClassDef] = {}

    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            functions[node.name] = node
        elif isinstance(node, ast.ClassDef):
            class_nodes[node.name] = node
            classes[node.name] = [
                child.name
                for child in node.body
                if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef))
            ]

    nodes: list[dict[str, str]] = []
    edges: list[dict[str, str]] = []

    for class_name, method_names in classes.items():
        nodes.append({"id": class_name, "label": class_name, "type": "class"})
        for method_name in method_names:
            method_id = f"{class_name}.{method_name}"
            nodes.append({"id": method_id, "label": method_name, "type": "method"})
            edges.append({"source": class_name, "target": method_id, "type": "contains"})

    for func_name in functions:
        nodes.append({"id": func_name, "label": func_name, "type": "function"})

    known_nodes = {node["id"] for node in nodes}
    call_edges: set[tuple[str, str]] = set()

    def add_call_edge(source: str, target: str) -> None:
        # Skip self-loops and targets that are not part of this snippet.
        if source != target and target in known_nodes:
            call_edges.add((source, target))

    # Calls from top-level functions to other top-level functions.
    for func_name, func_node in functions.items():
        for call in (n for n in ast.walk(func_node) if isinstance(n, ast.Call)):
            target = _call_target_name(call.func)
            if target is not None and target in functions:
                add_call_edge(func_name, target)

    # Calls from methods: plain functions, self.method, and OtherClass.method.
    for class_name, class_node in class_nodes.items():
        for child in class_node.body:
            if not isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            source_id = f"{class_name}.{child.name}"
            for call in (n for n in ast.walk(child) if isinstance(n, ast.Call)):
                target = call.func
                if isinstance(target, ast.Name):
                    if target.id in functions:
                        add_call_edge(source_id, target.id)
                elif isinstance(target, ast.Attribute) and isinstance(target.value, ast.Name):
                    if target.value.id == "self":
                        add_call_edge(source_id, f"{class_name}.{target.attr}")
                    elif target.value.id in classes:
                        add_call_edge(source_id, f"{target.value.id}.{target.attr}")

    # Deterministic order for the UI: sorted (source, target) pairs.
    for source, target in sorted(call_edges):
        edges.append({"source": source, "target": target, "type": "calls"})

    return Graph(nodes=nodes, edges=edges)
app/main.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from fastapi import FastAPI, HTTPException
4
+ from fastapi.responses import FileResponse
5
+ from fastapi.staticfiles import StaticFiles
6
+ from pydantic import BaseModel, Field
7
+
8
+ from app.detector import CodeSimilarityAnalyzer, get_default_hub_dataset
9
+ from app.graph import build_graph
10
+
11
+ BASE_DIR = Path(__file__).resolve().parent.parent
12
+ STATIC_DIR = BASE_DIR / "static"
13
+
14
+ app = FastAPI(title="Modular Model Graph")
15
+ app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
16
+
17
+
18
class AnalyzeRequest(BaseModel):
    # Request payload for POST /api/analyze.
    code: str = Field(..., min_length=1)  # Python source to analyze (must be non-empty)
    top_k: int = Field(default=5, ge=1, le=25)  # matches returned per definition
    use_jaccard: bool = False  # adds an (empty) "jaccard" list per result entry
    granularity: str = "method"  # "method" or "definition"; validated in the endpoint
    precision: str = "float32"  # "float32" or "int8"; validated in the endpoint
    hub_dataset: str | None = None  # override for the default embeddings dataset repo
25
+
26
+
27
+ _ANALYZERS: dict[tuple[str, str, str], CodeSimilarityAnalyzer] = {}
28
+
29
+
30
def _get_analyzer(precision: str, granularity: str, hub_dataset: str) -> CodeSimilarityAnalyzer:
    """Return the cached analyzer for (precision, granularity, dataset), building it on first use.

    Construction is expensive (loads the embedding model), so instances are
    kept in the module-level _ANALYZERS cache for the process lifetime.
    """
    cache_key = (precision, granularity, hub_dataset)
    cached = _ANALYZERS.get(cache_key)
    if cached is None:
        cached = CodeSimilarityAnalyzer(
            hub_dataset=hub_dataset,
            precision=precision,
            granularity=granularity,
        )
        _ANALYZERS[cache_key] = cached
    return cached
41
+
42
+
43
@app.get("/")
async def index() -> FileResponse:
    """Serve the single-page UI."""
    return FileResponse(STATIC_DIR / "index.html")
46
+
47
+
48
@app.post("/api/analyze")
async def analyze(request: AnalyzeRequest) -> dict:
    """Analyze pasted code: build its call graph and rank similar transformers code.

    Responses: 400 for invalid options or un-parseable code, 503 when no
    embedding index can be resolved locally or from the Hub.
    """
    hub_dataset = request.hub_dataset or get_default_hub_dataset()
    if request.granularity not in ("method", "definition"):
        raise HTTPException(status_code=400, detail="granularity must be 'method' or 'definition'")
    if request.precision not in ("float32", "int8"):
        raise HTTPException(status_code=400, detail="precision must be 'float32' or 'int8'")
    analyzer = _get_analyzer(request.precision, request.granularity, hub_dataset)
    try:
        graph = build_graph(request.code)
    except SyntaxError as exc:
        raise HTTPException(status_code=400, detail=f"Syntax error: {exc.msg} at line {exc.lineno}") from exc
    try:
        results = analyzer.analyze_code(
            request.code,
            top_k_per_item=request.top_k,
            use_jaccard=request.use_jaccard,
        )
    except FileNotFoundError as exc:
        # ensure_local_index raises when no usable index exists locally or on
        # the Hub; previously this escaped as an unhandled 500.
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    return {
        "graph": {"nodes": graph.nodes, "edges": graph.edges},
        "results": results["results"],
        "overall": results["overall"],
        "index_info": analyzer.index_status(),
    }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn==0.30.6
3
+ sentence-transformers
4
+ transformers
5
+ huggingface_hub==0.24.6
6
+ safetensors==0.4.5
7
+ numpy==1.26.4
static/app.js ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const analyzeBtn = document.getElementById("analyzeBtn");
2
+ const codeInput = document.getElementById("codeInput");
3
+ const statusEl = document.getElementById("status");
4
+ const indexInfoEl = document.getElementById("indexInfo");
5
+ const graphEl = document.getElementById("graph");
6
+ const matchesEl = document.getElementById("matches");
7
+ const overallEl = document.getElementById("overall");
8
+
9
// Show a short status message in the status element.
function setStatus(message) {
  statusEl.textContent = message;
}
12
+
13
/**
 * Describe which embedding index was used: granularity (with fallback note),
 * origin ("hub" or a local source), and directory. Clears the element when
 * no info is available or the element is absent.
 */
function renderIndexInfo(info) {
  if (!indexInfoEl) return;
  if (!info) {
    indexInfoEl.textContent = "";
    return;
  }
  const requested = info.requested_granularity || "method";
  const resolved = info.resolved_granularity || requested;
  const parts = [`Using ${resolved} index`];
  if (requested !== resolved) {
    parts.push(`(fallback from ${requested})`);
  }
  const origin = info.index_origin;
  if (origin) {
    parts.push(origin === "hub" ? "from Hub" : `from ${origin}`);
  }
  if (info.index_dir) {
    parts.push(`@ ${info.index_dir}`);
  }
  indexInfoEl.textContent = parts.join(" ");
}
37
+
38
// Render the top-10 aggregate per-file scores, or a placeholder when empty.
function renderOverall(overall) {
  overallEl.innerHTML = "";
  if (!overall || overall.length === 0) {
    overallEl.textContent = "No aggregate matches yet.";
    return;
  }
  overall.slice(0, 10).forEach((entry) => {
    const item = document.createElement("div");
    item.className = "overall-item";
    item.textContent = `${entry.relative_path} (${entry.score.toFixed(4)})`;
    overallEl.appendChild(item);
  });
}
52
+
53
// Render one card per analyzed symbol; each row shows the matched name with
// its hybrid score on the left and the source file path on the right.
function renderMatches(results) {
  matchesEl.innerHTML = "";
  const keys = Object.keys(results);
  if (keys.length === 0) {
    matchesEl.textContent = "No matches returned.";
    return;
  }
  for (const symbol of keys) {
    const card = document.createElement("div");
    card.className = "match-card";
    const title = document.createElement("h3");
    title.textContent = symbol;
    card.appendChild(title);
    const list = document.createElement("div");
    list.className = "match-list";
    // "embedding" holds the hybrid-ranked matches from the API.
    const matches = results[symbol].embedding || [];
    for (const match of matches) {
      const row = document.createElement("div");
      row.className = "match-row";
      const left = document.createElement("span");
      left.textContent = `${match.match_name} (${match.score.toFixed(4)})`;
      const right = document.createElement("span");
      right.textContent = match.relative_path;
      row.appendChild(left);
      row.appendChild(right);
      list.appendChild(row);
    }
    card.appendChild(list);
    matchesEl.appendChild(card);
  }
}
84
+
85
// Draw a d3 force-directed graph of the parsed file: classes, methods,
// and functions as colored nodes, containment/call relations as edges.
function renderGraph(graph) {
  graphEl.innerHTML = "";
  // Size the SVG to the container's current layout box.
  const width = graphEl.clientWidth;
  const height = graphEl.clientHeight;
  const svg = d3
    .select(graphEl)
    .append("svg")
    .attr("width", width)
    .attr("height", height);

  // Shallow-copy nodes/edges: the simulation mutates them (x, y, vx, vy).
  const nodes = graph.nodes.map((node) => ({ ...node }));
  const links = graph.edges.map((edge) => ({ ...edge }));

  // Node color by symbol type; matches the legend in index.html.
  const color = (type) => {
    if (type === "class") return "#d6572b";
    if (type === "method") return "#2b6fd6";
    if (type === "function") return "#1b8d57";
    return "#666";
  };

  // Standard force layout: links pull, charge repels, center anchors.
  const simulation = d3
    .forceSimulation(nodes)
    .force("link", d3.forceLink(links).id((d) => d.id).distance(80))
    .force("charge", d3.forceManyBody().strength(-220))
    .force("center", d3.forceCenter(width / 2, height / 2));

  // Edges: "contains" edges drawn slightly thicker than call edges.
  const link = svg
    .append("g")
    .attr("stroke", "#333")
    .attr("stroke-opacity", 0.4)
    .selectAll("line")
    .data(links)
    .join("line")
    .attr("stroke-width", (d) => (d.type === "contains" ? 1.5 : 1));

  // Nodes: classes are larger circles; all are draggable.
  const node = svg
    .append("g")
    .attr("stroke", "#fff")
    .attr("stroke-width", 1.5)
    .selectAll("circle")
    .data(nodes)
    .join("circle")
    .attr("r", (d) => (d.type === "class" ? 9 : 6))
    .attr("fill", (d) => color(d.type))
    .call(drag(simulation));

  // Text labels offset to the right of each node.
  const labels = svg
    .append("g")
    .selectAll("text")
    .data(nodes)
    .join("text")
    .text((d) => d.label)
    .attr("font-size", 11)
    .attr("fill", "#2b1e13")
    .attr("dx", 12)
    .attr("dy", 3);

  // Native tooltip showing the node's full id on hover.
  node.append("title").text((d) => d.id);

  // Sync SVG element positions with the simulation on every tick.
  simulation.on("tick", () => {
    link
      .attr("x1", (d) => d.source.x)
      .attr("y1", (d) => d.source.y)
      .attr("x2", (d) => d.target.x)
      .attr("y2", (d) => d.target.y);

    node.attr("cx", (d) => d.x).attr("cy", (d) => d.y);

    labels.attr("x", (d) => d.x).attr("y", (d) => d.y);
  });

  // Drag behavior: pin the node (fx/fy) while dragging, reheat the
  // simulation on start, and release the pin when the drag ends.
  function drag(sim) {
    function dragstarted(event, d) {
      if (!event.active) sim.alphaTarget(0.3).restart();
      d.fx = d.x;
      d.fy = d.y;
    }

    function dragged(event, d) {
      d.fx = event.x;
      d.fy = event.y;
    }

    function dragended(event, d) {
      if (!event.active) sim.alphaTarget(0);
      d.fx = null;
      d.fy = null;
    }

    return d3.drag().on("start", dragstarted).on("drag", dragged).on("end", dragended);
  }
}
177
+
178
// Submit the pasted code to /api/analyze and render the response.
// The button is disabled for the duration of the request.
analyzeBtn.addEventListener("click", async () => {
  const code = codeInput.value.trim();
  if (!code) {
    setStatus("Paste some code first.");
    return;
  }

  renderIndexInfo(null);
  setStatus("Analyzing... this can take a bit on first run.");
  analyzeBtn.disabled = true;

  try {
    const body = JSON.stringify({
      code,
      top_k: Number(document.getElementById("topK").value || 5),
      granularity: document.getElementById("granularity").value,
      use_jaccard: document.getElementById("useJaccard").checked,
      precision: "float32",
    });
    const response = await fetch("/api/analyze", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body,
    });
    if (!response.ok) {
      // Surface the server's error body when it provides one.
      throw new Error((await response.text()) || "Request failed");
    }
    const data = await response.json();
    renderGraph(data.graph);
    renderOverall(data.overall);
    renderMatches(data.results);
    renderIndexInfo(data.index_info);
    setStatus("Done.");
  } catch (error) {
    setStatus(`Error: ${error.message || error}`);
  } finally {
    analyzeBtn.disabled = false;
  }
});
static/index.html ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>Modular Model Graph</title>
  <link rel="stylesheet" href="/static/styles.css" />
  <!-- d3 v7 is required by renderGraph in app.js; loaded from CDN. -->
  <script src="https://d3js.org/d3.v7.min.js"></script>
</head>
<body>
  <div class="page">
    <header class="hero">
      <div>
        <p class="eyebrow">Transformers similarity explorer</p>
        <h1>Modular Model Graph</h1>
        <p class="subhead">
          Paste a modeling file, visualize its structure, and compare against Transformers models.
        </p>
      </div>
    </header>

    <!-- Input panel: code textarea plus query controls.
         The ids (topK, granularity, useJaccard, analyzeBtn, codeInput,
         status, indexInfo) are all read by app.js — keep them stable. -->
    <section class="panel">
      <div class="panel-header">
        <h2>Input</h2>
        <div class="controls">
          <label>
            Top K
            <input id="topK" type="number" value="5" min="1" max="25" />
          </label>
          <label>
            Granularity
            <select id="granularity">
              <option value="method" selected>method</option>
              <option value="definition">definition</option>
            </select>
          </label>
          <label class="checkbox">
            <input id="useJaccard" type="checkbox" />
            Use Jaccard
          </label>
          <button id="analyzeBtn">Analyze</button>
        </div>
      </div>
      <textarea id="codeInput" placeholder="Paste modeling file code here..."></textarea>
      <!-- status: progress/error messages; indexInfo: which index was used. -->
      <p id="status" class="status"></p>
      <p id="indexInfo" class="status"></p>
    </section>

    <!-- Results: force-directed graph on the left, aggregate ranking on the right. -->
    <section class="grid">
      <div class="panel">
        <div class="panel-header">
          <h2>Graph</h2>
          <!-- Legend colors mirror the node colors assigned in renderGraph. -->
          <div class="legend">
            <span class="dot class">Class</span>
            <span class="dot method">Method</span>
            <span class="dot function">Function</span>
            <span class="dot call">Call edge</span>
          </div>
        </div>
        <!-- SVG is injected here by renderGraph (app.js). -->
        <div id="graph" class="graph"></div>
      </div>

      <div class="panel">
        <div class="panel-header">
          <h2>Closest Models</h2>
        </div>
        <!-- Populated by renderOverall (app.js). -->
        <div id="overall" class="overall"></div>
      </div>
    </section>

    <!-- Per-symbol match cards, populated by renderMatches (app.js). -->
    <section class="panel">
      <div class="panel-header">
        <h2>Matches by Symbol</h2>
      </div>
      <div id="matches" class="matches"></div>
    </section>
  </div>

  <script src="/static/app.js"></script>
</body>
</html>
static/styles.css ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Stylesheet for the Modular Model Graph UI (static/index.html). */
@import url("https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&display=swap");

/* Design tokens: warm paper palette plus one accent per symbol type.
   The accent colors intentionally match the node colors in app.js. */
:root {
  --bg: #f6f0e6;
  --panel: #fff7ee;
  --ink: #1b1b1b;
  --muted: #6b5f55;
  --accent: #d6572b;
  --accent-2: #2b6fd6;
  --accent-3: #1b8d57;
  --shadow: rgba(27, 27, 27, 0.1);
}

* {
  box-sizing: border-box;
}

/* Layered radial gradients over the base color for a soft backdrop. */
body {
  margin: 0;
  font-family: "Space Grotesk", system-ui, sans-serif;
  color: var(--ink);
  background: radial-gradient(circle at 20% 20%, #ffe4c7 0%, transparent 55%),
    radial-gradient(circle at 85% 15%, #f5d2e8 0%, transparent 40%),
    radial-gradient(circle at 70% 80%, #d8f0e2 0%, transparent 45%),
    var(--bg);
}

/* Centered content column. */
.page {
  max-width: 1200px;
  margin: 0 auto;
  padding: 32px 24px 64px;
}

/* Header area. */
.hero {
  display: flex;
  justify-content: space-between;
  align-items: flex-end;
  gap: 24px;
  margin-bottom: 24px;
}

.eyebrow {
  text-transform: uppercase;
  letter-spacing: 0.12em;
  font-size: 12px;
  margin: 0 0 8px;
  color: var(--muted);
}

h1 {
  font-size: 40px;
  margin: 0 0 12px;
}

.subhead {
  font-size: 16px;
  max-width: 640px;
  margin: 0;
  color: var(--muted);
}

/* Card container used for every major section. */
.panel {
  background: var(--panel);
  border-radius: 20px;
  padding: 20px;
  box-shadow: 0 14px 30px var(--shadow);
  margin-bottom: 24px;
}

.panel-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  flex-wrap: wrap;
  gap: 12px;
}

h2 {
  margin: 0;
  font-size: 20px;
}

/* Query controls (Top K, granularity, Jaccard toggle, Analyze button). */
.controls {
  display: flex;
  align-items: center;
  gap: 12px;
  flex-wrap: wrap;
}

.controls label {
  display: flex;
  gap: 8px;
  align-items: center;
  font-size: 14px;
  color: var(--muted);
}

.controls input,
.controls select {
  border: 1px solid #d9cbbd;
  border-radius: 10px;
  padding: 6px 8px;
  background: #fff;
  font-size: 14px;
}

.controls button {
  background: var(--accent);
  color: #fff;
  border: none;
  border-radius: 12px;
  padding: 8px 16px;
  font-weight: 600;
  cursor: pointer;
  transition: transform 0.2s ease;
}

.controls button:hover {
  transform: translateY(-1px);
}

/* Code input area. */
textarea {
  width: 100%;
  min-height: 240px;
  margin-top: 16px;
  border-radius: 16px;
  border: 1px solid #d9cbbd;
  padding: 16px;
  font-family: "Space Grotesk", monospace;
  background: #fff;
  resize: vertical;
}

/* Shared by the #status and #indexInfo message lines. */
.status {
  margin-top: 8px;
  color: var(--muted);
}

/* Two-column results layout: graph (2fr) next to closest models (1fr). */
.grid {
  display: grid;
  grid-template-columns: minmax(0, 2fr) minmax(0, 1fr);
  gap: 24px;
}

/* Fixed-height canvas for the d3 SVG. */
.graph {
  width: 100%;
  height: 480px;
  border-radius: 18px;
  background: #fff;
  border: 1px solid #e3d6c8;
}

/* Legend with colored dots matching node colors in app.js. */
.legend {
  display: flex;
  gap: 10px;
  align-items: center;
  flex-wrap: wrap;
  font-size: 12px;
  color: var(--muted);
}

.dot {
  display: inline-flex;
  align-items: center;
  gap: 6px;
}

.dot::before {
  content: "";
  width: 10px;
  height: 10px;
  border-radius: 50%;
  background: var(--muted);
}

.dot.class::before {
  background: var(--accent);
}

.dot.method::before {
  background: var(--accent-2);
}

.dot.function::before {
  background: var(--accent-3);
}

.dot.call::before {
  background: #333;
}

/* Aggregate "Closest Models" list. */
.overall {
  display: flex;
  flex-direction: column;
  gap: 12px;
  margin-top: 12px;
}

.overall-item {
  padding: 10px 12px;
  background: #fff;
  border-radius: 12px;
  border: 1px solid #eadccd;
}

/* Per-symbol match cards. */
.matches {
  display: grid;
  gap: 16px;
  margin-top: 16px;
}

.match-card {
  background: #fff;
  border-radius: 16px;
  border: 1px solid #eadccd;
  padding: 16px;
}

.match-card h3 {
  margin: 0 0 8px;
  font-size: 16px;
}

.match-list {
  display: grid;
  gap: 6px;
  font-size: 14px;
  color: var(--muted);
}

.match-row {
  display: flex;
  justify-content: space-between;
  gap: 12px;
}

/* Collapse the results grid to a single column on narrow screens. */
@media (max-width: 960px) {
  .grid {
    grid-template-columns: 1fr;
  }
}