Spaces:
Sleeping
Sleeping
Add FastAPI app + static UI
Browse files- Dockerfile +18 -0
- app/__pycache__/detector.cpython-312.pyc +0 -0
- app/__pycache__/graph.cpython-312.pyc +0 -0
- app/__pycache__/main.cpython-312.pyc +0 -0
- app/detector.py +538 -0
- app/graph.py +90 -0
- app/main.py +70 -0
- requirements.txt +7 -0
- static/app.js +215 -0
- static/index.html +81 -0
- static/styles.css +241 -0
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Runtime image for the FastAPI app + static UI, served by uvicorn.
FROM python:3.11-slim

WORKDIR /app

# No .pyc files, unbuffered stdout/stderr for container logs; the HF /
# transformers variables silence progress bars and non-error logging.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    HF_HUB_DISABLE_PROGRESS_BARS=1 \
    TRANSFORMERS_VERBOSITY=error

# Install dependencies before copying sources so Docker layer caching
# survives code-only changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY app /app/app
COPY static /app/static

# 7860 is the conventional Hugging Face Spaces port.
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/__pycache__/detector.cpython-312.pyc
ADDED
|
Binary file (24.1 kB). View file
|
|
|
app/__pycache__/graph.cpython-312.pyc
ADDED
|
Binary file (5.11 kB). View file
|
|
|
app/__pycache__/main.cpython-312.pyc
ADDED
|
Binary file (3.77 kB). View file
|
|
|
app/detector.py
ADDED
|
@@ -0,0 +1,538 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ast
|
| 2 |
+
import json
|
| 3 |
+
import math
|
| 4 |
+
import os
|
| 5 |
+
import re
|
| 6 |
+
from dataclasses import dataclass
|
| 7 |
+
from functools import cache
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
import numpy as np
|
| 11 |
+
from huggingface_hub import snapshot_download
|
| 12 |
+
from safetensors.numpy import load_file as safetensors_load
|
| 13 |
+
import torch
|
| 14 |
+
from transformers import AutoModel, AutoTokenizer
|
| 15 |
+
|
| 16 |
+
import transformers
|
| 17 |
+
|
| 18 |
+
# Root of the installed transformers package's models/ directory; index
# identifiers store file paths relative to this root.
MODELS_ROOT = Path(transformers.__file__).resolve().parent / "models"

# Model used to embed sanitized code snippets.
EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-4B"
# Number of snippets encoded per forward pass.
BATCH_SIZE = 16
# Tokenizer truncation length per snippet.
MAX_LENGTH = 4096
# Weight of the embedding score vs. the IDF-weighted Jaccard score in the
# hybrid ranking (see CodeSimilarityAnalyzer._combine_hybrid).
HYBRID_ALPHA = 0.7
# Dataset repo holding the precomputed index; override with the HUB_DATASET
# environment variable (see get_default_hub_dataset).
HUB_DATASET_DEFAULT = "hf-internal-testing/transformers_code_embeddings"
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class Match:
    """A single similarity hit against the indexed transformers model code.

    NOTE(review): not constructed anywhere in this module's visible code;
    the fields mirror the match dicts built in analyze_code — confirm
    whether this type is still used before relying on it.
    """

    # Index key of the form "relative_path:match_name".
    identifier: str
    # Matched file path relative to MODELS_ROOT.
    relative_path: str
    # Name of the matched definition (function, class, or "Class.method").
    match_name: str
    # Similarity score for the match.
    score: float
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _sanitize_for_embedding(code: str, model_hint: str | None, symbol_hint: str | None) -> str:
|
| 36 |
+
base = "\n".join(
|
| 37 |
+
line
|
| 38 |
+
for line in re.sub(r"#.*", "", re.sub(r'(\"\"\"|\'\'\')(?:.|\n)*?\1', "", code)).splitlines()
|
| 39 |
+
if not re.match(r"\s*(from|import)\s+", line)
|
| 40 |
+
)
|
| 41 |
+
variants = set()
|
| 42 |
+
if model_hint:
|
| 43 |
+
variants.add(model_hint)
|
| 44 |
+
variants.add(model_hint.replace("_", ""))
|
| 45 |
+
variants.add(re.sub(r"\d+", "", model_hint))
|
| 46 |
+
if symbol_hint:
|
| 47 |
+
match = re.match(r"^([A-Z][a-z0-9]+)", symbol_hint) or re.match(r"^([A-Za-z0-9]+)", symbol_hint)
|
| 48 |
+
prefix = match.group(1) if match else ""
|
| 49 |
+
if prefix:
|
| 50 |
+
variants.add(prefix)
|
| 51 |
+
variants.add(prefix.replace("_", ""))
|
| 52 |
+
variants.add(re.sub(r"\d+", "", prefix))
|
| 53 |
+
variants |= {variant.lower() for variant in list(variants)}
|
| 54 |
+
sanitized = base
|
| 55 |
+
for variant in sorted({x for x in variants if len(x) >= 3}, key=len, reverse=True):
|
| 56 |
+
sanitized = re.sub(re.escape(variant), "Model", sanitized, flags=re.IGNORECASE)
|
| 57 |
+
return sanitized
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _compute_idf(tokens_map: dict[str, list[str]]) -> tuple[dict[str, float], float]:
|
| 61 |
+
doc_count = len(tokens_map)
|
| 62 |
+
if doc_count == 0:
|
| 63 |
+
return {}, 1.0
|
| 64 |
+
df: dict[str, int] = {}
|
| 65 |
+
for tokens in tokens_map.values():
|
| 66 |
+
for token in set(tokens):
|
| 67 |
+
df[token] = df.get(token, 0) + 1
|
| 68 |
+
idf = {token: math.log((doc_count + 1) / (count + 1)) + 1.0 for token, count in df.items()}
|
| 69 |
+
default_idf = math.log((doc_count + 1) / 1) + 1.0
|
| 70 |
+
return idf, default_idf
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _weighted_jaccard(
|
| 74 |
+
query_tokens: set[str], candidate_tokens: set[str], idf_map: dict[str, float], default_idf: float
|
| 75 |
+
) -> float:
|
| 76 |
+
if not query_tokens or not candidate_tokens:
|
| 77 |
+
return 0.0
|
| 78 |
+
intersection = query_tokens & candidate_tokens
|
| 79 |
+
if not intersection:
|
| 80 |
+
return 0.0
|
| 81 |
+
union = query_tokens | candidate_tokens
|
| 82 |
+
union_weight = sum(idf_map.get(token, default_idf) for token in union)
|
| 83 |
+
if union_weight <= 0:
|
| 84 |
+
return 0.0
|
| 85 |
+
intersection_weight = sum(idf_map.get(token, default_idf) for token in intersection)
|
| 86 |
+
return intersection_weight / union_weight
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
@cache
def _load_definition_line_map(relative_path: str) -> dict[str, int]:
    """Map top-level definition names in a models file to 1-based line numbers.

    Keys are function/class names plus "Class.method" for direct class members.
    Returns an empty mapping when the file is unreadable or fails to parse.
    Memoized per relative path for the process lifetime.
    """
    try:
        source = (MODELS_ROOT / relative_path).read_text(encoding="utf-8")
        tree = ast.parse(source)
    except (OSError, SyntaxError):
        # FileNotFoundError is a subclass of OSError, so both read failures
        # and parse failures degrade to "no line info".
        return {}

    definition_lines: dict[str, int] = {}
    for top_node in ast.iter_child_nodes(tree):
        if not isinstance(top_node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            continue
        definition_lines[top_node.name] = getattr(top_node, "lineno", None) or 1
        if not isinstance(top_node, ast.ClassDef):
            continue
        for member in top_node.body:
            if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef)):
                definition_lines[f"{top_node.name}.{member.name}"] = getattr(member, "lineno", None) or 1
    return definition_lines
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _resolve_definition_location(relative_path: str, definition: str) -> tuple[str, int | None]:
    """Resolve a definition inside a models file to (absolute path, line or None)."""
    absolute = (MODELS_ROOT / relative_path).resolve()
    line_number = _load_definition_line_map(relative_path).get(definition)
    return str(absolute), line_number
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
class CodeSimilarityAnalyzer:
    """Finds transformers model code similar to user-supplied Python snippets.

    Sanitized definitions are embedded with EMBEDDING_MODEL and ranked against
    a precomputed index — located locally (env/cwd/repo) or downloaded from
    the Hub — using a hybrid of embedding similarity and IDF-weighted Jaccard
    token overlap.
    """

    def __init__(self, hub_dataset: str, precision: str = "float32", granularity: str = "method"):
        """Load the embedding model and record index settings (no index I/O yet)."""
        self.hub_dataset = hub_dataset
        self.precision = precision
        # What the caller asked for; index_granularity may be downgraded to
        # "definition" by ensure_local_index when method-level files are missing.
        self.requested_granularity = granularity
        self.index_granularity = granularity
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.dtype = torch.float16 if self.device.type == "cuda" else torch.float32
        self.tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL, trust_remote_code=True)
        self.model = AutoModel.from_pretrained(
            EMBEDDING_MODEL, trust_remote_code=True, torch_dtype=self.dtype, device_map=None
        ).to(self.device)
        self.model.eval()
        # Resolved lazily by ensure_local_index() / _load_index().
        self.index_dir: Path | None = None
        self.index_origin: str | None = None
        self.missing_files: tuple[str, ...] = ()
        self._index_cache: dict[str, object] | None = None

    def _embedding_filename(self, granularity: str | None = None) -> str:
        """Index embeddings filename for the given (or current) granularity/precision."""
        granularity = granularity or self.index_granularity
        suffix = ""
        if granularity == "method":
            suffix += "_methods"
        if self.precision == "int8":
            suffix += "_int8"
        if not suffix:
            return "embeddings.safetensors"
        return f"embeddings{suffix}.safetensors"

    def _index_map_filename(self, granularity: str | None = None) -> str:
        """Filename of the row-index -> identifier JSON map."""
        granularity = granularity or self.index_granularity
        if granularity == "method":
            return "code_index_map_methods.json"
        return "code_index_map.json"

    def _tokens_filename(self, granularity: str | None = None) -> str:
        """Filename of the identifier -> token-list JSON map."""
        granularity = granularity or self.index_granularity
        if granularity == "method":
            return "code_index_tokens_methods.json"
        return "code_index_tokens.json"

    def _resolve_index_path(self, filename: str) -> Path:
        """Join *filename* onto the resolved index directory (cwd-relative if unset)."""
        if self.index_dir is None:
            return Path(filename)
        return self.index_dir / filename

    def _required_index_files(self, granularity: str | None = None) -> tuple[str, ...]:
        """The three files an index directory must contain to be usable."""
        return (
            self._embedding_filename(granularity),
            self._index_map_filename(granularity),
            self._tokens_filename(granularity),
        )

    def ensure_local_index(self) -> None:
        """Locate a usable index directory, downloading from the Hub if needed.

        Search order: already-resolved dir, then INDEX_DIR env var, cwd, and
        the repo root, each at the requested granularity; then the Hub
        snapshot; then Hub/local fallbacks at "definition" granularity when
        "method" was requested. Raises FileNotFoundError when nothing usable
        is found.
        """
        required_files = self._required_index_files(self.requested_granularity)
        if self.index_dir is not None and all((self.index_dir / fname).exists() for fname in required_files):
            return

        # Local helper; deliberately distinct from the self.missing_files
        # attribute despite sharing its name.
        def missing_files(directory: Path, granularity: str) -> list[str]:
            return [fname for fname in self._required_index_files(granularity) if not (directory / fname).exists()]

        candidates: list[tuple[str, Path]] = []
        env_dir = os.getenv("INDEX_DIR")
        if env_dir:
            candidates.append(("env", Path(env_dir)))
        candidates.append(("cwd", Path.cwd()))
        candidates.append(("repo", Path(__file__).resolve().parent.parent))

        # First pass: any local candidate at the requested granularity.
        missing_preferred: list[str] = []
        for origin, candidate in candidates:
            missing_preferred = missing_files(candidate, self.requested_granularity)
            if not missing_preferred:
                self.index_dir = candidate
                self.index_origin = origin
                self.index_granularity = self.requested_granularity
                self.missing_files = ()
                self._index_cache = None
                return

        # Remember a local definition-granularity fallback, used only if the
        # Hub cannot serve either granularity.
        fallback_dir: Path | None = None
        fallback_origin: str | None = None
        fallback_missing: list[str] = []
        if self.requested_granularity == "method":
            for origin, candidate in candidates:
                fallback_missing = missing_files(candidate, "definition")
                if not fallback_missing:
                    fallback_dir = candidate
                    fallback_origin = origin
                    break

        snapshot_dir = Path(snapshot_download(repo_id=self.hub_dataset, repo_type="dataset"))
        hub_missing_preferred = missing_files(snapshot_dir, self.requested_granularity)
        hub_missing_fallback: list[str] = []
        if self.requested_granularity == "method":
            hub_missing_fallback = missing_files(snapshot_dir, "definition")

        if not hub_missing_preferred:
            self.index_dir = snapshot_dir
            self.index_origin = "hub"
            self.index_granularity = self.requested_granularity
            self.missing_files = ()
            self._index_cache = None
            return

        # NOTE(review): a Hub definition-granularity index is preferred over a
        # complete LOCAL definition-granularity fallback — confirm that order
        # is intentional.
        if self.requested_granularity == "method" and not hub_missing_fallback:
            self.index_dir = snapshot_dir
            self.index_origin = "hub"
            self.index_granularity = "definition"
            self.missing_files = tuple(hub_missing_preferred)
            self._index_cache = None
            return

        if fallback_dir is not None:
            self.index_dir = fallback_dir
            self.index_origin = fallback_origin
            self.index_granularity = "definition"
            self.missing_files = tuple(missing_preferred)
            self._index_cache = None
            return

        missing_detail = ", ".join(hub_missing_preferred or missing_preferred)
        raise FileNotFoundError(
            "Missing expected files for requested granularity; unable to fall back to definition index. "
            f"Missing: {missing_detail}"
        )

    def _load_index(self) -> dict[str, object]:
        """Load (and memoize) the index: embeddings, id map, tokens, and IDF stats."""
        if self._index_cache is not None:
            return self._index_cache
        self.ensure_local_index()
        embedding_path = self._resolve_index_path(self._embedding_filename())
        base = safetensors_load(str(embedding_path))
        base_embeddings = base["embeddings"]
        # int8 indexes store a per-dimension dequantization scale.
        scales = base.get("scale") if self.precision == "int8" else None
        with open(self._resolve_index_path(self._index_map_filename()), "r", encoding="utf-8") as file:
            # JSON keys are strings; the map is keyed by integer row index.
            identifier_map = {int(key): value for key, value in json.load(file).items()}
        with open(self._resolve_index_path(self._tokens_filename()), "r", encoding="utf-8") as file:
            tokens_map = json.load(file)
        idf_map, default_idf = _compute_idf(tokens_map)
        self._index_cache = {
            "embeddings": base_embeddings,
            "scales": scales,
            "identifier_map": identifier_map,
            "tokens_map": tokens_map,
            "idf_map": idf_map,
            "default_idf": default_idf,
        }
        return self._index_cache

    def _encode_batch(self, texts: list[str]) -> np.ndarray:
        """Embed one batch: mean-pool over the attention mask, then L2-normalize."""
        encoded = self.tokenizer(texts, padding=True, truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
        encoded = {key: value.to(self.device) for key, value in encoded.items()}
        # NOTE(review): on CUDA this enters autocast but NOT no_grad, so the
        # forward pass tracks gradients — consider wrapping in torch.no_grad()
        # as well; confirm and fix.
        with (
            torch.autocast(device_type=self.device.type, dtype=self.dtype)
            if self.device.type == "cuda"
            else torch.no_grad()
        ):
            output = self.model(**encoded)
        if hasattr(output, "last_hidden_state"):
            embeddings = output.last_hidden_state
            mask = encoded["attention_mask"].unsqueeze(-1)
            # Masked mean pooling; clamp avoids division by zero on empty masks.
            embeddings = (embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1e-9)
        elif hasattr(output, "pooler_output"):
            embeddings = output.pooler_output
        else:
            embeddings = output[0].mean(dim=1)
        embeddings = torch.nn.functional.normalize(embeddings.float(), p=2, dim=1)
        return embeddings.cpu().numpy().astype("float32")

    def encode(self, texts: list[str]) -> np.ndarray:
        """Embed *texts* in BATCH_SIZE chunks; returns an empty array for no input."""
        if not texts:
            return np.zeros((0, 0), dtype="float32")
        output = []
        for i in range(0, len(texts), BATCH_SIZE):
            output.append(self._encode_batch(texts[i : i + BATCH_SIZE]))
            if self.device.type == "cuda":
                torch.cuda.empty_cache()
        return np.vstack(output) if output else np.zeros((0, 0), dtype="float32")

    def _topk(
        self,
        query_embedding_row: np.ndarray,
        base_embeddings: np.ndarray,
        scales: np.ndarray | None,
        identifier_map: dict[int, str],
        k: int,
        pool_size: int | None = None,
    ) -> list[tuple[str, float]]:
        """Return up to *k* (identifier, similarity) pairs, best first.

        For int8 indexes the query is rescaled by the stored per-dimension
        scales before the dot product.

        NOTE(review): np.argpartition raises when the partition index is >=
        the number of indexed rows, so this assumes the index holds more than
        k + 32 entries — confirm against the smallest shipped index.
        """
        if self.precision == "int8":
            if scales is None:
                raise ValueError("Missing int8 scales for int8 search.")
            weighted_query = (query_embedding_row * scales).astype("float32")
            similarities = weighted_query @ base_embeddings.T.astype("float32")
        else:
            similarities = query_embedding_row @ base_embeddings.T
        # Partial sort of a candidate pool, then exact sort within the pool.
        pool = k + 32 if pool_size is None else max(k, pool_size)
        indices = np.argpartition(-similarities, pool)[:pool]
        indices = indices[np.argsort(-similarities[indices])]
        output = []
        for match_id in indices:
            identifier = identifier_map[int(match_id)]
            output.append((identifier, float(similarities[match_id])))
            if len(output) >= k:
                break
        return output

    def _combine_hybrid(
        self,
        candidates: list[tuple[str, float]],
        query_tokens: set[str],
        tokens_map: dict[str, list[str]],
        idf_map: dict[str, float],
        default_idf: float,
        k: int,
    ) -> tuple[list[tuple[str, float]], dict[str, float], dict[str, float]]:
        """Rerank embedding candidates with an IDF-weighted Jaccard blend.

        Returns the top-k (identifier, hybrid score) list plus the raw
        embedding and Jaccard score maps for every candidate considered.
        """
        embedding_scores: dict[str, float] = {}
        jaccard_scores: dict[str, float] = {}
        hybrid_scores = []
        for identifier, embedding_score in candidates:
            tokens = set(tokens_map.get(identifier, []))
            jaccard_score = _weighted_jaccard(query_tokens, tokens, idf_map, default_idf)
            embedding_scores[identifier] = embedding_score
            jaccard_scores[identifier] = jaccard_score
            # Negative cosine similarities are floored at 0 before blending.
            hybrid = HYBRID_ALPHA * max(0.0, embedding_score) + (1.0 - HYBRID_ALPHA) * jaccard_score
            hybrid_scores.append((identifier, hybrid))
        hybrid_scores.sort(key=lambda item: item[1], reverse=True)
        return hybrid_scores[:k], embedding_scores, jaccard_scores

    def _extract_definitions_from_code(
        self,
        code: str,
        model_hint: str | None,
        granularity: str,
    ) -> tuple[dict[str, str], dict[str, str], dict[str, list[str]], dict[str, str]]:
        """Split *code* into query units matching the index granularity.

        Returns parallel dicts keyed by identifier ("name" or "Class.method"):
        raw source, sanitized source, sorted token list, and kind
        ("function" | "class" | "method"). At "method" granularity each method
        is prefixed with its class header (and first docstring line) for
        context. Raises SyntaxError via ast.parse on invalid input.
        """
        definitions_raw: dict[str, str] = {}
        definitions_sanitized: dict[str, str] = {}
        definitions_tokens: dict[str, list[str]] = {}
        definitions_kind: dict[str, str] = {}
        lines = code.splitlines()
        tree = ast.parse(code)
        for node in ast.iter_child_nodes(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and granularity in ("definition", "method"):
                segment = ast.get_source_segment(code, node)
                # Fallback slice by line numbers when get_source_segment fails.
                if segment is None and hasattr(node, "lineno") and hasattr(node, "end_lineno"):
                    start = max(0, node.lineno - 1)
                    end = node.end_lineno
                    segment = "\n".join(lines[start:end])
                if not segment:
                    continue
                identifier = node.name
                definitions_raw[identifier] = segment
                sanitized = _sanitize_for_embedding(segment, model_hint, node.name)
                definitions_sanitized[identifier] = sanitized
                definitions_tokens[identifier] = sorted(
                    set(re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", sanitized))
                )
                definitions_kind[identifier] = "function"
                continue

            if isinstance(node, ast.ClassDef):
                class_segment = ast.get_source_segment(code, node)
                if class_segment is None and hasattr(node, "lineno") and hasattr(node, "end_lineno"):
                    start = max(0, node.lineno - 1)
                    end = node.end_lineno
                    class_segment = "\n".join(lines[start:end])
                # Build a short class context (header + first docstring line)
                # to prepend to each method at "method" granularity.
                class_header = ""
                if class_segment:
                    class_header = class_segment.splitlines()[0].strip()
                class_docstring = ast.get_docstring(node)
                class_context = class_header
                if class_docstring:
                    first_line = class_docstring.strip().splitlines()[0]
                    class_context = f'{class_header}\n"""{first_line}"""' if class_header else first_line

                if granularity == "definition":
                    if not class_segment:
                        continue
                    identifier = node.name
                    definitions_raw[identifier] = class_segment
                    sanitized = _sanitize_for_embedding(class_segment, model_hint, node.name)
                    definitions_sanitized[identifier] = sanitized
                    definitions_tokens[identifier] = sorted(
                        set(re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", sanitized))
                    )
                    definitions_kind[identifier] = "class"
                    continue

                for child in node.body:
                    if not isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                        continue
                    segment = ast.get_source_segment(code, child)
                    if segment is None and hasattr(child, "lineno") and hasattr(child, "end_lineno"):
                        start = max(0, child.lineno - 1)
                        end = child.end_lineno
                        segment = "\n".join(lines[start:end])
                    if not segment:
                        continue
                    method_name = child.name
                    combined = f"{class_context}\n{segment}" if class_context else segment
                    identifier = f"{node.name}.{method_name}"
                    definitions_raw[identifier] = segment
                    sanitized = _sanitize_for_embedding(combined, model_hint, node.name)
                    definitions_sanitized[identifier] = sanitized
                    definitions_tokens[identifier] = sorted(
                        set(re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", sanitized))
                    )
                    definitions_kind[identifier] = "method"
        return definitions_raw, definitions_sanitized, definitions_tokens, definitions_kind

    def analyze_code(
        self,
        code: str,
        top_k_per_item: int = 5,
        use_jaccard: bool = False,
        model_hint: str | None = None,
    ) -> dict[str, object]:
        """Find the closest indexed code for each definition in *code*.

        Returns {"results": per-definition match dicts, "overall": files ranked
        by summed match scores}. Each result entry carries its kind plus an
        "embedding" list of match dicts (identifier, paths, hybrid/raw scores,
        resolved line). When use_jaccard is set, an (empty) "jaccard" list is
        also attached.
        """
        index_data = self._load_index()
        base_embeddings = index_data["embeddings"]
        scales = index_data["scales"]
        identifier_map = index_data["identifier_map"]
        tokens_map = index_data["tokens_map"]
        idf_map = index_data["idf_map"]
        default_idf = index_data["default_idf"]
        # NOTE(review): computed but unused — presumably leftover; confirm.
        identifiers = [identifier_map[i] for i in range(len(identifier_map))]

        definitions_raw, definitions_sanitized, _, definitions_kind = self._extract_definitions_from_code(
            code, model_hint, self.index_granularity
        )
        query_identifiers = list(definitions_raw.keys())
        query_sources_sanitized = [definitions_sanitized[key] for key in query_identifiers]
        query_tokens_list = [
            set(re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b", source)) for source in query_sources_sanitized
        ]

        query_embeddings = self.encode(query_sources_sanitized)

        output = {}
        for i, query_identifier in enumerate(query_identifiers):
            query_name = query_identifier
            # _topk is asked for the full candidate pool (k == pool_size);
            # _combine_hybrid trims to top_k_per_item after reranking.
            pool_size = max(top_k_per_item * 5, top_k_per_item + 32)
            candidates = self._topk(
                query_embeddings[i],
                base_embeddings,
                scales,
                identifier_map,
                pool_size,
                pool_size=pool_size,
            )
            embedding_top, embedding_scores, jaccard_scores = self._combine_hybrid(
                candidates,
                query_tokens_list[i],
                tokens_map,
                idf_map,
                default_idf,
                top_k_per_item,
            )
            entry: dict[str, object] = {
                "kind": definitions_kind.get(query_identifier, "function"),
                "embedding": [],
            }
            for identifier, score in embedding_top:
                # Index identifiers are "relative_path:match_name"; skip
                # anything that does not follow that shape.
                if ":" not in identifier:
                    continue
                relative_path, match_name = identifier.split(":", 1)
                full_path, line = _resolve_definition_location(relative_path, match_name)
                entry["embedding"].append(
                    {
                        "identifier": identifier,
                        "relative_path": relative_path,
                        "match_name": match_name,
                        "score": score,
                        "embedding_score": embedding_scores.get(identifier),
                        "jaccard_score": jaccard_scores.get(identifier),
                        "full_path": full_path,
                        "line": line,
                    }
                )
            if use_jaccard:
                entry["jaccard"] = []
            output[query_name] = entry

        # Rank whole files by the sum of their hybrid match scores.
        aggregate_scores: dict[str, float] = {}
        for data in output.values():
            for match in data.get("embedding", []):
                relative_path = match.get("relative_path")
                score = match.get("score")
                if relative_path is None or score is None:
                    continue
                aggregate_scores[relative_path] = aggregate_scores.get(relative_path, 0.0) + float(score)

        overall = sorted(
            (
                {"relative_path": relative_path, "score": score}
                for relative_path, score in aggregate_scores.items()
            ),
            key=lambda item: item["score"],
            reverse=True,
        )

        return {
            "results": output,
            "overall": overall,
        }

    def index_status(self) -> dict[str, object]:
        """Summarize how (and from where) the index was resolved, for diagnostics."""
        return {
            "requested_granularity": self.requested_granularity,
            "resolved_granularity": self.index_granularity,
            "precision": self.precision,
            "hub_dataset": self.hub_dataset,
            "index_dir": str(self.index_dir) if self.index_dir else None,
            "index_origin": self.index_origin,
            "missing_files": list(self.missing_files),
            "embedding_model": EMBEDDING_MODEL,
        }
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
def get_default_hub_dataset() -> str:
    """Return the index dataset repo id, honoring the HUB_DATASET env override."""
    return os.environ.get("HUB_DATASET", HUB_DATASET_DEFAULT)
|
app/graph.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ast
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
@dataclass
class Graph:
    """Node/edge graph of the classes, methods, and functions in a snippet."""

    # Each node: {"id": ..., "label": ..., "type": "class" | "method" | "function"}.
    nodes: list[dict[str, str]]
    # Each edge: {"source": ..., "target": ..., "type": "contains" | "calls"}.
    edges: list[dict[str, str]]
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _call_target_name(node: ast.AST) -> str | None:
|
| 12 |
+
if isinstance(node, ast.Name):
|
| 13 |
+
return node.id
|
| 14 |
+
if isinstance(node, ast.Attribute) and isinstance(node.attr, str):
|
| 15 |
+
return node.attr
|
| 16 |
+
return None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def build_graph(code: str) -> Graph:
    """Build a containment/call graph over the top-level defs and classes in *code*.

    Nodes cover top-level classes, their direct methods, and top-level
    functions; "contains" edges link classes to methods and "calls" edges link
    definitions whose bodies invoke other indexed definitions (including
    self.method and Class.method calls). Raises SyntaxError on invalid input.
    """
    module = ast.parse(code)

    top_functions: dict[str, ast.AST] = {}
    class_methods: dict[str, list[str]] = {}
    for stmt in module.body:
        if isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
            top_functions[stmt.name] = stmt
        elif isinstance(stmt, ast.ClassDef):
            class_methods[stmt.name] = [
                member.name
                for member in stmt.body
                if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef))
            ]

    graph_nodes: list[dict[str, str]] = []
    graph_edges: list[dict[str, str]] = []

    # Classes first (with their methods and containment edges), then functions.
    for cls_name, names in class_methods.items():
        graph_nodes.append({"id": cls_name, "label": cls_name, "type": "class"})
        for name in names:
            qualified = f"{cls_name}.{name}"
            graph_nodes.append({"id": qualified, "label": name, "type": "method"})
            graph_edges.append({"source": cls_name, "target": qualified, "type": "contains"})

    graph_nodes.extend(
        {"id": name, "label": name, "type": "function"} for name in top_functions
    )

    node_ids = {entry["id"] for entry in graph_nodes}
    discovered: set[tuple[str, str]] = set()

    def record(source: str, target: str) -> None:
        # Skip self-loops and targets that are not indexed nodes.
        if source != target and target in node_ids:
            discovered.add((source, target))

    # Calls made from top-level functions.
    for name, fn_node in top_functions.items():
        for inner in ast.walk(fn_node):
            if not isinstance(inner, ast.Call):
                continue
            callee = _call_target_name(inner.func)
            if callee is not None and callee in top_functions:
                record(name, callee)

    # Calls made from methods: plain names, self.method, and Class.method.
    for cls_name in class_methods:
        for stmt in module.body:
            if not (isinstance(stmt, ast.ClassDef) and stmt.name == cls_name):
                continue
            for member in stmt.body:
                if not isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    continue
                caller_id = f"{cls_name}.{member.name}"
                for inner in ast.walk(member):
                    if not isinstance(inner, ast.Call):
                        continue
                    callee = inner.func
                    if isinstance(callee, ast.Name):
                        if callee.id in top_functions:
                            record(caller_id, callee.id)
                    elif isinstance(callee, ast.Attribute) and isinstance(callee.value, ast.Name):
                        owner = callee.value.id
                        if owner == "self":
                            record(caller_id, f"{cls_name}.{callee.attr}")
                        elif owner in class_methods:
                            record(caller_id, f"{owner}.{callee.attr}")

    # Deterministic edge order for stable API responses.
    for source, target in sorted(discovered):
        graph_edges.append({"source": source, "target": target, "type": "calls"})

    return Graph(nodes=graph_nodes, edges=graph_edges)
|
app/main.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
from fastapi import FastAPI, HTTPException
|
| 4 |
+
from fastapi.responses import FileResponse
|
| 5 |
+
from fastapi.staticfiles import StaticFiles
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
|
| 8 |
+
from app.detector import CodeSimilarityAnalyzer, get_default_hub_dataset
|
| 9 |
+
from app.graph import build_graph
|
| 10 |
+
|
| 11 |
+
# Project root (one level above app/) and the directory holding the static UI.
BASE_DIR = Path(__file__).resolve().parent.parent
STATIC_DIR = BASE_DIR / "static"

app = FastAPI(title="Modular Model Graph")
# Serve the front-end assets; index.html loads /static/app.js and /static/styles.css.
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class AnalyzeRequest(BaseModel):
    """Request payload for POST /api/analyze."""

    # Source code of the modeling file to analyze (must be non-empty).
    code: str = Field(..., min_length=1)
    # Number of nearest matches returned per symbol, bounded to 1..25.
    top_k: int = Field(default=5, ge=1, le=25)
    # Forwarded to the analyzer; presumably enables Jaccard scoring — see detector.py.
    use_jaccard: bool = False
    # Index granularity; the endpoint rejects anything but 'method' or 'definition'.
    granularity: str = "method"
    # Embedding precision; the endpoint rejects anything but 'float32' or 'int8'.
    precision: str = "float32"
    # Optional Hub dataset id; None falls back to get_default_hub_dataset().
    hub_dataset: str | None = None
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Cache of analyzers keyed by (precision, granularity, hub_dataset) so repeated
# requests with the same settings reuse an already-loaded index.
_ANALYZERS: dict[tuple[str, str, str], CodeSimilarityAnalyzer] = {}


def _get_analyzer(precision: str, granularity: str, hub_dataset: str) -> CodeSimilarityAnalyzer:
    """Return a memoized CodeSimilarityAnalyzer for the given configuration.

    Constructing an analyzer is expensive (it loads an index), so instances
    are cached per (precision, granularity, hub_dataset) combination.
    """
    cache_key = (precision, granularity, hub_dataset)
    cached = _ANALYZERS.get(cache_key)
    if cached is None:
        cached = CodeSimilarityAnalyzer(
            hub_dataset=hub_dataset,
            precision=precision,
            granularity=granularity,
        )
        _ANALYZERS[cache_key] = cached
    return cached
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@app.get("/")
async def index() -> FileResponse:
    """Serve the single-page UI shell."""
    return FileResponse(path=STATIC_DIR / "index.html")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@app.post("/api/analyze")
def analyze(request: AnalyzeRequest) -> dict:
    """Analyze pasted code: build its structure graph and find similar models.

    Returns a dict with the structure graph, per-symbol matches, aggregate
    file-level matches, and information about the index that served them.
    Bad granularity/precision values and syntax errors in the pasted code are
    reported as HTTP 400.

    Declared as a plain ``def`` (not ``async def``) on purpose: FastAPI then
    runs it in a worker thread, so the CPU-heavy embedding/index lookup in
    ``analyzer.analyze_code`` does not block the event loop for every client.
    """
    # Fail fast on invalid enum-like fields before touching the analyzer cache.
    if request.granularity not in ("method", "definition"):
        raise HTTPException(status_code=400, detail="granularity must be 'method' or 'definition'")
    if request.precision not in ("float32", "int8"):
        raise HTTPException(status_code=400, detail="precision must be 'float32' or 'int8'")
    hub_dataset = request.hub_dataset or get_default_hub_dataset()
    analyzer = _get_analyzer(request.precision, request.granularity, hub_dataset)
    try:
        graph = build_graph(request.code)
    except SyntaxError as exc:
        raise HTTPException(status_code=400, detail=f"Syntax error: {exc.msg} at line {exc.lineno}") from exc
    results = analyzer.analyze_code(
        request.code,
        top_k_per_item=request.top_k,
        use_jaccard=request.use_jaccard,
    )
    return {
        "graph": {"nodes": graph.nodes, "edges": graph.edges},
        "results": results["results"],
        "overall": results["overall"],
        "index_info": analyzer.index_status(),
    }
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.115.0
|
| 2 |
+
uvicorn==0.30.6
|
| 3 |
+
sentence-transformers
|
| 4 |
+
transformers
|
| 5 |
+
huggingface_hub==0.24.6
|
| 6 |
+
safetensors==0.4.5
|
| 7 |
+
numpy==1.26.4
|
static/app.js
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Cached references to the static UI elements this script drives.
const analyzeBtn = document.getElementById("analyzeBtn");   // "Analyze" button
const codeInput = document.getElementById("codeInput");     // code textarea
const statusEl = document.getElementById("status");         // status line
const indexInfoEl = document.getElementById("indexInfo");   // index-provenance line
const graphEl = document.getElementById("graph");           // d3 graph container
const matchesEl = document.getElementById("matches");       // per-symbol match cards
const overallEl = document.getElementById("overall");       // aggregate match list
|
| 8 |
+
|
| 9 |
+
// Show a one-line status message (progress / errors) under the input panel.
function setStatus(message) {
  statusEl.textContent = message;
}
|
| 12 |
+
|
| 13 |
+
// Describe which similarity index served the request: resolved granularity,
// whether it was a fallback, its origin, and its directory. Clears the line
// when `info` is null/undefined.
function renderIndexInfo(info) {
  if (!indexInfoEl) return;
  if (!info) {
    indexInfoEl.textContent = "";
    return;
  }
  const requested = info.requested_granularity || "method";
  const resolved = info.resolved_granularity || requested;
  const parts = [`Using ${resolved} index`];
  if (requested !== resolved) {
    parts.push(`(fallback from ${requested})`);
  }
  const origin = info.index_origin;
  if (origin) {
    parts.push(origin === "hub" ? "from Hub" : `from ${origin}`);
  }
  if (info.index_dir) {
    parts.push(`@ ${info.index_dir}`);
  }
  indexInfoEl.textContent = parts.join(" ");
}
|
| 37 |
+
|
| 38 |
+
// Render the top-10 aggregate matches into the "Closest Models" panel.
function renderOverall(overall) {
  overallEl.innerHTML = "";
  if (!overall || overall.length === 0) {
    overallEl.textContent = "No aggregate matches yet.";
    return;
  }
  overall.slice(0, 10).forEach((entry) => {
    const item = document.createElement("div");
    item.className = "overall-item";
    item.textContent = `${entry.relative_path} (${entry.score.toFixed(4)})`;
    overallEl.appendChild(item);
  });
}
|
| 52 |
+
|
| 53 |
+
// Render per-symbol embedding matches as cards in the "Matches by Symbol"
// panel. `results` maps symbol name -> { embedding: [match, ...] }.
function renderMatches(results) {
  matchesEl.innerHTML = "";
  const symbols = Object.keys(results);
  if (symbols.length === 0) {
    matchesEl.textContent = "No matches returned.";
    return;
  }
  symbols.forEach((symbol) => {
    const card = document.createElement("div");
    card.className = "match-card";

    const heading = document.createElement("h3");
    heading.textContent = symbol;
    card.appendChild(heading);

    const list = document.createElement("div");
    list.className = "match-list";
    (results[symbol].embedding || []).forEach((match) => {
      const row = document.createElement("div");
      row.className = "match-row";
      const nameSpan = document.createElement("span");
      nameSpan.textContent = `${match.match_name} (${match.score.toFixed(4)})`;
      const pathSpan = document.createElement("span");
      pathSpan.textContent = match.relative_path;
      row.appendChild(nameSpan);
      row.appendChild(pathSpan);
      list.appendChild(row);
    });
    card.appendChild(list);
    matchesEl.appendChild(card);
  });
}
|
| 84 |
+
|
| 85 |
+
// Draw a force-directed d3 graph of the code structure (classes, methods,
// functions) with draggable nodes and text labels.
function renderGraph(graph) {
  graphEl.innerHTML = "";
  const width = graphEl.clientWidth;
  const height = graphEl.clientHeight;
  const svg = d3
    .select(graphEl)
    .append("svg")
    .attr("width", width)
    .attr("height", height);

  // Shallow-copy nodes/edges: the simulation mutates them in place (x, y,
  // vx, vy, and link source/target get rewritten to node objects), and we
  // don't want that leaking into the response payload.
  const nodes = graph.nodes.map((node) => ({ ...node }));
  const links = graph.edges.map((edge) => ({ ...edge }));

  // Node fill by type; values match the legend colors declared in styles.css.
  const color = (type) => {
    if (type === "class") return "#d6572b";
    if (type === "method") return "#2b6fd6";
    if (type === "function") return "#1b8d57";
    return "#666";
  };

  const simulation = d3
    .forceSimulation(nodes)
    .force("link", d3.forceLink(links).id((d) => d.id).distance(80))
    .force("charge", d3.forceManyBody().strength(-220))
    .force("center", d3.forceCenter(width / 2, height / 2));

  // "contains" edges are drawn slightly thicker than other edge types.
  const link = svg
    .append("g")
    .attr("stroke", "#333")
    .attr("stroke-opacity", 0.4)
    .selectAll("line")
    .data(links)
    .join("line")
    .attr("stroke-width", (d) => (d.type === "contains" ? 1.5 : 1));

  const node = svg
    .append("g")
    .attr("stroke", "#fff")
    .attr("stroke-width", 1.5)
    .selectAll("circle")
    .data(nodes)
    .join("circle")
    .attr("r", (d) => (d.type === "class" ? 9 : 6))
    .attr("fill", (d) => color(d.type))
    .call(drag(simulation)); // `drag` is hoisted from the declaration below

  const labels = svg
    .append("g")
    .selectAll("text")
    .data(nodes)
    .join("text")
    .text((d) => d.label)
    .attr("font-size", 11)
    .attr("fill", "#2b1e13")
    .attr("dx", 12)
    .attr("dy", 3);

  // Native browser tooltip showing the full node id (e.g. "Class.method").
  node.append("title").text((d) => d.id);

  // Sync SVG element positions with the simulation on every tick.
  simulation.on("tick", () => {
    link
      .attr("x1", (d) => d.source.x)
      .attr("y1", (d) => d.source.y)
      .attr("x2", (d) => d.target.x)
      .attr("y2", (d) => d.target.y);

    node.attr("cx", (d) => d.x).attr("cy", (d) => d.y);

    labels.attr("x", (d) => d.x).attr("y", (d) => d.y);
  });

  // Standard d3 drag behavior: heat the simulation and pin the node (fx/fy)
  // while dragging, then release it when the drag ends.
  function drag(sim) {
    function dragstarted(event, d) {
      if (!event.active) sim.alphaTarget(0.3).restart();
      d.fx = d.x;
      d.fy = d.y;
    }

    function dragged(event, d) {
      d.fx = event.x;
      d.fy = event.y;
    }

    function dragended(event, d) {
      if (!event.active) sim.alphaTarget(0);
      d.fx = null;
      d.fy = null;
    }

    return d3.drag().on("start", dragstarted).on("drag", dragged).on("end", dragended);
  }
}
|
| 177 |
+
|
| 178 |
+
// Wire up the Analyze button: POST the pasted code to /api/analyze and
// render the graph, aggregate matches, per-symbol matches, and index info.
analyzeBtn.addEventListener("click", async () => {
  const code = codeInput.value.trim();
  if (!code) {
    setStatus("Paste some code first.");
    return;
  }
  renderIndexInfo(null);
  setStatus("Analyzing... this can take a bit on first run.");
  analyzeBtn.disabled = true; // prevent duplicate concurrent requests
  try {
    const payload = {
      code,
      top_k: Number(document.getElementById("topK").value || 5),
      granularity: document.getElementById("granularity").value,
      use_jaccard: document.getElementById("useJaccard").checked,
      precision: "float32",
    };
    const response = await fetch("/api/analyze", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      // FastAPI reports errors as JSON {"detail": ...}; surface just the
      // detail message instead of the raw JSON body, falling back to the
      // raw text when the body isn't JSON.
      const raw = await response.text();
      let detail = raw;
      try {
        const parsed = JSON.parse(raw);
        if (parsed && parsed.detail) {
          detail = typeof parsed.detail === "string" ? parsed.detail : JSON.stringify(parsed.detail);
        }
      } catch (_) {
        // body was not JSON; keep the raw text
      }
      throw new Error(detail || "Request failed");
    }
    const data = await response.json();
    renderGraph(data.graph);
    renderOverall(data.overall);
    renderMatches(data.results);
    renderIndexInfo(data.index_info);
    setStatus("Done.");
  } catch (error) {
    setStatus(`Error: ${error.message || error}`);
  } finally {
    analyzeBtn.disabled = false;
  }
});
|
static/index.html
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 6 |
+
<title>Modular Model Graph</title>
|
| 7 |
+
<link rel="stylesheet" href="/static/styles.css" />
|
| 8 |
+
<script src="https://d3js.org/d3.v7.min.js"></script>
|
| 9 |
+
</head>
|
| 10 |
+
<body>
|
| 11 |
+
<div class="page">
|
| 12 |
+
<header class="hero">
|
| 13 |
+
<div>
|
| 14 |
+
<p class="eyebrow">Transformers similarity explorer</p>
|
| 15 |
+
<h1>Modular Model Graph</h1>
|
| 16 |
+
<p class="subhead">
|
| 17 |
+
Paste a modeling file, visualize its structure, and compare against Transformers models.
|
| 18 |
+
</p>
|
| 19 |
+
</div>
|
| 20 |
+
</header>
|
| 21 |
+
|
| 22 |
+
<section class="panel">
|
| 23 |
+
<div class="panel-header">
|
| 24 |
+
<h2>Input</h2>
|
| 25 |
+
<div class="controls">
|
| 26 |
+
<label>
|
| 27 |
+
Top K
|
| 28 |
+
<input id="topK" type="number" value="5" min="1" max="25" />
|
| 29 |
+
</label>
|
| 30 |
+
<label>
|
| 31 |
+
Granularity
|
| 32 |
+
<select id="granularity">
|
| 33 |
+
<option value="method" selected>method</option>
|
| 34 |
+
<option value="definition">definition</option>
|
| 35 |
+
</select>
|
| 36 |
+
</label>
|
| 37 |
+
<label class="checkbox">
|
| 38 |
+
<input id="useJaccard" type="checkbox" />
|
| 39 |
+
Use Jaccard
|
| 40 |
+
</label>
|
| 41 |
+
<button id="analyzeBtn">Analyze</button>
|
| 42 |
+
</div>
|
| 43 |
+
</div>
|
| 44 |
+
<textarea id="codeInput" placeholder="Paste modeling file code here..."></textarea>
|
| 45 |
+
<p id="status" class="status"></p>
|
| 46 |
+
<p id="indexInfo" class="status"></p>
|
| 47 |
+
</section>
|
| 48 |
+
|
| 49 |
+
<section class="grid">
|
| 50 |
+
<div class="panel">
|
| 51 |
+
<div class="panel-header">
|
| 52 |
+
<h2>Graph</h2>
|
| 53 |
+
<div class="legend">
|
| 54 |
+
<span class="dot class">Class</span>
|
| 55 |
+
<span class="dot method">Method</span>
|
| 56 |
+
<span class="dot function">Function</span>
|
| 57 |
+
<span class="dot call">Call edge</span>
|
| 58 |
+
</div>
|
| 59 |
+
</div>
|
| 60 |
+
<div id="graph" class="graph"></div>
|
| 61 |
+
</div>
|
| 62 |
+
|
| 63 |
+
<div class="panel">
|
| 64 |
+
<div class="panel-header">
|
| 65 |
+
<h2>Closest Models</h2>
|
| 66 |
+
</div>
|
| 67 |
+
<div id="overall" class="overall"></div>
|
| 68 |
+
</div>
|
| 69 |
+
</section>
|
| 70 |
+
|
| 71 |
+
<section class="panel">
|
| 72 |
+
<div class="panel-header">
|
| 73 |
+
<h2>Matches by Symbol</h2>
|
| 74 |
+
</div>
|
| 75 |
+
<div id="matches" class="matches"></div>
|
| 76 |
+
</section>
|
| 77 |
+
</div>
|
| 78 |
+
|
| 79 |
+
<script src="/static/app.js"></script>
|
| 80 |
+
</body>
|
| 81 |
+
</html>
|
static/styles.css
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@import url("https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;600;700&display=swap");
|
| 2 |
+
|
| 3 |
+
:root {
|
| 4 |
+
--bg: #f6f0e6;
|
| 5 |
+
--panel: #fff7ee;
|
| 6 |
+
--ink: #1b1b1b;
|
| 7 |
+
--muted: #6b5f55;
|
| 8 |
+
--accent: #d6572b;
|
| 9 |
+
--accent-2: #2b6fd6;
|
| 10 |
+
--accent-3: #1b8d57;
|
| 11 |
+
--shadow: rgba(27, 27, 27, 0.1);
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
* {
|
| 15 |
+
box-sizing: border-box;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
body {
|
| 19 |
+
margin: 0;
|
| 20 |
+
font-family: "Space Grotesk", system-ui, sans-serif;
|
| 21 |
+
color: var(--ink);
|
| 22 |
+
background: radial-gradient(circle at 20% 20%, #ffe4c7 0%, transparent 55%),
|
| 23 |
+
radial-gradient(circle at 85% 15%, #f5d2e8 0%, transparent 40%),
|
| 24 |
+
radial-gradient(circle at 70% 80%, #d8f0e2 0%, transparent 45%),
|
| 25 |
+
var(--bg);
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
.page {
|
| 29 |
+
max-width: 1200px;
|
| 30 |
+
margin: 0 auto;
|
| 31 |
+
padding: 32px 24px 64px;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
.hero {
|
| 35 |
+
display: flex;
|
| 36 |
+
justify-content: space-between;
|
| 37 |
+
align-items: flex-end;
|
| 38 |
+
gap: 24px;
|
| 39 |
+
margin-bottom: 24px;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.eyebrow {
|
| 43 |
+
text-transform: uppercase;
|
| 44 |
+
letter-spacing: 0.12em;
|
| 45 |
+
font-size: 12px;
|
| 46 |
+
margin: 0 0 8px;
|
| 47 |
+
color: var(--muted);
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
h1 {
|
| 51 |
+
font-size: 40px;
|
| 52 |
+
margin: 0 0 12px;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
.subhead {
|
| 56 |
+
font-size: 16px;
|
| 57 |
+
max-width: 640px;
|
| 58 |
+
margin: 0;
|
| 59 |
+
color: var(--muted);
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
.panel {
|
| 63 |
+
background: var(--panel);
|
| 64 |
+
border-radius: 20px;
|
| 65 |
+
padding: 20px;
|
| 66 |
+
box-shadow: 0 14px 30px var(--shadow);
|
| 67 |
+
margin-bottom: 24px;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
.panel-header {
|
| 71 |
+
display: flex;
|
| 72 |
+
justify-content: space-between;
|
| 73 |
+
align-items: center;
|
| 74 |
+
flex-wrap: wrap;
|
| 75 |
+
gap: 12px;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
h2 {
|
| 79 |
+
margin: 0;
|
| 80 |
+
font-size: 20px;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
.controls {
|
| 84 |
+
display: flex;
|
| 85 |
+
align-items: center;
|
| 86 |
+
gap: 12px;
|
| 87 |
+
flex-wrap: wrap;
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
.controls label {
|
| 91 |
+
display: flex;
|
| 92 |
+
gap: 8px;
|
| 93 |
+
align-items: center;
|
| 94 |
+
font-size: 14px;
|
| 95 |
+
color: var(--muted);
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
.controls input,
|
| 99 |
+
.controls select {
|
| 100 |
+
border: 1px solid #d9cbbd;
|
| 101 |
+
border-radius: 10px;
|
| 102 |
+
padding: 6px 8px;
|
| 103 |
+
background: #fff;
|
| 104 |
+
font-size: 14px;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
.controls button {
|
| 108 |
+
background: var(--accent);
|
| 109 |
+
color: #fff;
|
| 110 |
+
border: none;
|
| 111 |
+
border-radius: 12px;
|
| 112 |
+
padding: 8px 16px;
|
| 113 |
+
font-weight: 600;
|
| 114 |
+
cursor: pointer;
|
| 115 |
+
transition: transform 0.2s ease;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
.controls button:hover {
|
| 119 |
+
transform: translateY(-1px);
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
textarea {
|
| 123 |
+
width: 100%;
|
| 124 |
+
min-height: 240px;
|
| 125 |
+
margin-top: 16px;
|
| 126 |
+
border-radius: 16px;
|
| 127 |
+
border: 1px solid #d9cbbd;
|
| 128 |
+
padding: 16px;
|
| 129 |
+
font-family: "Space Grotesk", monospace;
|
| 130 |
+
background: #fff;
|
| 131 |
+
resize: vertical;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
.status {
|
| 135 |
+
margin-top: 8px;
|
| 136 |
+
color: var(--muted);
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
.grid {
|
| 140 |
+
display: grid;
|
| 141 |
+
grid-template-columns: minmax(0, 2fr) minmax(0, 1fr);
|
| 142 |
+
gap: 24px;
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
.graph {
|
| 146 |
+
width: 100%;
|
| 147 |
+
height: 480px;
|
| 148 |
+
border-radius: 18px;
|
| 149 |
+
background: #fff;
|
| 150 |
+
border: 1px solid #e3d6c8;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
.legend {
|
| 154 |
+
display: flex;
|
| 155 |
+
gap: 10px;
|
| 156 |
+
align-items: center;
|
| 157 |
+
flex-wrap: wrap;
|
| 158 |
+
font-size: 12px;
|
| 159 |
+
color: var(--muted);
|
| 160 |
+
}
|
| 161 |
+
|
| 162 |
+
.dot {
|
| 163 |
+
display: inline-flex;
|
| 164 |
+
align-items: center;
|
| 165 |
+
gap: 6px;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.dot::before {
|
| 169 |
+
content: "";
|
| 170 |
+
width: 10px;
|
| 171 |
+
height: 10px;
|
| 172 |
+
border-radius: 50%;
|
| 173 |
+
background: var(--muted);
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
.dot.class::before {
|
| 177 |
+
background: var(--accent);
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
.dot.method::before {
|
| 181 |
+
background: var(--accent-2);
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
.dot.function::before {
|
| 185 |
+
background: var(--accent-3);
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
.dot.call::before {
|
| 189 |
+
background: #333;
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
.overall {
|
| 193 |
+
display: flex;
|
| 194 |
+
flex-direction: column;
|
| 195 |
+
gap: 12px;
|
| 196 |
+
margin-top: 12px;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
.overall-item {
|
| 200 |
+
padding: 10px 12px;
|
| 201 |
+
background: #fff;
|
| 202 |
+
border-radius: 12px;
|
| 203 |
+
border: 1px solid #eadccd;
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
.matches {
|
| 207 |
+
display: grid;
|
| 208 |
+
gap: 16px;
|
| 209 |
+
margin-top: 16px;
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
.match-card {
|
| 213 |
+
background: #fff;
|
| 214 |
+
border-radius: 16px;
|
| 215 |
+
border: 1px solid #eadccd;
|
| 216 |
+
padding: 16px;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
.match-card h3 {
|
| 220 |
+
margin: 0 0 8px;
|
| 221 |
+
font-size: 16px;
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
.match-list {
|
| 225 |
+
display: grid;
|
| 226 |
+
gap: 6px;
|
| 227 |
+
font-size: 14px;
|
| 228 |
+
color: var(--muted);
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
.match-row {
|
| 232 |
+
display: flex;
|
| 233 |
+
justify-content: space-between;
|
| 234 |
+
gap: 12px;
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
@media (max-width: 960px) {
|
| 238 |
+
.grid {
|
| 239 |
+
grid-template-columns: 1fr;
|
| 240 |
+
}
|
| 241 |
+
}
|