Molbap HF Staff committed on
Commit
a45c10f
verified
1 Parent(s): d6e311d

Update app

Browse files
app/__pycache__/detector.cpython-312.pyc CHANGED
Binary files a/app/__pycache__/detector.cpython-312.pyc and b/app/__pycache__/detector.cpython-312.pyc differ
 
app/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/app/__pycache__/main.cpython-312.pyc and b/app/__pycache__/main.cpython-312.pyc differ
 
app/detector.py CHANGED
@@ -71,6 +71,17 @@ def _infer_model_hint(definitions_kind: dict[str, str]) -> str | None:
71
  return max(counts.items(), key=lambda item: item[1])[0]
72
 
73
 
 
 
 
 
 
 
 
 
 
 
 
74
  def _normalize_source_path(path: str | None) -> str | None:
75
  if not path:
76
  return None
@@ -520,19 +531,25 @@ class CodeSimilarityAnalyzer:
520
 
521
  by_class_out: dict[str, list[dict[str, object]]] = {}
522
  for qcls, cand_map in by_class.items():
 
 
 
 
523
  rows = []
524
  for _, slot in cand_map.items():
525
  scores = sorted(slot["scores"], reverse=True)
526
- top = scores[:3]
527
- score = float(sum(top) / max(1, len(top)))
 
528
  contributors = sorted(slot["contributors"], key=lambda x: float(x["score"]), reverse=True)[:5]
529
  rows.append(
530
  {
531
  "relative_path": slot["relative_path"],
532
  "class_name": slot["class_name"],
533
  "identifier": f"{slot['relative_path']}:{slot['class_name']}",
534
- "score": score,
535
- "coverage": len(scores),
 
536
  "top_contributors": contributors,
537
  }
538
  )
 
71
  return max(counts.items(), key=lambda item: item[1])[0]
72
 
73
 
74
+ def _softmax_weighted(scores: list[float]) -> float:
75
+ if not scores:
76
+ return 0.0
77
+ max_score = max(scores)
78
+ weights = [float(np.exp(score - max_score)) for score in scores]
79
+ total = sum(weights)
80
+ if total <= 0:
81
+ return float(sum(scores)) / float(len(scores))
82
+ return float(sum(score * weight for score, weight in zip(scores, weights)) / total)
83
+
84
+
85
  def _normalize_source_path(path: str | None) -> str | None:
86
  if not path:
87
  return None
 
531
 
532
  by_class_out: dict[str, list[dict[str, object]]] = {}
533
  for qcls, cand_map in by_class.items():
534
+ q_method_count = len(
535
+ [key for key, kind in definitions_kind.items() if kind == "method" and key.startswith(f"{qcls}.")]
536
+ )
537
+ q_method_count = max(1, q_method_count)
538
  rows = []
539
  for _, slot in cand_map.items():
540
  scores = sorted(slot["scores"], reverse=True)
541
+ base_score = _softmax_weighted(scores)
542
+ coverage_count = len(scores)
543
+ coverage_ratio = coverage_count / float(q_method_count)
544
  contributors = sorted(slot["contributors"], key=lambda x: float(x["score"]), reverse=True)[:5]
545
  rows.append(
546
  {
547
  "relative_path": slot["relative_path"],
548
  "class_name": slot["class_name"],
549
  "identifier": f"{slot['relative_path']}:{slot['class_name']}",
550
+ "score": base_score,
551
+ "coverage": coverage_count,
552
+ "coverage_pct": coverage_ratio,
553
  "top_contributors": contributors,
554
  }
555
  )
app/main.py CHANGED
@@ -85,12 +85,40 @@ def _summarize_ast(node: ast.AST) -> dict[str, list[dict[str, int]]]:
85
  return {"node_counts": node_counts, "calls": call_counts}
86
 
87
 
88
- def _extract_ast(source: str, symbol: str) -> tuple[str | None, dict[str, list[dict[str, int]]] | None]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  tree = ast.parse(source)
90
  node = _find_definition(tree, symbol)
91
  if node is None:
92
  return None, None
93
- return ast.dump(node, include_attributes=False, indent=2), _summarize_ast(node)
 
 
 
 
 
94
 
95
 
96
  @app.get("/")
 
85
  return {"node_counts": node_counts, "calls": call_counts}
86
 
87
 
88
+ def _strip_node_docstring(node: ast.AST) -> None:
89
+ if not isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef)):
90
+ return
91
+ if not node.body:
92
+ return
93
+ first = node.body[0]
94
+ if isinstance(first, ast.Expr) and isinstance(getattr(first, "value", None), ast.Constant):
95
+ if isinstance(first.value.value, str):
96
+ node.body.pop(0)
97
+
98
+
99
def _get_structural_flow(node: ast.AST, max_steps: int = 15) -> str:
    """Summarize calls and control statements under *node* as an arrow chain.

    The tree is visited depth-first in pre-order, so entries appear in the
    order the constructs are written in the source. ``ast.walk`` is avoided
    because it yields nodes in no specified order, which would scramble the
    sequence.

    Args:
        node: Root of the subtree to summarize; the root itself is included.
        max_steps: Maximum number of entries to report.

    Returns:
        Entries joined with ``" -> "``: the name returned by ``_call_name``
        for each call (skipped when falsy) and ``Control(<NodeType>)`` for
        each ``if``/``while``/``for`` statement. Empty when nothing matches.
    """
    flow: list[str] = []
    pending: list[ast.AST] = [node]
    # Stop as soon as the cap is reached instead of scanning the whole tree.
    while pending and len(flow) < max_steps:
        current = pending.pop()
        if isinstance(current, ast.Call):
            name = _call_name(current.func)
            if name:
                flow.append(name)
        elif isinstance(current, (ast.If, ast.While, ast.For)):
            flow.append(f"Control({type(current).__name__})")
        # Push children reversed so the first child is popped first.
        pending.extend(reversed(list(ast.iter_child_nodes(current))))
    return " -> ".join(flow)
109
+
110
+
111
def _extract_ast(source: str, symbol: str) -> tuple[str | None, dict[str, object] | None]:
    """Locate *symbol* in *source* and describe its definition.

    Args:
        source: Python source text to parse.
        symbol: Name passed to ``_find_definition`` to locate the definition.

    Returns:
        ``(None, None)`` when the symbol is not found. Otherwise a pair of
        the docstring-stripped source regenerated from the AST (``None`` if
        unparsing fails) and a dict with the ``"summary"`` and ``"flow"``
        descriptions of the definition.
    """
    definition = _find_definition(ast.parse(source), symbol)
    if definition is None:
        return None, None

    # Drop the docstring first so it appears in neither the code nor the stats.
    _strip_node_docstring(definition)

    code: str | None
    try:
        code = ast.unparse(definition)
    except Exception:
        # Best effort: the structural details are still returned without code.
        code = None

    details: dict[str, object] = {
        "summary": _summarize_ast(definition),
        "flow": _get_structural_flow(definition),
    }
    return code, details
122
 
123
 
124
  @app.get("/")
static/app.js CHANGED
@@ -41,11 +41,13 @@ function renderIndexInfo(info) {
41
 
42
  function formatSummary(summary) {
43
  if (!summary) return "No structural summary.";
44
- const nodes = (summary.node_counts || [])
 
45
  .map((item) => `${item.name}(${item.count})`)
46
  .join(", ");
47
- const calls = (summary.calls || []).map((item) => `${item.name}(${item.count})`).join(", ");
48
  const parts = [];
 
49
  if (nodes) parts.push(`Nodes: ${nodes}`);
50
  if (calls) parts.push(`Calls: ${calls}`);
51
  return parts.join(" · ") || "No structural summary.";
@@ -101,7 +103,8 @@ function renderByClass(byClass) {
101
  const line = document.createElement("div");
102
  line.className = "match-row";
103
  const left = document.createElement("span");
104
- left.textContent = `${row.class_name} · ${Number(row.score).toFixed(4)} ${row.coverage} defs`;
 
105
  const right = document.createElement("span");
106
  right.textContent = row.relative_path;
107
  line.appendChild(left);
 
41
 
42
  function formatSummary(summary) {
43
  if (!summary) return "No structural summary.";
44
+ const flow = summary.flow ? `Flow: ${summary.flow}` : "";
45
+ const nodes = (summary.summary?.node_counts || [])
46
  .map((item) => `${item.name}(${item.count})`)
47
  .join(", ");
48
+ const calls = (summary.summary?.calls || []).map((item) => `${item.name}(${item.count})`).join(", ");
49
  const parts = [];
50
+ if (flow) parts.push(flow);
51
  if (nodes) parts.push(`Nodes: ${nodes}`);
52
  if (calls) parts.push(`Calls: ${calls}`);
53
  return parts.join(" · ") || "No structural summary.";
 
103
  const line = document.createElement("div");
104
  line.className = "match-row";
105
  const left = document.createElement("span");
106
+ const coverage = row.coverage_pct !== undefined ? ` · ${(row.coverage_pct * 100).toFixed(0)}%` : "";
107
+ left.textContent = `${row.class_name} · ${Number(row.score).toFixed(4)} · ${row.coverage} defs${coverage}`;
108
  const right = document.createElement("span");
109
  right.textContent = row.relative_path;
110
  line.appendChild(left);