Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
"""
|
| 4 |
-
Capricode Pro Master - 精准代码视觉感知系统 (
|
| 5 |
-
|
| 6 |
-
经过混合语言压力测试并已修复所有已知逻辑漏洞
|
| 7 |
"""
|
| 8 |
import gradio as gr
|
| 9 |
import time
|
|
@@ -12,7 +11,14 @@ from dataclasses import dataclass
|
|
| 12 |
from typing import Dict, List, Tuple, Any
|
| 13 |
from collections import defaultdict
|
| 14 |
|
| 15 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
@dataclass
|
| 17 |
class LanguageSignature:
|
| 18 |
patterns: List[Tuple[re.Pattern, float]]
|
|
@@ -21,162 +27,26 @@ class LanguageSignature:
|
|
| 21 |
file_extensions: List[str]
|
| 22 |
|
| 23 |
class PrecisionLanguageDetector:
|
|
|
|
|
|
|
| 24 |
def __init__(self):
|
| 25 |
self.languages = {}
|
| 26 |
self._compile_precision_patterns()
|
| 27 |
-
|
| 28 |
def _compile_precision_patterns(self):
|
| 29 |
-
"""
|
| 30 |
-
self.languages = {
|
| 31 |
-
'python': LanguageSignature(
|
| 32 |
-
patterns=[(re.compile(r'def\s+\w+\s*\([^)]*\)\s*:'), 9.0), (re.compile(r'class\s+\w+\s*\(?[^)]*\)?\s*:'), 8.5), (re.compile(r'import\s+[\w.]+'), 6.0)],
|
| 33 |
-
keywords={'def': 8.0, 'class': 7.0, 'import': 6.0, 'elif': 5.0, 'async': 6.0, 'await': 6.0},
|
| 34 |
-
exclusive_patterns=[(re.compile(r'if __name__ == "__main__":'), 20.0), (re.compile(r'f"[^"]*"'), 12.0)],
|
| 35 |
-
file_extensions=['.py', '.pyw']
|
| 36 |
-
),
|
| 37 |
-
'javascript': LanguageSignature(
|
| 38 |
-
patterns=[(re.compile(r'function\s*.*\s*\{'), 8.0), (re.compile(r'console\.log\s*\('), 7.0), (re.compile(r'(const|let|var)\s+\w+\s*='), 8.0)],
|
| 39 |
-
keywords={'function': 7.0, 'const': 6.0, 'let': 6.0, 'console': 5.0, 'document': 6.0, 'async': 5.0, 'await': 5.0},
|
| 40 |
-
exclusive_patterns=[(re.compile(r'document\.getElementById\s*\('), 15.0)],
|
| 41 |
-
file_extensions=['.js', '.jsx', '.mjs']
|
| 42 |
-
),
|
| 43 |
-
'java': LanguageSignature(
|
| 44 |
-
patterns=[(re.compile(r'public\s+class\s+\w+'), 10.0), (re.compile(r'System\.out\.println\s*\('), 9.0), (re.compile(r'import\s+java\.'), 8.0)],
|
| 45 |
-
keywords={'public': 8.0, 'class': 8.0, 'static': 7.0, 'void': 6.0, 'String': 7.0, 'System': 7.0, 'new': 5.0},
|
| 46 |
-
exclusive_patterns=[(re.compile(r'public\s+static\s+void\s+main\s*\(\s*String\s*\[\]\s*args\s*\)'), 25.0)],
|
| 47 |
-
file_extensions=['.java']
|
| 48 |
-
),
|
| 49 |
-
'cpp': LanguageSignature(
|
| 50 |
-
patterns=[(re.compile(r'#include\s*<[a-zA-Z_]+>'), 10.0), (re.compile(r'using\s+namespace\s+std;'), 9.0), (re.compile(r'std::cout'), 8.0)],
|
| 51 |
-
keywords={'#include': 9.0, 'iostream': 8.0, 'std': 7.0, 'cout': 7.0, 'vector': 6.0, 'int': 5.0, 'main': 6.0},
|
| 52 |
-
exclusive_patterns=[(re.compile(r'#include\s*<iostream>'), 18.0)],
|
| 53 |
-
file_extensions=['.cpp', '.cxx', '.h', '.hpp']
|
| 54 |
-
),
|
| 55 |
-
'html': LanguageSignature(
|
| 56 |
-
patterns=[(re.compile(r'<\s*head\s*>'), 8.0), (re.compile(r'<\s*body\s*>'), 8.0), (re.compile(r'<\s*/\s*\w+\s*>'), 6.0)],
|
| 57 |
-
keywords={'div': 5.0, 'p': 4.0, 'a': 4.0, 'href': 5.0, 'class': 3.0, 'id': 3.0},
|
| 58 |
-
exclusive_patterns=[(re.compile(r'<!DOCTYPE\s+html>', re.IGNORECASE), 100.0)], # 绝对权重
|
| 59 |
-
file_extensions=['.html', '.htm']
|
| 60 |
-
),
|
| 61 |
-
'css': LanguageSignature(
|
| 62 |
-
patterns=[(re.compile(r'[\w\s.#-]+\s*\{'), 8.0), (re.compile(r'[\w-]+\s*:\s*[^;]+;'), 9.0)],
|
| 63 |
-
keywords={'color': 6.0, 'background-color': 7.0, 'margin': 5.0, 'padding': 5.0, 'font-size': 6.0, 'display': 6.0},
|
| 64 |
-
exclusive_patterns=[(re.compile(r'@media\s*\(.+\)\s*\{'), 15.0)],
|
| 65 |
-
file_extensions=['.css']
|
| 66 |
-
),
|
| 67 |
-
'sql': LanguageSignature(
|
| 68 |
-
patterns=[(re.compile(r'(SELECT|CREATE|INSERT|UPDATE|DELETE|FROM|WHERE)', re.IGNORECASE), 10.0)],
|
| 69 |
-
keywords={'SELECT': 9.0, 'FROM': 8.0, 'WHERE': 7.0, 'JOIN': 7.0, 'GROUP BY': 8.0, 'ORDER BY': 7.0, 'CREATE TABLE': 9.0},
|
| 70 |
-
exclusive_patterns=[(re.compile(r'CREATE\s+TABLE', re.IGNORECASE), 18.0)],
|
| 71 |
-
file_extensions=['.sql']
|
| 72 |
-
),
|
| 73 |
-
'csharp': LanguageSignature(
|
| 74 |
-
patterns=[(re.compile(r'using\s+System;'), 10.0), (re.compile(r'namespace\s+\w+'), 9.0)],
|
| 75 |
-
keywords={'namespace': 9.0, 'public': 7.0, 'class': 7.0, 'static': 6.0, 'void': 5.0, 'string': 7.0, 'var': 6.0},
|
| 76 |
-
exclusive_patterns=[(re.compile(r'static\s+void\s+Main\s*\(\s*string\s*\[\]\s*args\s*\)'), 25.0), (re.compile(r'Console\.WriteLine\s*\('), 15.0)],
|
| 77 |
-
file_extensions=['.cs']
|
| 78 |
-
),
|
| 79 |
-
'go': LanguageSignature(
|
| 80 |
-
patterns=[(re.compile(r'package\s+main'), 10.0), (re.compile(r'import\s*\(\s*'), 8.0), (re.compile(r'func\s+\w+\s*\([^)]*\)\s*\{'), 9.0)],
|
| 81 |
-
keywords={'package': 9.0, 'import': 8.0, 'func': 9.0, 'fmt': 7.0, 'Println': 6.0},
|
| 82 |
-
exclusive_patterns=[(re.compile(r'func\s+main\s*\(\s*\)'), 20.0), (re.compile(r':='), 12.0)],
|
| 83 |
-
file_extensions=['.go']
|
| 84 |
-
),
|
| 85 |
-
'rust': LanguageSignature(
|
| 86 |
-
patterns=[(re.compile(r'fn\s+\w+\s*\([^)]*\)\s*\{'), 10.0), (re.compile(r'use\s+std::'), 9.0), (re.compile(r'let\s+(mut\s+)?\w+'), 8.0)],
|
| 87 |
-
keywords={'fn': 9.0, 'let': 8.0, 'mut': 7.0, 'use': 8.0, 'struct': 7.0},
|
| 88 |
-
exclusive_patterns=[(re.compile(r'fn\s+main\s*\(\s*\)'), 20.0), (re.compile(r'println!\s*\('), 18.0)],
|
| 89 |
-
file_extensions=['.rs']
|
| 90 |
-
),
|
| 91 |
-
'kotlin': LanguageSignature(
|
| 92 |
-
patterns=[(re.compile(r'fun\s+\w+\s*\([^)]*\)'), 10.0), (re.compile(r'val\s+\w+'), 8.0)],
|
| 93 |
-
keywords={'package': 8.0, 'import': 7.0, 'fun': 9.0, 'val': 8.0, 'var': 7.0, 'println': 6.0, 'class': 7.0},
|
| 94 |
-
exclusive_patterns=[(re.compile(r'fun\s+main\s*\(\s*args:\s*Array<String>\s*\)'), 25.0)],
|
| 95 |
-
file_extensions=['.kt', '.kts']
|
| 96 |
-
),
|
| 97 |
-
'ruby': LanguageSignature(
|
| 98 |
-
patterns=[(re.compile(r'def\s+\w+'), 10.0), (re.compile(r'require\s+[\'"]\w+[\'"]'), 8.0), (re.compile(r'^\s*end\s*$'), 7.0)],
|
| 99 |
-
keywords={'def': 9.0, 'end': 8.0, 'require': 7.0, 'puts': 6.0, 'class': 7.0},
|
| 100 |
-
exclusive_patterns=[(re.compile(r'# frozen_string_literal: true'), 15.0), (re.compile(r':\w+'), 10.0)],
|
| 101 |
-
file_extensions=['.rb']
|
| 102 |
-
),
|
| 103 |
-
'swift': LanguageSignature(
|
| 104 |
-
patterns=[(re.compile(r'import\s+(UIKit|Foundation)'), 10.0), (re.compile(r'func\s+\w+\s*\([^)]*\)\s*\{'), 9.0)],
|
| 105 |
-
keywords={'import': 8.0, 'func': 9.0, 'let': 8.0, 'var': 8.0, 'class': 7.0, 'print': 6.0},
|
| 106 |
-
exclusive_patterns=[(re.compile(r'@IBOutlet'), 18.0), (re.compile(r'import\s+SwiftUI'), 20.0)],
|
| 107 |
-
file_extensions=['.swift']
|
| 108 |
-
),
|
| 109 |
-
'php': LanguageSignature(
|
| 110 |
-
patterns=[(re.compile(r'function\s+\w+\s*\([^)]*\)'), 7.0), (re.compile(r'\$\w+'), 9.0), (re.compile(r'echo\s+'), 6.0)],
|
| 111 |
-
keywords={'function': 7.0, 'echo': 6.0, 'class': 6.0, 'public': 6.0},
|
| 112 |
-
exclusive_patterns=[(re.compile(r'<\?php'), 100.0)], # 绝对权重
|
| 113 |
-
file_extensions=['.php']
|
| 114 |
-
),
|
| 115 |
-
'typescript': LanguageSignature(
|
| 116 |
-
patterns=[(re.compile(r'interface\s+\w+'), 10.0), (re.compile(r':\s*(string|number|boolean|any)'), 9.0)],
|
| 117 |
-
keywords={'interface': 9.0, 'type': 8.0, 'public': 7.0, 'private': 7.0, 'enum': 7.0},
|
| 118 |
-
exclusive_patterns=[(re.compile(r'public\s+constructor\s*\('), 18.0)],
|
| 119 |
-
file_extensions=['.ts', '.tsx']
|
| 120 |
-
),
|
| 121 |
-
'r': LanguageSignature(
|
| 122 |
-
patterns=[(re.compile(r'library\s*\(\w+\)'), 10.0), (re.compile(r'\w+\s*<-\s*'), 9.0)],
|
| 123 |
-
keywords={'library': 9.0, 'function': 7.0, 'if': 5.0, 'else': 5.0},
|
| 124 |
-
exclusive_patterns=[(re.compile(r'install\.packages\s*\('), 20.0), (re.compile(r'<-'), 15.0)],
|
| 125 |
-
file_extensions=['.r']
|
| 126 |
-
),
|
| 127 |
-
'bash': LanguageSignature(
|
| 128 |
-
patterns=[(re.compile(r'if\s+\[.*\]'), 8.0), (re.compile(r'^\s*fi\s*$'), 7.0), (re.compile(r'echo\s+'), 6.0)],
|
| 129 |
-
keywords={'if': 7.0, 'then': 7.0, 'fi': 7.0, 'for': 6.0, 'do': 6.0, 'done': 6.0, 'echo': 6.0},
|
| 130 |
-
exclusive_patterns=[(re.compile(r'#!/bin?/(ba|z|k)?sh'), 100.0)], # 绝对权重
|
| 131 |
-
file_extensions=['.sh']
|
| 132 |
-
),
|
| 133 |
-
'dockerfile': LanguageSignature(
|
| 134 |
-
patterns=[(re.compile(r'^(FROM|RUN|CMD|COPY|ADD|WORKDIR|EXPOSE)\s+', re.MULTILINE), 10.0)],
|
| 135 |
-
keywords={'FROM': 10.0, 'RUN': 9.0, 'CMD': 8.0, 'COPY': 8.0, 'WORKDIR': 7.0, 'EXPOSE': 7.0},
|
| 136 |
-
exclusive_patterns=[(re.compile(r'^FROM\s+'), 20.0)],
|
| 137 |
-
file_extensions=['Dockerfile']
|
| 138 |
-
),
|
| 139 |
-
'yaml': LanguageSignature(
|
| 140 |
-
patterns=[(re.compile(r'^\s*[\w-]+\s*:\s*.*'), 9.0), (re.compile(r'^\s*-\s+'), 8.0)],
|
| 141 |
-
keywords={'version': 6.0, 'services': 7.0, 'steps': 7.0, 'jobs': 7.0, 'name': 5.0, 'image': 6.0},
|
| 142 |
-
exclusive_patterns=[],
|
| 143 |
-
file_extensions=['.yml', '.yaml']
|
| 144 |
-
),
|
| 145 |
-
'xml': LanguageSignature(
|
| 146 |
-
patterns=[(re.compile(r'<(\w+)\s*.*>.*</\1>'), 9.0), (re.compile(r'<\?xml[^>]*\?>'), 10.0)],
|
| 147 |
-
keywords={},
|
| 148 |
-
exclusive_patterns=[(re.compile(r'<\?xml\s+version="1.0"'), 100.0)], # 绝对权重
|
| 149 |
-
file_extensions=['.xml']
|
| 150 |
-
),
|
| 151 |
-
}
|
| 152 |
-
|
| 153 |
def detect_with_line_info(self, code: str) -> Dict[str, Any]:
|
| 154 |
start_time = time.time()
|
| 155 |
if not code or not code.strip(): return self._empty_result()
|
| 156 |
-
code = code.strip()
|
| 157 |
-
lines = code.split('\n')
|
| 158 |
-
scores = defaultdict(float)
|
| 159 |
-
line_evidence = {lang: [] for lang in self.languages}
|
| 160 |
-
exclusive_matches = defaultdict(float)
|
| 161 |
-
|
| 162 |
-
# 1. 独占模式检测
|
| 163 |
for lang, signature in self.languages.items():
|
| 164 |
for pattern, weight in signature.exclusive_patterns:
|
| 165 |
for i, line in enumerate(lines, 1):
|
| 166 |
-
if pattern.search(line):
|
| 167 |
-
exclusive_matches[lang] += weight
|
| 168 |
-
line_evidence[lang].append(f"第{i}行: {line.strip()[:50]}...")
|
| 169 |
-
|
| 170 |
-
# 2. 检查是否有绝对权重匹配
|
| 171 |
if exclusive_matches:
|
| 172 |
best_lang_exclusive = max(exclusive_matches, key=exclusive_matches.get)
|
| 173 |
-
if exclusive_matches[best_lang_exclusive] >= 100.0:
|
| 174 |
-
return self._build_absolute_result(best_lang_exclusive, lines, start_time)
|
| 175 |
-
|
| 176 |
-
# 3. 如果没有绝对权重匹配,则继续进行常规检测
|
| 177 |
for lang, signature in self.languages.items():
|
| 178 |
-
score = exclusive_matches.get(lang, 0)
|
| 179 |
-
lang_evidence = []
|
| 180 |
for pattern, weight in signature.patterns:
|
| 181 |
for i, line in enumerate(lines, 1):
|
| 182 |
if pattern.search(line): score += weight; lang_evidence.append(f"第{i}行 (正则): {line.strip()[:50]}...")
|
|
@@ -184,21 +54,10 @@ class PrecisionLanguageDetector:
|
|
| 184 |
keyword_pattern = re.compile(r'\b' + re.escape(keyword) + r'\b', re.IGNORECASE if lang in ['html', 'sql', 'dockerfile'] else 0)
|
| 185 |
for i, line in enumerate(lines, 1):
|
| 186 |
if keyword_pattern.search(line): score += weight; lang_evidence.append(f"第{i}行 (关键词): '{keyword}'")
|
| 187 |
-
|
| 188 |
-
if score > 0:
|
| 189 |
-
scores[lang] = score
|
| 190 |
-
line_evidence[lang].extend(list(dict.fromkeys(lang_evidence)))
|
| 191 |
-
|
| 192 |
if not scores: return self._unknown_result(lines, start_time)
|
| 193 |
-
|
| 194 |
-
best_lang = max(scores, key=scores.get)
|
| 195 |
-
best_score = scores[best_lang]
|
| 196 |
-
total_score = sum(scores.values())
|
| 197 |
-
confidence = best_score / total_score if total_score > 0 else 0.0
|
| 198 |
-
processing_time = (time.time() - start_time) * 1000
|
| 199 |
-
|
| 200 |
return {'language': best_lang, 'confidence': round(min(confidence * 1.2, 0.999), 3), 'score': round(best_score, 2), 'total_lines': len(lines), 'line_evidence': line_evidence[best_lang][:15], 'processing_time_ms': round(processing_time, 2), 'code_preview': self._get_code_preview(lines), 'all_scores': {k: round(v, 2) for k, v in sorted(scores.items(), key=lambda item: item[1], reverse=True) if v > 0}, 'detection_stats': self._get_detection_stats(scores, lines, best_lang)}
|
| 201 |
-
|
| 202 |
def _get_code_preview(self, lines: List[str]) -> Dict[str, Any]:
|
| 203 |
if not lines: return {}
|
| 204 |
functions, classes, imports, comments = [], [], [], []
|
|
@@ -209,22 +68,14 @@ class PrecisionLanguageDetector:
|
|
| 209 |
elif re.match(r'(import|from|#include|using|require|use|library|package)\s+', line_clean): imports.append(f"第{i}行: {line_clean}")
|
| 210 |
elif re.match(r'(#|//|--|/\*|<!--)', line_clean): comments.append(f"第{i}行: {line_clean}")
|
| 211 |
return {'total_lines': len(lines), 'functions_count': len(functions), 'classes_count': len(classes), 'imports_count': len(imports), 'comments_count': len(comments), 'code_density': round((len(lines) - len(comments)) / len(lines), 3) if lines else 0, 'sample_functions': functions[:3], 'sample_classes': classes[:2], 'sample_imports': imports[:3]}
|
| 212 |
-
|
| 213 |
-
# ★★★ FIX: This function now accepts the decided primary_language ★★★
|
| 214 |
def _get_detection_stats(self, scores: Dict[str, float], lines: List[str], primary_language: str) -> Dict[str, Any]:
|
| 215 |
if not scores: return {}
|
| 216 |
-
total_score = sum(scores.values())
|
| 217 |
-
max_score = max(scores.values(), default=0)
|
| 218 |
-
quality = '确定性' if max_score >= 100 else '极高' if max_score > 50 else '高' if max_score > 25 else '中'
|
| 219 |
return {'languages_detected': len(scores), 'primary_language': primary_language, 'score_distribution': {lang: round(score/total_score*100, 1) for lang, score in scores.items() if score > 0}, 'total_score': round(total_score, 2), 'detection_quality': quality}
|
| 220 |
-
|
| 221 |
def _empty_result(self) -> Dict[str, Any]:
    """Build the placeholder result used when there is no code to analyse.

    Returns:
        A freshly created dict whose language is ``'unknown'``, whose
        confidence and line count are zero, whose evidence list and
        preview/stats/score mappings are empty, and whose processing time
        is the fixed nominal value ``0.1`` (milliseconds, per the key name).
    """
    result: Dict[str, Any] = dict(language='unknown', confidence=0.0, message='代码为空')
    result.update(total_lines=0, line_evidence=[], processing_time_ms=0.1)
    # Each section gets its own new dict so callers never share state.
    for empty_section in ('code_preview', 'detection_stats', 'all_scores'):
        result[empty_section] = {}
    return result
|
| 222 |
-
|
| 223 |
def _unknown_result(self, lines: List[str], start_time: float) -> Dict[str, Any]:
    """Build the result returned when no language could be recognised.

    Args:
        lines: The input code split into lines; used for the line count
            and passed to ``self._get_code_preview``.
        start_time: ``time.time()`` timestamp taken when detection began.

    Returns:
        A dict with language ``'unknown'``, zero confidence, the number of
        lines, the elapsed time in milliseconds rounded to two decimals,
        the code preview, and empty evidence/stats/score containers.
    """
    line_count = len(lines)
    # Elapsed time is sampled before the preview is built, so the preview
    # cost is not included in the reported figure.
    elapsed_ms = round((time.time() - start_time) * 1000, 2)
    preview = self._get_code_preview(lines)

    result: Dict[str, Any] = {}
    result['language'] = 'unknown'
    result['confidence'] = 0.0
    result['message'] = '无法识别编程语言'
    result['total_lines'] = line_count
    result['line_evidence'] = []
    result['processing_time_ms'] = elapsed_ms
    result['code_preview'] = preview
    result['detection_stats'] = {}
    result['all_scores'] = {}
    return result
|
| 224 |
-
|
| 225 |
def _build_absolute_result(self, lang: str, lines: List[str], start_time: float) -> Dict[str, Any]:
|
| 226 |
-
all_scores = defaultdict(float)
|
| 227 |
-
line_evidence = defaultdict(list)
|
| 228 |
for current_lang, signature in self.languages.items():
|
| 229 |
score = 0
|
| 230 |
for pattern, weight in signature.exclusive_patterns + signature.patterns:
|
|
@@ -239,16 +90,13 @@ class PrecisionLanguageDetector:
|
|
| 239 |
score += weight
|
| 240 |
if current_lang == lang: line_evidence[lang].append(f"第{i}行 (关键词): '{keyword}'")
|
| 241 |
if score > 0: all_scores[current_lang] = score
|
| 242 |
-
|
| 243 |
best_score = all_scores[lang]
|
| 244 |
return {'language': lang, 'confidence': 0.999, 'score': round(best_score, 2), 'total_lines': len(lines), 'line_evidence': list(dict.fromkeys(line_evidence[lang]))[:15], 'processing_time_ms': round((time.time() - start_time) * 1000, 2), 'code_preview': self._get_code_preview(lines), 'detection_method': 'absolute_exclusive_pattern', 'all_scores': {k: round(v, 2) for k, v in sorted(all_scores.items(), key=lambda item: item[1], reverse=True) if v > 0}, 'detection_stats': self._get_detection_stats(all_scores, lines, lang)}
|
| 245 |
|
| 246 |
# 全局实例
|
| 247 |
precision_detector = PrecisionLanguageDetector()
|
| 248 |
-
|
| 249 |
# CSS样式
|
| 250 |
custom_css = ":root{--primary:#6366f1;--primary-dark:#4f46e5;--secondary:#10b981;--accent:#f59e0b;--danger:#ef4444;--bg-primary:#fff;--bg-secondary:#f8fafc;--bg-card:#fff;--border:#e2e8f0;--text-primary:#1e293b;--text-secondary:#64748b;--text-muted:#94a3b8;--shadow:0 1px 3px 0 rgba(0,0,0,.1),0 1px 2px -1px rgba(0,0,0,.1);--shadow-lg:0 10px 15px -3px rgba(0,0,0,.1),0 4px 6px -4px rgba(0,0,0,.1)}.gradio-container{background:linear-gradient(135deg,#f8fafc 0%,#e2e8f0 100%)!important;min-height:100vh;font-family:Inter,-apple-system,BlinkMacSystemFont,sans-serif!important}.pro-card{background:var(--bg-card)!important;border:1px solid var(--border)!important;border-radius:12px!important;box-shadow:var(--shadow)!important;padding:24px!important;margin-bottom:20px!important;transition:all .3s cubic-bezier(.4,0,.2,1)!important}.pro-card:hover{box-shadow:var(--shadow-lg)!important;transform:translateY(-2px)!important}.header-section{text-align:center!important;margin-bottom:40px!important;background:linear-gradient(135deg,var(--primary) 0%,var(--primary-dark) 100%)!important;border-radius:16px!important;padding:40px 32px!important;color:#fff!important}.app-title{font-size:3rem!important;font-weight:800!important;margin-bottom:12px!important;background:linear-gradient(135deg,#fff 0%,#f1f5f9 100%)!important;-webkit-background-clip:text!important;-webkit-text-fill-color:transparent!important;background-clip:text!important}.app-subtitle{font-size:1.25rem!important;font-weight:400!important;opacity:.9!important;margin-bottom:0!important}.stats-grid{display:grid!important;grid-template-columns:repeat(auto-fit,minmax(180px,1fr))!important;gap:16px!important;margin:20px 0!important}.stat-card{background:#fff!important;border-radius:8px!important;padding:16px!important;text-align:center!important;border:1px solid 
var(--border)!important}.stat-value{font-size:2rem!important;font-weight:700!important;color:var(--primary)!important;margin-bottom:4px!important}.stat-label{font-size:.875rem!important;color:var(--text-secondary)!important;font-weight:500!important}.evidence-panel{background:var(--bg-secondary)!important;border:1px solid var(--border)!important;border-radius:8px!important;padding:16px!important;max-height:300px!important;overflow-y:auto!important;font-family:Monaco,Menlo,Consolas,monospace!important;font-size:.875rem!important}.evidence-item{padding:8px 12px!important;margin:4px 0!important;background:#fff!important;border-radius:6px!important;border-left:4px solid var(--primary)!important}.confidence-high{color:var(--secondary)!important;font-weight:700!important}.confidence-medium{color:var(--accent)!important;font-weight:600!important}.confidence-low{color:var(--danger)!important;font-weight:600!important}.btn-primary{background:linear-gradient(135deg,var(--primary) 0%,var(--primary-dark) 100%)!important;color:#fff!important;border:none!important;border-radius:8px!important;padding:12px 24px!important;font-weight:600!important;transition:all .3s ease!important}.btn-primary:hover{transform:translateY(-1px)!important;box-shadow:0 4px 12px rgba(99,102,241,.3)!important}"
|
| 251 |
-
|
| 252 |
# 显示格式化辅助类
|
| 253 |
class DisplayFormatter:
|
| 254 |
@staticmethod
|
|
@@ -284,49 +132,170 @@ class DisplayFormatter:
|
|
| 284 |
preview = result.get('code_preview', {})
|
| 285 |
if not preview: return "<div style='text-align: center; color: #64748b;'>无代码质量数据</div>"
|
| 286 |
return f"""<div style="padding: 20px;"><h4 style="margin-bottom: 16px;">代码结构指标 (通用)</h4><div class="stats-grid"><div class="stat-card"><div class="stat-value">{preview.get('functions_count',0)}</div><div class="stat-label">函数/方法</div></div><div class="stat-card"><div class="stat-value">{preview.get('classes_count',0)}</div><div class="stat-label">类/结构体</div></div><div class="stat-card"><div class="stat-value">{preview.get('imports_count',0)}</div><div class="stat-label">导入/包含</div></div><div class="stat-card"><div class="stat-value">{preview.get('code_density',0):.1%}</div><div class="stat-label">代码密度</div></div></div></div>"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
def create_enhanced_interface():
|
| 290 |
with gr.Blocks(title="Capricode Pro Master", css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")) as demo:
|
| 291 |
-
gr.HTML("""<div class='header-section'><div class="app-title">🚀 Capricode Pro Master</div><div class="app-subtitle">精准代码视觉感知系统 (
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
with gr.
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
with gr.Column(elem_classes="pro-card"):
|
| 302 |
-
gr.Markdown("###
|
| 303 |
-
with gr.
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
with gr.Column(elem_classes="pro-card"):
|
| 306 |
-
gr.Markdown("### 📊 分
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
with gr.Column(elem_classes="pro-card"):
|
| 313 |
-
gr.Markdown("### 📈 可视化分析")
|
| 314 |
-
with gr.Tabs():
|
| 315 |
-
with gr.TabItem("🔧 语言分布"): language_distribution = gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>语言分布可视化将在此显示</div>")
|
| 316 |
-
with gr.TabItem("📋 代码质量"): code_quality = gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>代码质量分析将在此显示</div>")
|
| 317 |
-
with gr.TabItem("🎯 检测详情 (JSON)"): detection_details = gr.JSON(label="详细检测数据", show_label=False)
|
| 318 |
-
test_cases = {'python': 'def main():\n print("Hello from Python!")\n\nif __name__ == "__main__":\n main()','java': 'public class HelloWorld {\n public static void main(String[] args) {\n System.out.println("Hello, Java!");\n }\n}','javascript': 'document.addEventListener("DOMContentLoaded", () => {\n console.log("Hello, JavaScript!");\n});','cpp': '#include <iostream>\n\nint main() {\n std::cout << "Hello, C++!" << std::endl;\n return 0;\n}','csharp': 'using System;\n\nnamespace HelloWorldApp {\n class Program {\n static void Main(string[] args) {\n Console.WriteLine("Hello, C#!");\n }\n }\n}','go': 'package main\n\nimport "fmt"\n\nfunc main() {\n fmt.Println("Hello, Go!")\n}','rust': 'fn main() {\n println!("Hello, Rust!");\n}','php': '<?php\n echo "Hello, PHP!";\n?>','ruby': 'def say_hello\n puts "Hello, Ruby!"\nend\n\nsay_hello()','typescript': 'interface User {\n name: string;\n id: number;\n}\n\nconst user: User = { name: "TypeScript", id: 0 };\nconsole.log(`Hello, ${user.name}!`);','bash': '#!/bin/bash\n\n# Simple bash script\nMESSAGE="Hello, Bash!"\necho $MESSAGE','dockerfile': 'FROM ubuntu:20.04\n\nRUN apt-get update && apt-get install -y curl\n\nCMD ["echo", "Hello, Docker!"]',}
|
| 319 |
-
with gr.Column(elem_classes="pro-card"):
|
| 320 |
-
gr.Markdown("### 🚀 快速测试用例")
|
| 321 |
-
gr.Examples(examples=[[v] for v in test_cases.values()], inputs=code_input, label="点击示例以快速加载")
|
| 322 |
|
| 323 |
-
|
|
|
|
|
|
|
|
|
|
| 324 |
if not code or not code.strip(): return DisplayFormatter.format_display_result(precision_detector._empty_result())
|
| 325 |
return DisplayFormatter.format_display_result(precision_detector.detect_with_line_info(code))
|
| 326 |
|
| 327 |
-
|
| 328 |
-
detect_btn.click(fn=
|
| 329 |
-
clear_btn.click(fn=DisplayFormatter.get_empty_display_state, outputs=[code_input] +
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
return demo
|
| 331 |
|
| 332 |
# 启动应用
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
"""
|
| 4 |
+
Capricode Pro Master - 精准代码视觉感知系统 (史诗进化版)
|
| 5 |
+
集成项目分析器,支持.zip文件上传分析,完美兼容免费部署环境
|
|
|
|
| 6 |
"""
|
| 7 |
import gradio as gr
|
| 8 |
import time
|
|
|
|
| 11 |
from typing import Dict, List, Tuple, Any
|
| 12 |
from collections import defaultdict
|
| 13 |
|
| 14 |
+
# --- 新增的库 ---
|
| 15 |
+
import os
|
| 16 |
+
import zipfile
|
| 17 |
+
import tempfile
|
| 18 |
+
import shutil
|
| 19 |
+
import matplotlib.pyplot as plt
|
| 20 |
+
|
| 21 |
+
# ==================== 精准语言识别引擎 (保持不变) ====================
|
| 22 |
@dataclass
|
| 23 |
class LanguageSignature:
|
| 24 |
patterns: List[Tuple[re.Pattern, float]]
|
|
|
|
| 27 |
file_extensions: List[str]
|
| 28 |
|
| 29 |
class PrecisionLanguageDetector:
|
| 30 |
+
# ... 核心引擎代码与上一版完全相同,此处折叠以节省篇幅 ...
|
| 31 |
+
# (在最终代码中,这部分是完整存在的)
|
| 32 |
def __init__(self):
|
| 33 |
self.languages = {}
|
| 34 |
self._compile_precision_patterns()
|
|
|
|
| 35 |
def _compile_precision_patterns(self):
    """Build the language signature table and store it on ``self.languages``.

    Every entry maps a language name to a ``LanguageSignature`` holding:
    weighted regex ``patterns``, weighted ``keywords``, weighted
    ``exclusive_patterns`` and the language's ``file_extensions``.
    All weights are plain floats; the regexes are compiled once here.
    """
    rx = re.compile
    # Entries are inserted in a fixed order; the mapping is iterated by the
    # detection code, so the order is kept stable.
    table = {}

    table['python'] = LanguageSignature(
        patterns=[
            (rx(r'def\s+\w+\s*\([^)]*\)\s*:'), 9.0),
            (rx(r'class\s+\w+\s*\(?[^)]*\)?\s*:'), 8.5),
            (rx(r'import\s+[\w.]+'), 6.0),
        ],
        keywords={'def': 8.0, 'class': 7.0, 'import': 6.0, 'elif': 5.0, 'async': 6.0, 'await': 6.0},
        exclusive_patterns=[
            (rx(r'if __name__ == "__main__":'), 20.0),
            (rx(r'f"[^"]*"'), 12.0),
        ],
        file_extensions=['.py', '.pyw'],
    )

    table['javascript'] = LanguageSignature(
        patterns=[
            (rx(r'function\s*.*\s*\{'), 8.0),
            (rx(r'console\.log\s*\('), 7.0),
            (rx(r'(const|let|var)\s+\w+\s*='), 8.0),
        ],
        keywords={'function': 7.0, 'const': 6.0, 'let': 6.0, 'console': 5.0, 'document': 6.0, 'async': 5.0, 'await': 5.0},
        exclusive_patterns=[
            (rx(r'document\.getElementById\s*\('), 15.0),
        ],
        file_extensions=['.js', '.jsx', '.mjs'],
    )

    table['java'] = LanguageSignature(
        patterns=[
            (rx(r'public\s+class\s+\w+'), 10.0),
            (rx(r'System\.out\.println\s*\('), 9.0),
            (rx(r'import\s+java\.'), 8.0),
        ],
        keywords={'public': 8.0, 'class': 8.0, 'static': 7.0, 'void': 6.0, 'String': 7.0, 'System': 7.0, 'new': 5.0},
        exclusive_patterns=[
            (rx(r'public\s+static\s+void\s+main\s*\(\s*String\s*\[\]\s*args\s*\)'), 25.0),
        ],
        file_extensions=['.java'],
    )

    table['cpp'] = LanguageSignature(
        patterns=[
            (rx(r'#include\s*<[a-zA-Z_]+>'), 10.0),
            (rx(r'using\s+namespace\s+std;'), 9.0),
            (rx(r'std::cout'), 8.0),
        ],
        keywords={'#include': 9.0, 'iostream': 8.0, 'std': 7.0, 'cout': 7.0, 'vector': 6.0, 'int': 5.0, 'main': 6.0},
        exclusive_patterns=[
            (rx(r'#include\s*<iostream>'), 18.0),
        ],
        file_extensions=['.cpp', '.cxx', '.h', '.hpp'],
    )

    table['html'] = LanguageSignature(
        patterns=[
            (rx(r'<\s*head\s*>'), 8.0),
            (rx(r'<\s*body\s*>'), 8.0),
            (rx(r'<\s*/\s*\w+\s*>'), 6.0),
        ],
        keywords={'div': 5.0, 'p': 4.0, 'a': 4.0, 'href': 5.0, 'class': 3.0, 'id': 3.0},
        exclusive_patterns=[
            (rx(r'<!DOCTYPE\s+html>', re.IGNORECASE), 100.0),
        ],
        file_extensions=['.html', '.htm'],
    )

    table['css'] = LanguageSignature(
        patterns=[
            (rx(r'[\w\s.#-]+\s*\{'), 8.0),
            (rx(r'[\w-]+\s*:\s*[^;]+;'), 9.0),
        ],
        keywords={'color': 6.0, 'background-color': 7.0, 'margin': 5.0, 'padding': 5.0, 'font-size': 6.0, 'display': 6.0},
        exclusive_patterns=[
            (rx(r'@media\s*\(.+\)\s*\{'), 15.0),
        ],
        file_extensions=['.css'],
    )

    table['sql'] = LanguageSignature(
        patterns=[
            (rx(r'(SELECT|CREATE|INSERT|UPDATE|DELETE|FROM|WHERE)', re.IGNORECASE), 10.0),
        ],
        keywords={'SELECT': 9.0, 'FROM': 8.0, 'WHERE': 7.0, 'JOIN': 7.0, 'GROUP BY': 8.0, 'ORDER BY': 7.0, 'CREATE TABLE': 9.0},
        exclusive_patterns=[
            (rx(r'CREATE\s+TABLE', re.IGNORECASE), 18.0),
        ],
        file_extensions=['.sql'],
    )

    table['csharp'] = LanguageSignature(
        patterns=[
            (rx(r'using\s+System;'), 10.0),
            (rx(r'namespace\s+\w+'), 9.0),
        ],
        keywords={'namespace': 9.0, 'public': 7.0, 'class': 7.0, 'static': 6.0, 'void': 5.0, 'string': 7.0, 'var': 6.0},
        exclusive_patterns=[
            (rx(r'static\s+void\s+Main\s*\(\s*string\s*\[\]\s*args\s*\)'), 25.0),
            (rx(r'Console\.WriteLine\s*\('), 15.0),
        ],
        file_extensions=['.cs'],
    )

    table['go'] = LanguageSignature(
        patterns=[
            (rx(r'package\s+main'), 10.0),
            (rx(r'import\s*\(\s*'), 8.0),
            (rx(r'func\s+\w+\s*\([^)]*\)\s*\{'), 9.0),
        ],
        keywords={'package': 9.0, 'import': 8.0, 'func': 9.0, 'fmt': 7.0, 'Println': 6.0},
        exclusive_patterns=[
            (rx(r'func\s+main\s*\(\s*\)'), 20.0),
            (rx(r':='), 12.0),
        ],
        file_extensions=['.go'],
    )

    table['rust'] = LanguageSignature(
        patterns=[
            (rx(r'fn\s+\w+\s*\([^)]*\)\s*\{'), 10.0),
            (rx(r'use\s+std::'), 9.0),
            (rx(r'let\s+(mut\s+)?\w+'), 8.0),
        ],
        keywords={'fn': 9.0, 'let': 8.0, 'mut': 7.0, 'use': 8.0, 'struct': 7.0},
        exclusive_patterns=[
            (rx(r'fn\s+main\s*\(\s*\)'), 20.0),
            (rx(r'println!\s*\('), 18.0),
        ],
        file_extensions=['.rs'],
    )

    table['kotlin'] = LanguageSignature(
        patterns=[
            (rx(r'fun\s+\w+\s*\([^)]*\)'), 10.0),
            (rx(r'val\s+\w+'), 8.0),
        ],
        keywords={'package': 8.0, 'import': 7.0, 'fun': 9.0, 'val': 8.0, 'var': 7.0, 'println': 6.0, 'class': 7.0},
        exclusive_patterns=[
            (rx(r'fun\s+main\s*\(\s*args:\s*Array<String>\s*\)'), 25.0),
        ],
        file_extensions=['.kt', '.kts'],
    )

    table['ruby'] = LanguageSignature(
        patterns=[
            (rx(r'def\s+\w+'), 10.0),
            (rx(r'require\s+[\'"]\w+[\'"]'), 8.0),
            (rx(r'^\s*end\s*$'), 7.0),
        ],
        keywords={'def': 9.0, 'end': 8.0, 'require': 7.0, 'puts': 6.0, 'class': 7.0},
        exclusive_patterns=[
            (rx(r'# frozen_string_literal: true'), 15.0),
            (rx(r':\w+'), 10.0),
        ],
        file_extensions=['.rb'],
    )

    table['swift'] = LanguageSignature(
        patterns=[
            (rx(r'import\s+(UIKit|Foundation)'), 10.0),
            (rx(r'func\s+\w+\s*\([^)]*\)\s*\{'), 9.0),
        ],
        keywords={'import': 8.0, 'func': 9.0, 'let': 8.0, 'var': 8.0, 'class': 7.0, 'print': 6.0},
        exclusive_patterns=[
            (rx(r'@IBOutlet'), 18.0),
            (rx(r'import\s+SwiftUI'), 20.0),
        ],
        file_extensions=['.swift'],
    )

    table['php'] = LanguageSignature(
        patterns=[
            (rx(r'function\s+\w+\s*\([^)]*\)'), 7.0),
            (rx(r'\$\w+'), 9.0),
            (rx(r'echo\s+'), 6.0),
        ],
        keywords={'function': 7.0, 'echo': 6.0, 'class': 6.0, 'public': 6.0},
        exclusive_patterns=[
            (rx(r'<\?php'), 100.0),
        ],
        file_extensions=['.php'],
    )

    table['typescript'] = LanguageSignature(
        patterns=[
            (rx(r'interface\s+\w+'), 10.0),
            (rx(r':\s*(string|number|boolean|any)'), 9.0),
        ],
        keywords={'interface': 9.0, 'type': 8.0, 'public': 7.0, 'private': 7.0, 'enum': 7.0},
        exclusive_patterns=[
            (rx(r'public\s+constructor\s*\('), 18.0),
        ],
        file_extensions=['.ts', '.tsx'],
    )

    table['r'] = LanguageSignature(
        patterns=[
            (rx(r'library\s*\(\w+\)'), 10.0),
            (rx(r'\w+\s*<-\s*'), 9.0),
        ],
        keywords={'library': 9.0, 'function': 7.0, 'if': 5.0, 'else': 5.0},
        exclusive_patterns=[
            (rx(r'install\.packages\s*\('), 20.0),
            (rx(r'<-'), 15.0),
        ],
        file_extensions=['.r'],
    )

    table['bash'] = LanguageSignature(
        patterns=[
            (rx(r'if\s+\[.*\]'), 8.0),
            (rx(r'^\s*fi\s*$'), 7.0),
            (rx(r'echo\s+'), 6.0),
        ],
        keywords={'if': 7.0, 'then': 7.0, 'fi': 7.0, 'for': 6.0, 'do': 6.0, 'done': 6.0, 'echo': 6.0},
        exclusive_patterns=[
            (rx(r'#!/bin?/(ba|z|k)?sh'), 100.0),
        ],
        file_extensions=['.sh'],
    )

    table['dockerfile'] = LanguageSignature(
        patterns=[
            (rx(r'^(FROM|RUN|CMD|COPY|ADD|WORKDIR|EXPOSE)\s+', re.MULTILINE), 10.0),
        ],
        keywords={'FROM': 10.0, 'RUN': 9.0, 'CMD': 8.0, 'COPY': 8.0, 'WORKDIR': 7.0, 'EXPOSE': 7.0},
        exclusive_patterns=[
            (rx(r'^FROM\s+'), 20.0),
        ],
        file_extensions=['Dockerfile'],
    )

    table['yaml'] = LanguageSignature(
        patterns=[
            (rx(r'^\s*[\w-]+\s*:\s*.*'), 9.0),
            (rx(r'^\s*-\s+'), 8.0),
        ],
        keywords={'version': 6.0, 'services': 7.0, 'steps': 7.0, 'jobs': 7.0, 'name': 5.0, 'image': 6.0},
        exclusive_patterns=[],
        file_extensions=['.yml', '.yaml'],
    )

    table['xml'] = LanguageSignature(
        patterns=[
            (rx(r'<(\w+)\s*.*>.*</\1>'), 9.0),
            (rx(r'<\?xml[^>]*\?>'), 10.0),
        ],
        keywords={},
        exclusive_patterns=[
            (rx(r'<\?xml\s+version="1.0"'), 100.0),
        ],
        file_extensions=['.xml'],
    )

    self.languages = table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def detect_with_line_info(self, code: str) -> Dict[str, Any]:
|
| 38 |
start_time = time.time()
|
| 39 |
if not code or not code.strip(): return self._empty_result()
|
| 40 |
+
code = code.strip(); lines = code.split('\n'); scores = defaultdict(float); line_evidence = {lang: [] for lang in self.languages}; exclusive_matches = defaultdict(float)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
for lang, signature in self.languages.items():
|
| 42 |
for pattern, weight in signature.exclusive_patterns:
|
| 43 |
for i, line in enumerate(lines, 1):
|
| 44 |
+
if pattern.search(line): exclusive_matches[lang] += weight; line_evidence[lang].append(f"第{i}行: {line.strip()[:50]}...")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
if exclusive_matches:
|
| 46 |
best_lang_exclusive = max(exclusive_matches, key=exclusive_matches.get)
|
| 47 |
+
if exclusive_matches[best_lang_exclusive] >= 100.0: return self._build_absolute_result(best_lang_exclusive, lines, start_time)
|
|
|
|
|
|
|
|
|
|
| 48 |
for lang, signature in self.languages.items():
|
| 49 |
+
score = exclusive_matches.get(lang, 0); lang_evidence = []
|
|
|
|
| 50 |
for pattern, weight in signature.patterns:
|
| 51 |
for i, line in enumerate(lines, 1):
|
| 52 |
if pattern.search(line): score += weight; lang_evidence.append(f"第{i}行 (正则): {line.strip()[:50]}...")
|
|
|
|
| 54 |
keyword_pattern = re.compile(r'\b' + re.escape(keyword) + r'\b', re.IGNORECASE if lang in ['html', 'sql', 'dockerfile'] else 0)
|
| 55 |
for i, line in enumerate(lines, 1):
|
| 56 |
if keyword_pattern.search(line): score += weight; lang_evidence.append(f"第{i}行 (关键词): '{keyword}'")
|
| 57 |
+
if score > 0: scores[lang] = score; line_evidence[lang].extend(list(dict.fromkeys(lang_evidence)))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
if not scores: return self._unknown_result(lines, start_time)
|
| 59 |
+
best_lang = max(scores, key=scores.get); best_score = scores[best_lang]; total_score = sum(scores.values()); confidence = best_score / total_score if total_score > 0 else 0.0; processing_time = (time.time() - start_time) * 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
return {'language': best_lang, 'confidence': round(min(confidence * 1.2, 0.999), 3), 'score': round(best_score, 2), 'total_lines': len(lines), 'line_evidence': line_evidence[best_lang][:15], 'processing_time_ms': round(processing_time, 2), 'code_preview': self._get_code_preview(lines), 'all_scores': {k: round(v, 2) for k, v in sorted(scores.items(), key=lambda item: item[1], reverse=True) if v > 0}, 'detection_stats': self._get_detection_stats(scores, lines, best_lang)}
|
|
|
|
| 61 |
def _get_code_preview(self, lines: List[str]) -> Dict[str, Any]:
|
| 62 |
if not lines: return {}
|
| 63 |
functions, classes, imports, comments = [], [], [], []
|
|
|
|
| 68 |
elif re.match(r'(import|from|#include|using|require|use|library|package)\s+', line_clean): imports.append(f"第{i}行: {line_clean}")
|
| 69 |
elif re.match(r'(#|//|--|/\*|<!--)', line_clean): comments.append(f"第{i}行: {line_clean}")
|
| 70 |
return {'total_lines': len(lines), 'functions_count': len(functions), 'classes_count': len(classes), 'imports_count': len(imports), 'comments_count': len(comments), 'code_density': round((len(lines) - len(comments)) / len(lines), 3) if lines else 0, 'sample_functions': functions[:3], 'sample_classes': classes[:2], 'sample_imports': imports[:3]}
|
|
|
|
|
|
|
| 71 |
def _get_detection_stats(self, scores: Dict[str, float], lines: List[str], primary_language: str) -> Dict[str, Any]:
|
| 72 |
if not scores: return {}
|
| 73 |
+
total_score = sum(scores.values()); max_score = max(scores.values(), default=0); quality = '确定性' if max_score >= 100 else '极高' if max_score > 50 else '高' if max_score > 25 else '中'
|
|
|
|
|
|
|
| 74 |
return {'languages_detected': len(scores), 'primary_language': primary_language, 'score_distribution': {lang: round(score/total_score*100, 1) for lang, score in scores.items() if score > 0}, 'total_score': round(total_score, 2), 'detection_quality': quality}
|
|
|
|
| 75 |
def _empty_result(self) -> Dict[str, Any]: return {'language': 'unknown', 'confidence': 0.0, 'message': '代码为空', 'total_lines': 0, 'line_evidence': [], 'processing_time_ms': 0.1, 'code_preview': {}, 'detection_stats': {}, 'all_scores': {}}
|
|
|
|
| 76 |
def _unknown_result(self, lines: List[str], start_time: float) -> Dict[str, Any]: return {'language': 'unknown', 'confidence': 0.0, 'message': '无法识别编程语言', 'total_lines': len(lines), 'line_evidence': [], 'processing_time_ms': round((time.time() - start_time) * 1000, 2), 'code_preview': self._get_code_preview(lines), 'detection_stats': {}, 'all_scores': {}}
|
|
|
|
| 77 |
def _build_absolute_result(self, lang: str, lines: List[str], start_time: float) -> Dict[str, Any]:
|
| 78 |
+
all_scores = defaultdict(float); line_evidence = defaultdict(list)
|
|
|
|
| 79 |
for current_lang, signature in self.languages.items():
|
| 80 |
score = 0
|
| 81 |
for pattern, weight in signature.exclusive_patterns + signature.patterns:
|
|
|
|
| 90 |
score += weight
|
| 91 |
if current_lang == lang: line_evidence[lang].append(f"第{i}行 (关键词): '{keyword}'")
|
| 92 |
if score > 0: all_scores[current_lang] = score
|
|
|
|
| 93 |
best_score = all_scores[lang]
|
| 94 |
return {'language': lang, 'confidence': 0.999, 'score': round(best_score, 2), 'total_lines': len(lines), 'line_evidence': list(dict.fromkeys(line_evidence[lang]))[:15], 'processing_time_ms': round((time.time() - start_time) * 1000, 2), 'code_preview': self._get_code_preview(lines), 'detection_method': 'absolute_exclusive_pattern', 'all_scores': {k: round(v, 2) for k, v in sorted(all_scores.items(), key=lambda item: item[1], reverse=True) if v > 0}, 'detection_stats': self._get_detection_stats(all_scores, lines, lang)}
|
| 95 |
|
| 96 |
# 全局实例
|
| 97 |
precision_detector = PrecisionLanguageDetector()
|
|
|
|
| 98 |
# CSS样式
|
| 99 |
custom_css = ":root{--primary:#6366f1;--primary-dark:#4f46e5;--secondary:#10b981;--accent:#f59e0b;--danger:#ef4444;--bg-primary:#fff;--bg-secondary:#f8fafc;--bg-card:#fff;--border:#e2e8f0;--text-primary:#1e293b;--text-secondary:#64748b;--text-muted:#94a3b8;--shadow:0 1px 3px 0 rgba(0,0,0,.1),0 1px 2px -1px rgba(0,0,0,.1);--shadow-lg:0 10px 15px -3px rgba(0,0,0,.1),0 4px 6px -4px rgba(0,0,0,.1)}.gradio-container{background:linear-gradient(135deg,#f8fafc 0%,#e2e8f0 100%)!important;min-height:100vh;font-family:Inter,-apple-system,BlinkMacSystemFont,sans-serif!important}.pro-card{background:var(--bg-card)!important;border:1px solid var(--border)!important;border-radius:12px!important;box-shadow:var(--shadow)!important;padding:24px!important;margin-bottom:20px!important;transition:all .3s cubic-bezier(.4,0,.2,1)!important}.pro-card:hover{box-shadow:var(--shadow-lg)!important;transform:translateY(-2px)!important}.header-section{text-align:center!important;margin-bottom:40px!important;background:linear-gradient(135deg,var(--primary) 0%,var(--primary-dark) 100%)!important;border-radius:16px!important;padding:40px 32px!important;color:#fff!important}.app-title{font-size:3rem!important;font-weight:800!important;margin-bottom:12px!important;background:linear-gradient(135deg,#fff 0%,#f1f5f9 100%)!important;-webkit-background-clip:text!important;-webkit-text-fill-color:transparent!important;background-clip:text!important}.app-subtitle{font-size:1.25rem!important;font-weight:400!important;opacity:.9!important;margin-bottom:0!important}.stats-grid{display:grid!important;grid-template-columns:repeat(auto-fit,minmax(180px,1fr))!important;gap:16px!important;margin:20px 0!important}.stat-card{background:#fff!important;border-radius:8px!important;padding:16px!important;text-align:center!important;border:1px solid 
var(--border)!important}.stat-value{font-size:2rem!important;font-weight:700!important;color:var(--primary)!important;margin-bottom:4px!important}.stat-label{font-size:.875rem!important;color:var(--text-secondary)!important;font-weight:500!important}.evidence-panel{background:var(--bg-secondary)!important;border:1px solid var(--border)!important;border-radius:8px!important;padding:16px!important;max-height:300px!important;overflow-y:auto!important;font-family:Monaco,Menlo,Consolas,monospace!important;font-size:.875rem!important}.evidence-item{padding:8px 12px!important;margin:4px 0!important;background:#fff!important;border-radius:6px!important;border-left:4px solid var(--primary)!important}.confidence-high{color:var(--secondary)!important;font-weight:700!important}.confidence-medium{color:var(--accent)!important;font-weight:600!important}.confidence-low{color:var(--danger)!important;font-weight:600!important}.btn-primary{background:linear-gradient(135deg,var(--primary) 0%,var(--primary-dark) 100%)!important;color:#fff!important;border:none!important;border-radius:8px!important;padding:12px 24px!important;font-weight:600!important;transition:all .3s ease!important}.btn-primary:hover{transform:translateY(-1px)!important;box-shadow:0 4px 12px rgba(99,102,241,.3)!important}"
|
|
|
|
| 100 |
# 显示格式化辅助类
|
| 101 |
class DisplayFormatter:
|
| 102 |
@staticmethod
|
|
|
|
| 132 |
preview = result.get('code_preview', {})
|
| 133 |
if not preview: return "<div style='text-align: center; color: #64748b;'>无代码质量数据</div>"
|
| 134 |
return f"""<div style="padding: 20px;"><h4 style="margin-bottom: 16px;">代码结构指标 (通用)</h4><div class="stats-grid"><div class="stat-card"><div class="stat-value">{preview.get('functions_count',0)}</div><div class="stat-label">函数/方法</div></div><div class="stat-card"><div class="stat-value">{preview.get('classes_count',0)}</div><div class="stat-label">类/结构体</div></div><div class="stat-card"><div class="stat-value">{preview.get('imports_count',0)}</div><div class="stat-label">导入/包含</div></div><div class="stat-card"><div class="stat-value">{preview.get('code_density',0):.1%}</div><div class="stat-label">代码密度</div></div></div></div>"""
|
| 135 |
+
# ★★★ 新增:项目分析结果的格式化函数 ★★★
|
| 136 |
+
@staticmethod
|
| 137 |
+
def format_project_tree(tree_string):
|
| 138 |
+
if not tree_string: return "项目为空或无法解析。"
|
| 139 |
+
return f"```\n{tree_string}\n```"
|
| 140 |
+
@staticmethod
|
| 141 |
+
def format_language_distribution_plot(lang_stats):
|
| 142 |
+
if not lang_stats: return None
|
| 143 |
+
# 按文件数量排序
|
| 144 |
+
sorted_stats = sorted(lang_stats.items(), key=lambda item: item[1]['count'], reverse=True)
|
| 145 |
+
langs = [item[0].upper() for item in sorted_stats]
|
| 146 |
+
counts = [item[1]['count'] for item in sorted_stats]
|
| 147 |
+
|
| 148 |
+
fig, ax = plt.subplots(figsize=(10, 8))
|
| 149 |
+
bars = ax.barh(langs, counts, color='#6366f1')
|
| 150 |
+
ax.invert_yaxis()
|
| 151 |
+
ax.set_xlabel('文件数量 (File Count)')
|
| 152 |
+
ax.set_title('项目语言分布 (Project Language Distribution)')
|
| 153 |
+
ax.bar_label(bars, padding=3)
|
| 154 |
+
plt.tight_layout()
|
| 155 |
+
return fig
|
| 156 |
+
|
| 157 |
+
# ★★★ 新增:项目分析核心逻辑 ★★★
|
| 158 |
+
def analyze_project_zip(zip_file):
    """Scan an uploaded .zip project and detect the language of each text file.

    The archive is extracted into a temporary directory, every file that
    can be read as UTF-8 text is passed to
    ``precision_detector.detect_with_line_info``, and the temporary
    directory is removed again before returning.

    Args:
        zip_file: The uploaded archive, given either as an object exposing
            the file path through a ``name`` attribute or as a plain path.
            ``None`` means nothing was uploaded.

    Returns:
        A 3-tuple ``(tree_text, language_stats, raw_stats)``:

        * ``tree_text`` - indented listing of directories and files, each
          file annotated with its detected language (or marked as binary /
          unreadable).
        * ``language_stats`` / ``raw_stats`` - the same dict, mapping each
          detected language to ``{'count': files, 'lines': total_lines}``;
          files detected as ``'unknown'`` are not counted.

        On any failure (no upload, not a zip, empty archive, unexpected
        error) the tuple is ``(message, None, {})``.
    """
    if zip_file is None:
        return ("请先上传一个 .zip 文件。", None, {})

    # Accept both an upload object carrying ``.name`` and a bare path.
    zip_path = getattr(zip_file, 'name', zip_file)

    temp_dir = tempfile.mkdtemp()
    try:
        if not zipfile.is_zipfile(zip_path):
            return ("上传的不是一个有效的 .zip 文件。", None, {})

        # NOTE(review): the archive is user-supplied and its uncompressed
        # size is not limited here; consider capping it if zip bombs are a
        # concern for the deployment.
        with zipfile.ZipFile(zip_path, 'r') as zf:
            zf.extractall(temp_dir)

        tree_parts = []
        language_stats = defaultdict(lambda: {'count': 0, 'lines': 0})
        total_files = 0

        for root, dirs, files in os.walk(temp_dir):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()

            rel_root = os.path.relpath(root, temp_dir)
            if rel_root == os.curdir:
                level = 0
                # Label the root with the archive name rather than the
                # random temporary directory name.
                label = os.path.basename(zip_path)
            else:
                level = rel_root.count(os.sep) + 1
                label = os.path.basename(root)

            indent = ' ' * 4 * level
            sub_indent = ' ' * 4 * (level + 1)
            tree_parts.append(f"{indent}📂 {label}/\n")

            for filename in sorted(files):
                total_files += 1
                file_path = os.path.join(root, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except (UnicodeDecodeError, OSError):
                    # Not valid UTF-8 text (e.g. binary) or not readable.
                    tree_parts.append(f"{sub_indent}📄 {filename} [Binary or Unreadable]\n")
                    continue

                result = precision_detector.detect_with_line_info(content)
                lang = result['language']
                tree_parts.append(f"{sub_indent}📄 {filename} -> [{lang.upper()}]\n")

                if lang != 'unknown':
                    language_stats[lang]['count'] += 1
                    language_stats[lang]['lines'] += result['total_lines']

        if total_files == 0:
            return ("这是一个空的 .zip 文件。", None, {})

        # Hand back a plain dict so callers do not receive a defaultdict
        # that silently creates entries on lookup.
        stats = dict(language_stats)
        return "".join(tree_parts), stats, stats

    except Exception as e:
        # Boundary of a UI callback: report the problem instead of raising.
        return (f"分析过程中出现错误: {e}", None, {})
    finally:
        # Best-effort cleanup; a failure to delete must not replace the
        # result that is being returned.
        shutil.rmtree(temp_dir, ignore_errors=True)
|
| 218 |
+
|
| 219 |
+
# Gradio界面 (已升级为双模式)
|
| 220 |
def create_enhanced_interface():
|
| 221 |
with gr.Blocks(title="Capricode Pro Master", css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")) as demo:
|
| 222 |
+
gr.HTML("""<div class='header-section'><div class="app-title">🚀 Capricode Pro Master</div><div class="app-subtitle">精准代码视觉感知系统 (双模式:单文件 & 项目分析)</div></div>""")
|
| 223 |
+
|
| 224 |
+
with gr.Tabs():
|
| 225 |
+
# --- 模式一:单文件分析器 ---
|
| 226 |
+
with gr.TabItem("单文件分析器 (Single File Analyzer)"):
|
| 227 |
+
with gr.Row(equal_height=False):
|
| 228 |
+
with gr.Column(scale=1):
|
| 229 |
+
with gr.Column(elem_classes="pro-card"):
|
| 230 |
+
gr.Markdown("### 📝 代码输入区域")
|
| 231 |
+
code_input = gr.Textbox(label="", placeholder="请在此处粘贴代码...", lines=15, show_label=False)
|
| 232 |
+
with gr.Row():
|
| 233 |
+
detect_btn = gr.Button("🔍 分析代码片段", variant="primary", elem_classes="btn-primary")
|
| 234 |
+
clear_btn = gr.Button("🗑️ 清空")
|
| 235 |
+
with gr.Column(scale=1):
|
| 236 |
+
with gr.Column(elem_classes="pro-card"):
|
| 237 |
+
gr.Markdown("### 🎯 主要检测结果")
|
| 238 |
+
with gr.Row(): detected_language, confidence_score = gr.HTML(label="识别语言"), gr.HTML(label="置信度")
|
| 239 |
+
with gr.Row(): processing_time, total_lines = gr.Textbox(label="处理时间", interactive=False), gr.Textbox(label="代码行数", interactive=False)
|
| 240 |
+
with gr.Column(elem_classes="pro-card"):
|
| 241 |
+
gr.Markdown("### 📊 分析统计"), (stats_display := gr.HTML(value="<div style='text-align: center; color: #64748b;'>等待分析数据...</div>"))
|
| 242 |
+
with gr.Row():
|
| 243 |
+
with gr.Column(scale=1):
|
| 244 |
+
with gr.Column(elem_classes="pro-card"): gr.Markdown("### 📍 行数证据详情"), (line_evidence := gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>检测证据将在此显示</div>"))
|
| 245 |
+
with gr.Column(scale=1):
|
| 246 |
+
with gr.Column(elem_classes="pro-card"): gr.Markdown("### 🏗️ 代码结构分析 (通用)"), (code_preview := gr.JSON(label="结构分析", show_label=False))
|
| 247 |
with gr.Column(elem_classes="pro-card"):
|
| 248 |
+
gr.Markdown("### 📈 可视化分析")
|
| 249 |
+
with gr.Tabs():
|
| 250 |
+
with gr.TabItem("🔧 语言分布"): language_distribution = gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>语言分布可视化将在此显示</div>")
|
| 251 |
+
with gr.TabItem("📋 代码质量"): code_quality = gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>代码质量分析将在此显示</div>")
|
| 252 |
+
with gr.TabItem("🎯 检测详情 (JSON)"): detection_details = gr.JSON(label="详细检测数据", show_label=False)
|
| 253 |
+
|
| 254 |
+
# --- 模式二:项目分析器 ---
|
| 255 |
+
with gr.TabItem("项目分析器 (Project Analyzer)"):
|
| 256 |
+
with gr.Row():
|
| 257 |
+
with gr.Column(scale=1):
|
| 258 |
+
with gr.Column(elem_classes="pro-card"):
|
| 259 |
+
gr.Markdown("### 📁 上传项目压缩包")
|
| 260 |
+
zip_input = gr.File(label="请上传项目的 .zip 文件", file_types=['.zip'], type="file")
|
| 261 |
+
project_analyze_btn = gr.Button("🚀 开始扫描整个项目", variant="primary", elem_classes="btn-primary")
|
| 262 |
+
with gr.Column(scale=2):
|
| 263 |
+
with gr.Column(elem_classes="pro-card"):
|
| 264 |
+
gr.Markdown("### 🌳 项目结构 & 语言识别")
|
| 265 |
+
project_tree_output = gr.Markdown("项目的文件结构树将在这里显示...", label="Project Structure")
|
| 266 |
+
|
| 267 |
with gr.Column(elem_classes="pro-card"):
|
| 268 |
+
gr.Markdown("### 📊 项目语言分布统计")
|
| 269 |
+
with gr.Tabs():
|
| 270 |
+
with gr.TabItem("📈 按文件数量分布 (Bar Chart)"):
|
| 271 |
+
lang_dist_plot = gr.Plot(label="Language Distribution (by file count)")
|
| 272 |
+
with gr.TabItem("📋 原始统计数据 (JSON)"):
|
| 273 |
+
project_raw_json_output = gr.JSON(label="Raw Analysis Data")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
+
# --- 回调函数绑定 ---
|
| 276 |
+
|
| 277 |
+
# 单文件分析器的回调
|
| 278 |
+
def single_file_precision_analyze(code):
|
| 279 |
if not code or not code.strip(): return DisplayFormatter.format_display_result(precision_detector._empty_result())
|
| 280 |
return DisplayFormatter.format_display_result(precision_detector.detect_with_line_info(code))
|
| 281 |
|
| 282 |
+
single_file_outputs = [detected_language, confidence_score, processing_time, total_lines, stats_display, line_evidence, code_preview, language_distribution, code_quality, detection_details]
|
| 283 |
+
detect_btn.click(fn=single_file_precision_analyze, inputs=[code_input], outputs=single_file_outputs)
|
| 284 |
+
clear_btn.click(fn=DisplayFormatter.get_empty_display_state, outputs=[code_input] + single_file_outputs)
|
| 285 |
+
|
| 286 |
+
# 项目分析器的回调
|
| 287 |
+
def run_project_analysis_and_format(zip_file):
|
| 288 |
+
tree_str, stats, raw_stats = analyze_project_zip(zip_file)
|
| 289 |
+
formatted_tree = DisplayFormatter.format_project_tree(tree_str)
|
| 290 |
+
plot = DisplayFormatter.format_language_distribution_plot(stats)
|
| 291 |
+
return formatted_tree, plot, raw_stats
|
| 292 |
+
|
| 293 |
+
project_analyze_btn.click(
|
| 294 |
+
fn=run_project_analysis_and_format,
|
| 295 |
+
inputs=[zip_input],
|
| 296 |
+
outputs=[project_tree_output, lang_dist_plot, project_raw_json_output]
|
| 297 |
+
)
|
| 298 |
+
|
| 299 |
return demo
|
| 300 |
|
| 301 |
# 启动应用
|