pangxiang committed on
Commit
479440c
·
verified ·
1 Parent(s): 06f6776

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -207
app.py CHANGED
@@ -1,9 +1,8 @@
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
- Capricode Pro Master - 精准代码视觉感知系统 (终极版)
5
- UI增强 & 引擎扩展 - 为大型语言模型提供20种主流语言的精准识别
6
- 经过混合语言压力测试并已修复所有已知逻辑漏洞
7
  """
8
  import gradio as gr
9
  import time
@@ -12,7 +11,14 @@ from dataclasses import dataclass
12
  from typing import Dict, List, Tuple, Any
13
  from collections import defaultdict
14
 
15
- # ==================== 精准语言识别引擎 (终极版) ====================
 
 
 
 
 
 
 
16
  @dataclass
17
  class LanguageSignature:
18
  patterns: List[Tuple[re.Pattern, float]]
@@ -21,162 +27,26 @@ class LanguageSignature:
21
  file_extensions: List[str]
22
 
23
  class PrecisionLanguageDetector:
 
 
24
  def __init__(self):
25
  self.languages = {}
26
  self._compile_precision_patterns()
27
-
28
  def _compile_precision_patterns(self):
29
- """编译所有20种语言的精准特征,并为定义级特征赋予绝对权重"""
30
- self.languages = {
31
- 'python': LanguageSignature(
32
- patterns=[(re.compile(r'def\s+\w+\s*\([^)]*\)\s*:'), 9.0), (re.compile(r'class\s+\w+\s*\(?[^)]*\)?\s*:'), 8.5), (re.compile(r'import\s+[\w.]+'), 6.0)],
33
- keywords={'def': 8.0, 'class': 7.0, 'import': 6.0, 'elif': 5.0, 'async': 6.0, 'await': 6.0},
34
- exclusive_patterns=[(re.compile(r'if __name__ == "__main__":'), 20.0), (re.compile(r'f"[^"]*"'), 12.0)],
35
- file_extensions=['.py', '.pyw']
36
- ),
37
- 'javascript': LanguageSignature(
38
- patterns=[(re.compile(r'function\s*.*\s*\{'), 8.0), (re.compile(r'console\.log\s*\('), 7.0), (re.compile(r'(const|let|var)\s+\w+\s*='), 8.0)],
39
- keywords={'function': 7.0, 'const': 6.0, 'let': 6.0, 'console': 5.0, 'document': 6.0, 'async': 5.0, 'await': 5.0},
40
- exclusive_patterns=[(re.compile(r'document\.getElementById\s*\('), 15.0)],
41
- file_extensions=['.js', '.jsx', '.mjs']
42
- ),
43
- 'java': LanguageSignature(
44
- patterns=[(re.compile(r'public\s+class\s+\w+'), 10.0), (re.compile(r'System\.out\.println\s*\('), 9.0), (re.compile(r'import\s+java\.'), 8.0)],
45
- keywords={'public': 8.0, 'class': 8.0, 'static': 7.0, 'void': 6.0, 'String': 7.0, 'System': 7.0, 'new': 5.0},
46
- exclusive_patterns=[(re.compile(r'public\s+static\s+void\s+main\s*\(\s*String\s*\[\]\s*args\s*\)'), 25.0)],
47
- file_extensions=['.java']
48
- ),
49
- 'cpp': LanguageSignature(
50
- patterns=[(re.compile(r'#include\s*<[a-zA-Z_]+>'), 10.0), (re.compile(r'using\s+namespace\s+std;'), 9.0), (re.compile(r'std::cout'), 8.0)],
51
- keywords={'#include': 9.0, 'iostream': 8.0, 'std': 7.0, 'cout': 7.0, 'vector': 6.0, 'int': 5.0, 'main': 6.0},
52
- exclusive_patterns=[(re.compile(r'#include\s*<iostream>'), 18.0)],
53
- file_extensions=['.cpp', '.cxx', '.h', '.hpp']
54
- ),
55
- 'html': LanguageSignature(
56
- patterns=[(re.compile(r'<\s*head\s*>'), 8.0), (re.compile(r'<\s*body\s*>'), 8.0), (re.compile(r'<\s*/\s*\w+\s*>'), 6.0)],
57
- keywords={'div': 5.0, 'p': 4.0, 'a': 4.0, 'href': 5.0, 'class': 3.0, 'id': 3.0},
58
- exclusive_patterns=[(re.compile(r'<!DOCTYPE\s+html>', re.IGNORECASE), 100.0)], # 绝对权重
59
- file_extensions=['.html', '.htm']
60
- ),
61
- 'css': LanguageSignature(
62
- patterns=[(re.compile(r'[\w\s.#-]+\s*\{'), 8.0), (re.compile(r'[\w-]+\s*:\s*[^;]+;'), 9.0)],
63
- keywords={'color': 6.0, 'background-color': 7.0, 'margin': 5.0, 'padding': 5.0, 'font-size': 6.0, 'display': 6.0},
64
- exclusive_patterns=[(re.compile(r'@media\s*\(.+\)\s*\{'), 15.0)],
65
- file_extensions=['.css']
66
- ),
67
- 'sql': LanguageSignature(
68
- patterns=[(re.compile(r'(SELECT|CREATE|INSERT|UPDATE|DELETE|FROM|WHERE)', re.IGNORECASE), 10.0)],
69
- keywords={'SELECT': 9.0, 'FROM': 8.0, 'WHERE': 7.0, 'JOIN': 7.0, 'GROUP BY': 8.0, 'ORDER BY': 7.0, 'CREATE TABLE': 9.0},
70
- exclusive_patterns=[(re.compile(r'CREATE\s+TABLE', re.IGNORECASE), 18.0)],
71
- file_extensions=['.sql']
72
- ),
73
- 'csharp': LanguageSignature(
74
- patterns=[(re.compile(r'using\s+System;'), 10.0), (re.compile(r'namespace\s+\w+'), 9.0)],
75
- keywords={'namespace': 9.0, 'public': 7.0, 'class': 7.0, 'static': 6.0, 'void': 5.0, 'string': 7.0, 'var': 6.0},
76
- exclusive_patterns=[(re.compile(r'static\s+void\s+Main\s*\(\s*string\s*\[\]\s*args\s*\)'), 25.0), (re.compile(r'Console\.WriteLine\s*\('), 15.0)],
77
- file_extensions=['.cs']
78
- ),
79
- 'go': LanguageSignature(
80
- patterns=[(re.compile(r'package\s+main'), 10.0), (re.compile(r'import\s*\(\s*'), 8.0), (re.compile(r'func\s+\w+\s*\([^)]*\)\s*\{'), 9.0)],
81
- keywords={'package': 9.0, 'import': 8.0, 'func': 9.0, 'fmt': 7.0, 'Println': 6.0},
82
- exclusive_patterns=[(re.compile(r'func\s+main\s*\(\s*\)'), 20.0), (re.compile(r':='), 12.0)],
83
- file_extensions=['.go']
84
- ),
85
- 'rust': LanguageSignature(
86
- patterns=[(re.compile(r'fn\s+\w+\s*\([^)]*\)\s*\{'), 10.0), (re.compile(r'use\s+std::'), 9.0), (re.compile(r'let\s+(mut\s+)?\w+'), 8.0)],
87
- keywords={'fn': 9.0, 'let': 8.0, 'mut': 7.0, 'use': 8.0, 'struct': 7.0},
88
- exclusive_patterns=[(re.compile(r'fn\s+main\s*\(\s*\)'), 20.0), (re.compile(r'println!\s*\('), 18.0)],
89
- file_extensions=['.rs']
90
- ),
91
- 'kotlin': LanguageSignature(
92
- patterns=[(re.compile(r'fun\s+\w+\s*\([^)]*\)'), 10.0), (re.compile(r'val\s+\w+'), 8.0)],
93
- keywords={'package': 8.0, 'import': 7.0, 'fun': 9.0, 'val': 8.0, 'var': 7.0, 'println': 6.0, 'class': 7.0},
94
- exclusive_patterns=[(re.compile(r'fun\s+main\s*\(\s*args:\s*Array<String>\s*\)'), 25.0)],
95
- file_extensions=['.kt', '.kts']
96
- ),
97
- 'ruby': LanguageSignature(
98
- patterns=[(re.compile(r'def\s+\w+'), 10.0), (re.compile(r'require\s+[\'"]\w+[\'"]'), 8.0), (re.compile(r'^\s*end\s*$'), 7.0)],
99
- keywords={'def': 9.0, 'end': 8.0, 'require': 7.0, 'puts': 6.0, 'class': 7.0},
100
- exclusive_patterns=[(re.compile(r'# frozen_string_literal: true'), 15.0), (re.compile(r':\w+'), 10.0)],
101
- file_extensions=['.rb']
102
- ),
103
- 'swift': LanguageSignature(
104
- patterns=[(re.compile(r'import\s+(UIKit|Foundation)'), 10.0), (re.compile(r'func\s+\w+\s*\([^)]*\)\s*\{'), 9.0)],
105
- keywords={'import': 8.0, 'func': 9.0, 'let': 8.0, 'var': 8.0, 'class': 7.0, 'print': 6.0},
106
- exclusive_patterns=[(re.compile(r'@IBOutlet'), 18.0), (re.compile(r'import\s+SwiftUI'), 20.0)],
107
- file_extensions=['.swift']
108
- ),
109
- 'php': LanguageSignature(
110
- patterns=[(re.compile(r'function\s+\w+\s*\([^)]*\)'), 7.0), (re.compile(r'\$\w+'), 9.0), (re.compile(r'echo\s+'), 6.0)],
111
- keywords={'function': 7.0, 'echo': 6.0, 'class': 6.0, 'public': 6.0},
112
- exclusive_patterns=[(re.compile(r'<\?php'), 100.0)], # 绝对权重
113
- file_extensions=['.php']
114
- ),
115
- 'typescript': LanguageSignature(
116
- patterns=[(re.compile(r'interface\s+\w+'), 10.0), (re.compile(r':\s*(string|number|boolean|any)'), 9.0)],
117
- keywords={'interface': 9.0, 'type': 8.0, 'public': 7.0, 'private': 7.0, 'enum': 7.0},
118
- exclusive_patterns=[(re.compile(r'public\s+constructor\s*\('), 18.0)],
119
- file_extensions=['.ts', '.tsx']
120
- ),
121
- 'r': LanguageSignature(
122
- patterns=[(re.compile(r'library\s*\(\w+\)'), 10.0), (re.compile(r'\w+\s*<-\s*'), 9.0)],
123
- keywords={'library': 9.0, 'function': 7.0, 'if': 5.0, 'else': 5.0},
124
- exclusive_patterns=[(re.compile(r'install\.packages\s*\('), 20.0), (re.compile(r'<-'), 15.0)],
125
- file_extensions=['.r']
126
- ),
127
- 'bash': LanguageSignature(
128
- patterns=[(re.compile(r'if\s+\[.*\]'), 8.0), (re.compile(r'^\s*fi\s*$'), 7.0), (re.compile(r'echo\s+'), 6.0)],
129
- keywords={'if': 7.0, 'then': 7.0, 'fi': 7.0, 'for': 6.0, 'do': 6.0, 'done': 6.0, 'echo': 6.0},
130
- exclusive_patterns=[(re.compile(r'#!/bin?/(ba|z|k)?sh'), 100.0)], # 绝对权重
131
- file_extensions=['.sh']
132
- ),
133
- 'dockerfile': LanguageSignature(
134
- patterns=[(re.compile(r'^(FROM|RUN|CMD|COPY|ADD|WORKDIR|EXPOSE)\s+', re.MULTILINE), 10.0)],
135
- keywords={'FROM': 10.0, 'RUN': 9.0, 'CMD': 8.0, 'COPY': 8.0, 'WORKDIR': 7.0, 'EXPOSE': 7.0},
136
- exclusive_patterns=[(re.compile(r'^FROM\s+'), 20.0)],
137
- file_extensions=['Dockerfile']
138
- ),
139
- 'yaml': LanguageSignature(
140
- patterns=[(re.compile(r'^\s*[\w-]+\s*:\s*.*'), 9.0), (re.compile(r'^\s*-\s+'), 8.0)],
141
- keywords={'version': 6.0, 'services': 7.0, 'steps': 7.0, 'jobs': 7.0, 'name': 5.0, 'image': 6.0},
142
- exclusive_patterns=[],
143
- file_extensions=['.yml', '.yaml']
144
- ),
145
- 'xml': LanguageSignature(
146
- patterns=[(re.compile(r'<(\w+)\s*.*>.*</\1>'), 9.0), (re.compile(r'<\?xml[^>]*\?>'), 10.0)],
147
- keywords={},
148
- exclusive_patterns=[(re.compile(r'<\?xml\s+version="1.0"'), 100.0)], # 绝对权重
149
- file_extensions=['.xml']
150
- ),
151
- }
152
-
153
  def detect_with_line_info(self, code: str) -> Dict[str, Any]:
154
  start_time = time.time()
155
  if not code or not code.strip(): return self._empty_result()
156
- code = code.strip()
157
- lines = code.split('\n')
158
- scores = defaultdict(float)
159
- line_evidence = {lang: [] for lang in self.languages}
160
- exclusive_matches = defaultdict(float)
161
-
162
- # 1. 独占模式检测
163
  for lang, signature in self.languages.items():
164
  for pattern, weight in signature.exclusive_patterns:
165
  for i, line in enumerate(lines, 1):
166
- if pattern.search(line):
167
- exclusive_matches[lang] += weight
168
- line_evidence[lang].append(f"第{i}行: {line.strip()[:50]}...")
169
-
170
- # 2. 检查是否有绝对权重匹配
171
  if exclusive_matches:
172
  best_lang_exclusive = max(exclusive_matches, key=exclusive_matches.get)
173
- if exclusive_matches[best_lang_exclusive] >= 100.0:
174
- return self._build_absolute_result(best_lang_exclusive, lines, start_time)
175
-
176
- # 3. 如果没有绝对权重匹配,则继续进行常规检测
177
  for lang, signature in self.languages.items():
178
- score = exclusive_matches.get(lang, 0) # 继承独占分数
179
- lang_evidence = []
180
  for pattern, weight in signature.patterns:
181
  for i, line in enumerate(lines, 1):
182
  if pattern.search(line): score += weight; lang_evidence.append(f"第{i}行 (正则): {line.strip()[:50]}...")
@@ -184,21 +54,10 @@ class PrecisionLanguageDetector:
184
  keyword_pattern = re.compile(r'\b' + re.escape(keyword) + r'\b', re.IGNORECASE if lang in ['html', 'sql', 'dockerfile'] else 0)
185
  for i, line in enumerate(lines, 1):
186
  if keyword_pattern.search(line): score += weight; lang_evidence.append(f"第{i}行 (关键词): '{keyword}'")
187
-
188
- if score > 0:
189
- scores[lang] = score
190
- line_evidence[lang].extend(list(dict.fromkeys(lang_evidence)))
191
-
192
  if not scores: return self._unknown_result(lines, start_time)
193
-
194
- best_lang = max(scores, key=scores.get)
195
- best_score = scores[best_lang]
196
- total_score = sum(scores.values())
197
- confidence = best_score / total_score if total_score > 0 else 0.0
198
- processing_time = (time.time() - start_time) * 1000
199
-
200
  return {'language': best_lang, 'confidence': round(min(confidence * 1.2, 0.999), 3), 'score': round(best_score, 2), 'total_lines': len(lines), 'line_evidence': line_evidence[best_lang][:15], 'processing_time_ms': round(processing_time, 2), 'code_preview': self._get_code_preview(lines), 'all_scores': {k: round(v, 2) for k, v in sorted(scores.items(), key=lambda item: item[1], reverse=True) if v > 0}, 'detection_stats': self._get_detection_stats(scores, lines, best_lang)}
201
-
202
  def _get_code_preview(self, lines: List[str]) -> Dict[str, Any]:
203
  if not lines: return {}
204
  functions, classes, imports, comments = [], [], [], []
@@ -209,22 +68,14 @@ class PrecisionLanguageDetector:
209
  elif re.match(r'(import|from|#include|using|require|use|library|package)\s+', line_clean): imports.append(f"第{i}行: {line_clean}")
210
  elif re.match(r'(#|//|--|/\*|<!--)', line_clean): comments.append(f"第{i}行: {line_clean}")
211
  return {'total_lines': len(lines), 'functions_count': len(functions), 'classes_count': len(classes), 'imports_count': len(imports), 'comments_count': len(comments), 'code_density': round((len(lines) - len(comments)) / len(lines), 3) if lines else 0, 'sample_functions': functions[:3], 'sample_classes': classes[:2], 'sample_imports': imports[:3]}
212
-
213
- # ★★★ FIX: This function now accepts the decided primary_language ★★★
214
  def _get_detection_stats(self, scores: Dict[str, float], lines: List[str], primary_language: str) -> Dict[str, Any]:
215
  if not scores: return {}
216
- total_score = sum(scores.values())
217
- max_score = max(scores.values(), default=0)
218
- quality = '确定性' if max_score >= 100 else '极高' if max_score > 50 else '高' if max_score > 25 else '中'
219
  return {'languages_detected': len(scores), 'primary_language': primary_language, 'score_distribution': {lang: round(score/total_score*100, 1) for lang, score in scores.items() if score > 0}, 'total_score': round(total_score, 2), 'detection_quality': quality}
220
-
221
  def _empty_result(self) -> Dict[str, Any]: return {'language': 'unknown', 'confidence': 0.0, 'message': '代码为空', 'total_lines': 0, 'line_evidence': [], 'processing_time_ms': 0.1, 'code_preview': {}, 'detection_stats': {}, 'all_scores': {}}
222
-
223
  def _unknown_result(self, lines: List[str], start_time: float) -> Dict[str, Any]: return {'language': 'unknown', 'confidence': 0.0, 'message': '无法识别编程语言', 'total_lines': len(lines), 'line_evidence': [], 'processing_time_ms': round((time.time() - start_time) * 1000, 2), 'code_preview': self._get_code_preview(lines), 'detection_stats': {}, 'all_scores': {}}
224
-
225
  def _build_absolute_result(self, lang: str, lines: List[str], start_time: float) -> Dict[str, Any]:
226
- all_scores = defaultdict(float)
227
- line_evidence = defaultdict(list)
228
  for current_lang, signature in self.languages.items():
229
  score = 0
230
  for pattern, weight in signature.exclusive_patterns + signature.patterns:
@@ -239,16 +90,13 @@ class PrecisionLanguageDetector:
239
  score += weight
240
  if current_lang == lang: line_evidence[lang].append(f"第{i}行 (关键词): '{keyword}'")
241
  if score > 0: all_scores[current_lang] = score
242
-
243
  best_score = all_scores[lang]
244
  return {'language': lang, 'confidence': 0.999, 'score': round(best_score, 2), 'total_lines': len(lines), 'line_evidence': list(dict.fromkeys(line_evidence[lang]))[:15], 'processing_time_ms': round((time.time() - start_time) * 1000, 2), 'code_preview': self._get_code_preview(lines), 'detection_method': 'absolute_exclusive_pattern', 'all_scores': {k: round(v, 2) for k, v in sorted(all_scores.items(), key=lambda item: item[1], reverse=True) if v > 0}, 'detection_stats': self._get_detection_stats(all_scores, lines, lang)}
245
 
246
  # 全局实例
247
  precision_detector = PrecisionLanguageDetector()
248
-
249
  # CSS样式
250
  custom_css = ":root{--primary:#6366f1;--primary-dark:#4f46e5;--secondary:#10b981;--accent:#f59e0b;--danger:#ef4444;--bg-primary:#fff;--bg-secondary:#f8fafc;--bg-card:#fff;--border:#e2e8f0;--text-primary:#1e293b;--text-secondary:#64748b;--text-muted:#94a3b8;--shadow:0 1px 3px 0 rgba(0,0,0,.1),0 1px 2px -1px rgba(0,0,0,.1);--shadow-lg:0 10px 15px -3px rgba(0,0,0,.1),0 4px 6px -4px rgba(0,0,0,.1)}.gradio-container{background:linear-gradient(135deg,#f8fafc 0%,#e2e8f0 100%)!important;min-height:100vh;font-family:Inter,-apple-system,BlinkMacSystemFont,sans-serif!important}.pro-card{background:var(--bg-card)!important;border:1px solid var(--border)!important;border-radius:12px!important;box-shadow:var(--shadow)!important;padding:24px!important;margin-bottom:20px!important;transition:all .3s cubic-bezier(.4,0,.2,1)!important}.pro-card:hover{box-shadow:var(--shadow-lg)!important;transform:translateY(-2px)!important}.header-section{text-align:center!important;margin-bottom:40px!important;background:linear-gradient(135deg,var(--primary) 0%,var(--primary-dark) 100%)!important;border-radius:16px!important;padding:40px 32px!important;color:#fff!important}.app-title{font-size:3rem!important;font-weight:800!important;margin-bottom:12px!important;background:linear-gradient(135deg,#fff 0%,#f1f5f9 100%)!important;-webkit-background-clip:text!important;-webkit-text-fill-color:transparent!important;background-clip:text!important}.app-subtitle{font-size:1.25rem!important;font-weight:400!important;opacity:.9!important;margin-bottom:0!important}.stats-grid{display:grid!important;grid-template-columns:repeat(auto-fit,minmax(180px,1fr))!important;gap:16px!important;margin:20px 0!important}.stat-card{background:#fff!important;border-radius:8px!important;padding:16px!important;text-align:center!important;border:1px solid 
var(--border)!important}.stat-value{font-size:2rem!important;font-weight:700!important;color:var(--primary)!important;margin-bottom:4px!important}.stat-label{font-size:.875rem!important;color:var(--text-secondary)!important;font-weight:500!important}.evidence-panel{background:var(--bg-secondary)!important;border:1px solid var(--border)!important;border-radius:8px!important;padding:16px!important;max-height:300px!important;overflow-y:auto!important;font-family:Monaco,Menlo,Consolas,monospace!important;font-size:.875rem!important}.evidence-item{padding:8px 12px!important;margin:4px 0!important;background:#fff!important;border-radius:6px!important;border-left:4px solid var(--primary)!important}.confidence-high{color:var(--secondary)!important;font-weight:700!important}.confidence-medium{color:var(--accent)!important;font-weight:600!important}.confidence-low{color:var(--danger)!important;font-weight:600!important}.btn-primary{background:linear-gradient(135deg,var(--primary) 0%,var(--primary-dark) 100%)!important;color:#fff!important;border:none!important;border-radius:8px!important;padding:12px 24px!important;font-weight:600!important;transition:all .3s ease!important}.btn-primary:hover{transform:translateY(-1px)!important;box-shadow:0 4px 12px rgba(99,102,241,.3)!important}"
251
-
252
  # 显示格式化辅助类
253
  class DisplayFormatter:
254
  @staticmethod
@@ -284,49 +132,170 @@ class DisplayFormatter:
284
  preview = result.get('code_preview', {})
285
  if not preview: return "<div style='text-align: center; color: #64748b;'>无代码质量数据</div>"
286
  return f"""<div style="padding: 20px;"><h4 style="margin-bottom: 16px;">代码结构指标 (通用)</h4><div class="stats-grid"><div class="stat-card"><div class="stat-value">{preview.get('functions_count',0)}</div><div class="stat-label">函数/方法</div></div><div class="stat-card"><div class="stat-value">{preview.get('classes_count',0)}</div><div class="stat-label">类/结构体</div></div><div class="stat-card"><div class="stat-value">{preview.get('imports_count',0)}</div><div class="stat-label">导入/包含</div></div><div class="stat-card"><div class="stat-value">{preview.get('code_density',0):.1%}</div><div class="stat-label">代码密度</div></div></div></div>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- # Gradio界面
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  def create_enhanced_interface():
290
  with gr.Blocks(title="Capricode Pro Master", css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")) as demo:
291
- gr.HTML("""<div class='header-section'><div class="app-title">🚀 Capricode Pro Master</div><div class="app-subtitle">精准代码视觉感知系统 (终极版)</div></div>""")
292
- with gr.Row(equal_height=False):
293
- with gr.Column(scale=1):
294
- with gr.Column(elem_classes="pro-card"):
295
- gr.Markdown("### 📝 代码输入区域")
296
- code_input = gr.Textbox(label="", placeholder="请在此处粘贴代码... 已支持20种主流语言!", lines=15, show_label=False)
297
- with gr.Row():
298
- detect_btn = gr.Button("🔍 开始分析", variant="primary", elem_classes="btn-primary")
299
- clear_btn = gr.Button("🗑️ 一键清空")
300
- with gr.Column(scale=1):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  with gr.Column(elem_classes="pro-card"):
302
- gr.Markdown("### 🎯 主要检测结果")
303
- with gr.Row(): detected_language, confidence_score = gr.HTML(label="识别语言"), gr.HTML(label="置信度")
304
- with gr.Row(): processing_time, total_lines = gr.Textbox(label="处理时间", interactive=False), gr.Textbox(label="代码行数", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  with gr.Column(elem_classes="pro-card"):
306
- gr.Markdown("### 📊 分统计"), (stats_display := gr.HTML(value="<div style='text-align: center; color: #64748b;'>等待分析数据...</div>"))
307
- with gr.Row():
308
- with gr.Column(scale=1):
309
- with gr.Column(elem_classes="pro-card"): gr.Markdown("### 📍 行数证据详情"), (line_evidence := gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>检测证据将在此显示</div>"))
310
- with gr.Column(scale=1):
311
- with gr.Column(elem_classes="pro-card"): gr.Markdown("### 🏗️ 代码结构分析 (通用)"), (code_preview := gr.JSON(label="结构分析", show_label=False))
312
- with gr.Column(elem_classes="pro-card"):
313
- gr.Markdown("### 📈 可视化分析")
314
- with gr.Tabs():
315
- with gr.TabItem("🔧 语言分布"): language_distribution = gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>语言分布可视化将在此显示</div>")
316
- with gr.TabItem("📋 代码质量"): code_quality = gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>代码质量分析将在此显示</div>")
317
- with gr.TabItem("🎯 检测详情 (JSON)"): detection_details = gr.JSON(label="详细检测数据", show_label=False)
318
- test_cases = {'python': 'def main():\n print("Hello from Python!")\n\nif __name__ == "__main__":\n main()','java': 'public class HelloWorld {\n public static void main(String[] args) {\n System.out.println("Hello, Java!");\n }\n}','javascript': 'document.addEventListener("DOMContentLoaded", () => {\n console.log("Hello, JavaScript!");\n});','cpp': '#include <iostream>\n\nint main() {\n std::cout << "Hello, C++!" << std::endl;\n return 0;\n}','csharp': 'using System;\n\nnamespace HelloWorldApp {\n class Program {\n static void Main(string[] args) {\n Console.WriteLine("Hello, C#!");\n }\n }\n}','go': 'package main\n\nimport "fmt"\n\nfunc main() {\n fmt.Println("Hello, Go!")\n}','rust': 'fn main() {\n println!("Hello, Rust!");\n}','php': '<?php\n echo "Hello, PHP!";\n?>','ruby': 'def say_hello\n puts "Hello, Ruby!"\nend\n\nsay_hello()','typescript': 'interface User {\n name: string;\n id: number;\n}\n\nconst user: User = { name: "TypeScript", id: 0 };\nconsole.log(`Hello, ${user.name}!`);','bash': '#!/bin/bash\n\n# Simple bash script\nMESSAGE="Hello, Bash!"\necho $MESSAGE','dockerfile': 'FROM ubuntu:20.04\n\nRUN apt-get update && apt-get install -y curl\n\nCMD ["echo", "Hello, Docker!"]',}
319
- with gr.Column(elem_classes="pro-card"):
320
- gr.Markdown("### 🚀 快速测试用例")
321
- gr.Examples(examples=[[v] for v in test_cases.values()], inputs=code_input, label="点击示例以快速加载")
322
 
323
- def precision_analyze(code):
 
 
 
324
  if not code or not code.strip(): return DisplayFormatter.format_display_result(precision_detector._empty_result())
325
  return DisplayFormatter.format_display_result(precision_detector.detect_with_line_info(code))
326
 
327
- outputs = [detected_language, confidence_score, processing_time, total_lines, stats_display, line_evidence, code_preview, language_distribution, code_quality, detection_details]
328
- detect_btn.click(fn=precision_analyze, inputs=[code_input], outputs=outputs)
329
- clear_btn.click(fn=DisplayFormatter.get_empty_display_state, outputs=[code_input] + outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  return demo
331
 
332
  # 启动应用
 
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ Capricode Pro Master - 精准代码视觉感知系统 (史诗进化版)
5
+ 集成项目分析器,支持.zip文件上传分析,完美兼容免费部署环境
 
6
  """
7
  import gradio as gr
8
  import time
 
11
  from typing import Dict, List, Tuple, Any
12
  from collections import defaultdict
13
 
14
+ # --- 新增的库 ---
15
+ import os
16
+ import zipfile
17
+ import tempfile
18
+ import shutil
19
+ import matplotlib.pyplot as plt
20
+
21
+ # ==================== 精准语言识别引擎 (保持不变) ====================
22
  @dataclass
23
  class LanguageSignature:
24
  patterns: List[Tuple[re.Pattern, float]]
 
27
  file_extensions: List[str]
28
 
29
  class PrecisionLanguageDetector:
30
+ # ... 核心引擎代码与上一版完全相同,此处折叠以节省篇幅 ...
31
+ # (在最终代码中,这部分是完整存在的)
32
  def __init__(self):
33
  self.languages = {}
34
  self._compile_precision_patterns()
 
35
  def _compile_precision_patterns(self):
36
+ self.languages = {'python': LanguageSignature(patterns=[(re.compile(r'def\s+\w+\s*\([^)]*\)\s*:'), 9.0), (re.compile(r'class\s+\w+\s*\(?[^)]*\)?\s*:'), 8.5), (re.compile(r'import\s+[\w.]+'), 6.0)], keywords={'def': 8.0, 'class': 7.0, 'import': 6.0, 'elif': 5.0, 'async': 6.0, 'await': 6.0}, exclusive_patterns=[(re.compile(r'if __name__ == "__main__":'), 20.0), (re.compile(r'f"[^"]*"'), 12.0)], file_extensions=['.py', '.pyw']), 'javascript': LanguageSignature(patterns=[(re.compile(r'function\s*.*\s*\{'), 8.0), (re.compile(r'console\.log\s*\('), 7.0), (re.compile(r'(const|let|var)\s+\w+\s*='), 8.0)], keywords={'function': 7.0, 'const': 6.0, 'let': 6.0, 'console': 5.0, 'document': 6.0, 'async': 5.0, 'await': 5.0}, exclusive_patterns=[(re.compile(r'document\.getElementById\s*\('), 15.0)], file_extensions=['.js', '.jsx', '.mjs']), 'java': LanguageSignature(patterns=[(re.compile(r'public\s+class\s+\w+'), 10.0), (re.compile(r'System\.out\.println\s*\('), 9.0), (re.compile(r'import\s+java\.'), 8.0)], keywords={'public': 8.0, 'class': 8.0, 'static': 7.0, 'void': 6.0, 'String': 7.0, 'System': 7.0, 'new': 5.0}, exclusive_patterns=[(re.compile(r'public\s+static\s+void\s+main\s*\(\s*String\s*\[\]\s*args\s*\)'), 25.0)], file_extensions=['.java']), 'cpp': LanguageSignature(patterns=[(re.compile(r'#include\s*<[a-zA-Z_]+>'), 10.0), (re.compile(r'using\s+namespace\s+std;'), 9.0), (re.compile(r'std::cout'), 8.0)], keywords={'#include': 9.0, 'iostream': 8.0, 'std': 7.0, 'cout': 7.0, 'vector': 6.0, 'int': 5.0, 'main': 6.0}, exclusive_patterns=[(re.compile(r'#include\s*<iostream>'), 18.0)], file_extensions=['.cpp', '.cxx', '.h', '.hpp']), 'html': LanguageSignature(patterns=[(re.compile(r'<\s*head\s*>'), 8.0), (re.compile(r'<\s*body\s*>'), 8.0), (re.compile(r'<\s*/\s*\w+\s*>'), 6.0)], keywords={'div': 5.0, 'p': 4.0, 'a': 4.0, 'href': 5.0, 'class': 3.0, 'id': 3.0}, exclusive_patterns=[(re.compile(r'<!DOCTYPE\s+html>', re.IGNORECASE), 100.0)], file_extensions=['.html', '.htm']), 'css': 
LanguageSignature(patterns=[(re.compile(r'[\w\s.#-]+\s*\{'), 8.0), (re.compile(r'[\w-]+\s*:\s*[^;]+;'), 9.0)], keywords={'color': 6.0, 'background-color': 7.0, 'margin': 5.0, 'padding': 5.0, 'font-size': 6.0, 'display': 6.0}, exclusive_patterns=[(re.compile(r'@media\s*\(.+\)\s*\{'), 15.0)], file_extensions=['.css']), 'sql': LanguageSignature(patterns=[(re.compile(r'(SELECT|CREATE|INSERT|UPDATE|DELETE|FROM|WHERE)', re.IGNORECASE), 10.0)], keywords={'SELECT': 9.0, 'FROM': 8.0, 'WHERE': 7.0, 'JOIN': 7.0, 'GROUP BY': 8.0, 'ORDER BY': 7.0, 'CREATE TABLE': 9.0}, exclusive_patterns=[(re.compile(r'CREATE\s+TABLE', re.IGNORECASE), 18.0)], file_extensions=['.sql']), 'csharp': LanguageSignature(patterns=[(re.compile(r'using\s+System;'), 10.0), (re.compile(r'namespace\s+\w+'), 9.0)], keywords={'namespace': 9.0, 'public': 7.0, 'class': 7.0, 'static': 6.0, 'void': 5.0, 'string': 7.0, 'var': 6.0}, exclusive_patterns=[(re.compile(r'static\s+void\s+Main\s*\(\s*string\s*\[\]\s*args\s*\)'), 25.0), (re.compile(r'Console\.WriteLine\s*\('), 15.0)], file_extensions=['.cs']), 'go': LanguageSignature(patterns=[(re.compile(r'package\s+main'), 10.0), (re.compile(r'import\s*\(\s*'), 8.0), (re.compile(r'func\s+\w+\s*\([^)]*\)\s*\{'), 9.0)], keywords={'package': 9.0, 'import': 8.0, 'func': 9.0, 'fmt': 7.0, 'Println': 6.0}, exclusive_patterns=[(re.compile(r'func\s+main\s*\(\s*\)'), 20.0), (re.compile(r':='), 12.0)], file_extensions=['.go']), 'rust': LanguageSignature(patterns=[(re.compile(r'fn\s+\w+\s*\([^)]*\)\s*\{'), 10.0), (re.compile(r'use\s+std::'), 9.0), (re.compile(r'let\s+(mut\s+)?\w+'), 8.0)], keywords={'fn': 9.0, 'let': 8.0, 'mut': 7.0, 'use': 8.0, 'struct': 7.0}, exclusive_patterns=[(re.compile(r'fn\s+main\s*\(\s*\)'), 20.0), (re.compile(r'println!\s*\('), 18.0)], file_extensions=['.rs']), 'kotlin': LanguageSignature(patterns=[(re.compile(r'fun\s+\w+\s*\([^)]*\)'), 10.0), (re.compile(r'val\s+\w+'), 8.0)], keywords={'package': 8.0, 'import': 7.0, 'fun': 9.0, 'val': 8.0, 'var': 7.0, 
'println': 6.0, 'class': 7.0}, exclusive_patterns=[(re.compile(r'fun\s+main\s*\(\s*args:\s*Array<String>\s*\)'), 25.0)], file_extensions=['.kt', '.kts']), 'ruby': LanguageSignature(patterns=[(re.compile(r'def\s+\w+'), 10.0), (re.compile(r'require\s+[\'"]\w+[\'"]'), 8.0), (re.compile(r'^\s*end\s*$'), 7.0)], keywords={'def': 9.0, 'end': 8.0, 'require': 7.0, 'puts': 6.0, 'class': 7.0}, exclusive_patterns=[(re.compile(r'# frozen_string_literal: true'), 15.0), (re.compile(r':\w+'), 10.0)], file_extensions=['.rb']), 'swift': LanguageSignature(patterns=[(re.compile(r'import\s+(UIKit|Foundation)'), 10.0), (re.compile(r'func\s+\w+\s*\([^)]*\)\s*\{'), 9.0)], keywords={'import': 8.0, 'func': 9.0, 'let': 8.0, 'var': 8.0, 'class': 7.0, 'print': 6.0}, exclusive_patterns=[(re.compile(r'@IBOutlet'), 18.0), (re.compile(r'import\s+SwiftUI'), 20.0)], file_extensions=['.swift']), 'php': LanguageSignature(patterns=[(re.compile(r'function\s+\w+\s*\([^)]*\)'), 7.0), (re.compile(r'\$\w+'), 9.0), (re.compile(r'echo\s+'), 6.0)], keywords={'function': 7.0, 'echo': 6.0, 'class': 6.0, 'public': 6.0}, exclusive_patterns=[(re.compile(r'<\?php'), 100.0)], file_extensions=['.php']), 'typescript': LanguageSignature(patterns=[(re.compile(r'interface\s+\w+'), 10.0), (re.compile(r':\s*(string|number|boolean|any)'), 9.0)], keywords={'interface': 9.0, 'type': 8.0, 'public': 7.0, 'private': 7.0, 'enum': 7.0}, exclusive_patterns=[(re.compile(r'public\s+constructor\s*\('), 18.0)], file_extensions=['.ts', '.tsx']), 'r': LanguageSignature(patterns=[(re.compile(r'library\s*\(\w+\)'), 10.0), (re.compile(r'\w+\s*<-\s*'), 9.0)], keywords={'library': 9.0, 'function': 7.0, 'if': 5.0, 'else': 5.0}, exclusive_patterns=[(re.compile(r'install\.packages\s*\('), 20.0), (re.compile(r'<-'), 15.0)], file_extensions=['.r']), 'bash': LanguageSignature(patterns=[(re.compile(r'if\s+\[.*\]'), 8.0), (re.compile(r'^\s*fi\s*$'), 7.0), (re.compile(r'echo\s+'), 6.0)], keywords={'if': 7.0, 'then': 7.0, 'fi': 7.0, 'for': 6.0, 'do': 
6.0, 'done': 6.0, 'echo': 6.0}, exclusive_patterns=[(re.compile(r'#!/bin?/(ba|z|k)?sh'), 100.0)], file_extensions=['.sh']), 'dockerfile': LanguageSignature(patterns=[(re.compile(r'^(FROM|RUN|CMD|COPY|ADD|WORKDIR|EXPOSE)\s+', re.MULTILINE), 10.0)], keywords={'FROM': 10.0, 'RUN': 9.0, 'CMD': 8.0, 'COPY': 8.0, 'WORKDIR': 7.0, 'EXPOSE': 7.0}, exclusive_patterns=[(re.compile(r'^FROM\s+'), 20.0)], file_extensions=['Dockerfile']), 'yaml': LanguageSignature(patterns=[(re.compile(r'^\s*[\w-]+\s*:\s*.*'), 9.0), (re.compile(r'^\s*-\s+'), 8.0)], keywords={'version': 6.0, 'services': 7.0, 'steps': 7.0, 'jobs': 7.0, 'name': 5.0, 'image': 6.0}, exclusive_patterns=[], file_extensions=['.yml', '.yaml']), 'xml': LanguageSignature(patterns=[(re.compile(r'<(\w+)\s*.*>.*</\1>'), 9.0), (re.compile(r'<\?xml[^>]*\?>'), 10.0)], keywords={}, exclusive_patterns=[(re.compile(r'<\?xml\s+version="1.0"'), 100.0)], file_extensions=['.xml'])}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def detect_with_line_info(self, code: str) -> Dict[str, Any]:
38
  start_time = time.time()
39
  if not code or not code.strip(): return self._empty_result()
40
+ code = code.strip(); lines = code.split('\n'); scores = defaultdict(float); line_evidence = {lang: [] for lang in self.languages}; exclusive_matches = defaultdict(float)
 
 
 
 
 
 
41
  for lang, signature in self.languages.items():
42
  for pattern, weight in signature.exclusive_patterns:
43
  for i, line in enumerate(lines, 1):
44
+ if pattern.search(line): exclusive_matches[lang] += weight; line_evidence[lang].append(f"第{i}行: {line.strip()[:50]}...")
 
 
 
 
45
  if exclusive_matches:
46
  best_lang_exclusive = max(exclusive_matches, key=exclusive_matches.get)
47
+ if exclusive_matches[best_lang_exclusive] >= 100.0: return self._build_absolute_result(best_lang_exclusive, lines, start_time)
 
 
 
48
  for lang, signature in self.languages.items():
49
+ score = exclusive_matches.get(lang, 0); lang_evidence = []
 
50
  for pattern, weight in signature.patterns:
51
  for i, line in enumerate(lines, 1):
52
  if pattern.search(line): score += weight; lang_evidence.append(f"第{i}行 (正则): {line.strip()[:50]}...")
 
54
  keyword_pattern = re.compile(r'\b' + re.escape(keyword) + r'\b', re.IGNORECASE if lang in ['html', 'sql', 'dockerfile'] else 0)
55
  for i, line in enumerate(lines, 1):
56
  if keyword_pattern.search(line): score += weight; lang_evidence.append(f"第{i}行 (关键词): '{keyword}'")
57
+ if score > 0: scores[lang] = score; line_evidence[lang].extend(list(dict.fromkeys(lang_evidence)))
 
 
 
 
58
  if not scores: return self._unknown_result(lines, start_time)
59
+ best_lang = max(scores, key=scores.get); best_score = scores[best_lang]; total_score = sum(scores.values()); confidence = best_score / total_score if total_score > 0 else 0.0; processing_time = (time.time() - start_time) * 1000
 
 
 
 
 
 
60
  return {'language': best_lang, 'confidence': round(min(confidence * 1.2, 0.999), 3), 'score': round(best_score, 2), 'total_lines': len(lines), 'line_evidence': line_evidence[best_lang][:15], 'processing_time_ms': round(processing_time, 2), 'code_preview': self._get_code_preview(lines), 'all_scores': {k: round(v, 2) for k, v in sorted(scores.items(), key=lambda item: item[1], reverse=True) if v > 0}, 'detection_stats': self._get_detection_stats(scores, lines, best_lang)}
 
61
  def _get_code_preview(self, lines: List[str]) -> Dict[str, Any]:
62
  if not lines: return {}
63
  functions, classes, imports, comments = [], [], [], []
 
68
  elif re.match(r'(import|from|#include|using|require|use|library|package)\s+', line_clean): imports.append(f"第{i}行: {line_clean}")
69
  elif re.match(r'(#|//|--|/\*|<!--)', line_clean): comments.append(f"第{i}行: {line_clean}")
70
  return {'total_lines': len(lines), 'functions_count': len(functions), 'classes_count': len(classes), 'imports_count': len(imports), 'comments_count': len(comments), 'code_density': round((len(lines) - len(comments)) / len(lines), 3) if lines else 0, 'sample_functions': functions[:3], 'sample_classes': classes[:2], 'sample_imports': imports[:3]}
 
 
71
  def _get_detection_stats(self, scores: Dict[str, float], lines: List[str], primary_language: str) -> Dict[str, Any]:
72
  if not scores: return {}
73
+ total_score = sum(scores.values()); max_score = max(scores.values(), default=0); quality = '确定性' if max_score >= 100 else '极高' if max_score > 50 else '高' if max_score > 25 else '中'
 
 
74
  return {'languages_detected': len(scores), 'primary_language': primary_language, 'score_distribution': {lang: round(score/total_score*100, 1) for lang, score in scores.items() if score > 0}, 'total_score': round(total_score, 2), 'detection_quality': quality}
 
75
  def _empty_result(self) -> Dict[str, Any]: return {'language': 'unknown', 'confidence': 0.0, 'message': '代码为空', 'total_lines': 0, 'line_evidence': [], 'processing_time_ms': 0.1, 'code_preview': {}, 'detection_stats': {}, 'all_scores': {}}
 
76
  def _unknown_result(self, lines: List[str], start_time: float) -> Dict[str, Any]: return {'language': 'unknown', 'confidence': 0.0, 'message': '无法识别编程语言', 'total_lines': len(lines), 'line_evidence': [], 'processing_time_ms': round((time.time() - start_time) * 1000, 2), 'code_preview': self._get_code_preview(lines), 'detection_stats': {}, 'all_scores': {}}
 
77
  def _build_absolute_result(self, lang: str, lines: List[str], start_time: float) -> Dict[str, Any]:
78
+ all_scores = defaultdict(float); line_evidence = defaultdict(list)
 
79
  for current_lang, signature in self.languages.items():
80
  score = 0
81
  for pattern, weight in signature.exclusive_patterns + signature.patterns:
 
90
  score += weight
91
  if current_lang == lang: line_evidence[lang].append(f"第{i}行 (关键词): '{keyword}'")
92
  if score > 0: all_scores[current_lang] = score
 
93
  best_score = all_scores[lang]
94
  return {'language': lang, 'confidence': 0.999, 'score': round(best_score, 2), 'total_lines': len(lines), 'line_evidence': list(dict.fromkeys(line_evidence[lang]))[:15], 'processing_time_ms': round((time.time() - start_time) * 1000, 2), 'code_preview': self._get_code_preview(lines), 'detection_method': 'absolute_exclusive_pattern', 'all_scores': {k: round(v, 2) for k, v in sorted(all_scores.items(), key=lambda item: item[1], reverse=True) if v > 0}, 'detection_stats': self._get_detection_stats(all_scores, lines, lang)}
95
 
96
  # 全局实例
97
  # Module-level detector instance shared by the single-file callback and the project (zip) analyzer.
  precision_detector = PrecisionLanguageDetector()
 
98
  # CSS样式
99
  custom_css = ":root{--primary:#6366f1;--primary-dark:#4f46e5;--secondary:#10b981;--accent:#f59e0b;--danger:#ef4444;--bg-primary:#fff;--bg-secondary:#f8fafc;--bg-card:#fff;--border:#e2e8f0;--text-primary:#1e293b;--text-secondary:#64748b;--text-muted:#94a3b8;--shadow:0 1px 3px 0 rgba(0,0,0,.1),0 1px 2px -1px rgba(0,0,0,.1);--shadow-lg:0 10px 15px -3px rgba(0,0,0,.1),0 4px 6px -4px rgba(0,0,0,.1)}.gradio-container{background:linear-gradient(135deg,#f8fafc 0%,#e2e8f0 100%)!important;min-height:100vh;font-family:Inter,-apple-system,BlinkMacSystemFont,sans-serif!important}.pro-card{background:var(--bg-card)!important;border:1px solid var(--border)!important;border-radius:12px!important;box-shadow:var(--shadow)!important;padding:24px!important;margin-bottom:20px!important;transition:all .3s cubic-bezier(.4,0,.2,1)!important}.pro-card:hover{box-shadow:var(--shadow-lg)!important;transform:translateY(-2px)!important}.header-section{text-align:center!important;margin-bottom:40px!important;background:linear-gradient(135deg,var(--primary) 0%,var(--primary-dark) 100%)!important;border-radius:16px!important;padding:40px 32px!important;color:#fff!important}.app-title{font-size:3rem!important;font-weight:800!important;margin-bottom:12px!important;background:linear-gradient(135deg,#fff 0%,#f1f5f9 100%)!important;-webkit-background-clip:text!important;-webkit-text-fill-color:transparent!important;background-clip:text!important}.app-subtitle{font-size:1.25rem!important;font-weight:400!important;opacity:.9!important;margin-bottom:0!important}.stats-grid{display:grid!important;grid-template-columns:repeat(auto-fit,minmax(180px,1fr))!important;gap:16px!important;margin:20px 0!important}.stat-card{background:#fff!important;border-radius:8px!important;padding:16px!important;text-align:center!important;border:1px solid 
var(--border)!important}.stat-value{font-size:2rem!important;font-weight:700!important;color:var(--primary)!important;margin-bottom:4px!important}.stat-label{font-size:.875rem!important;color:var(--text-secondary)!important;font-weight:500!important}.evidence-panel{background:var(--bg-secondary)!important;border:1px solid var(--border)!important;border-radius:8px!important;padding:16px!important;max-height:300px!important;overflow-y:auto!important;font-family:Monaco,Menlo,Consolas,monospace!important;font-size:.875rem!important}.evidence-item{padding:8px 12px!important;margin:4px 0!important;background:#fff!important;border-radius:6px!important;border-left:4px solid var(--primary)!important}.confidence-high{color:var(--secondary)!important;font-weight:700!important}.confidence-medium{color:var(--accent)!important;font-weight:600!important}.confidence-low{color:var(--danger)!important;font-weight:600!important}.btn-primary{background:linear-gradient(135deg,var(--primary) 0%,var(--primary-dark) 100%)!important;color:#fff!important;border:none!important;border-radius:8px!important;padding:12px 24px!important;font-weight:600!important;transition:all .3s ease!important}.btn-primary:hover{transform:translateY(-1px)!important;box-shadow:0 4px 12px rgba(99,102,241,.3)!important}"
 
100
  # 显示格式化辅助类
101
  class DisplayFormatter:
102
  @staticmethod
 
132
  preview = result.get('code_preview', {})
133
  if not preview: return "<div style='text-align: center; color: #64748b;'>无代码质量数据</div>"
134
  return f"""<div style="padding: 20px;"><h4 style="margin-bottom: 16px;">代码结构指标 (通用)</h4><div class="stats-grid"><div class="stat-card"><div class="stat-value">{preview.get('functions_count',0)}</div><div class="stat-label">函数/方法</div></div><div class="stat-card"><div class="stat-value">{preview.get('classes_count',0)}</div><div class="stat-label">类/结构体</div></div><div class="stat-card"><div class="stat-value">{preview.get('imports_count',0)}</div><div class="stat-label">导入/包含</div></div><div class="stat-card"><div class="stat-value">{preview.get('code_density',0):.1%}</div><div class="stat-label">代码密度</div></div></div></div>"""
135
+ # ★★★ 新增:项目分析结果的格式化函数 ★★★
136
+ @staticmethod
137
+ def format_project_tree(tree_string):
138
+ if not tree_string: return "项目为空或无法解析。"
139
+ return f"```\n{tree_string}\n```"
140
+ @staticmethod
141
+ def format_language_distribution_plot(lang_stats):
142
+ if not lang_stats: return None
143
+ # 按文件数量排序
144
+ sorted_stats = sorted(lang_stats.items(), key=lambda item: item[1]['count'], reverse=True)
145
+ langs = [item[0].upper() for item in sorted_stats]
146
+ counts = [item[1]['count'] for item in sorted_stats]
147
+
148
+ fig, ax = plt.subplots(figsize=(10, 8))
149
+ bars = ax.barh(langs, counts, color='#6366f1')
150
+ ax.invert_yaxis()
151
+ ax.set_xlabel('文件数量 (File Count)')
152
+ ax.set_title('项目语言分布 (Project Language Distribution)')
153
+ ax.bar_label(bars, padding=3)
154
+ plt.tight_layout()
155
+ return fig
156
+
157
+ # ★★★ 新增:项目分析核心逻辑 ★★★
158
+ def analyze_project_zip(zip_file):
159
+ if zip_file is None:
160
+ return ("请先上传一个 .zip 文件。", None, {})
161
+
162
+ # 创建一个安全的临时目录
163
+ temp_dir = tempfile.mkdtemp()
164
+
165
+ try:
166
+ # 确保上传的是zip文件
167
+ if not zipfile.is_zipfile(zip_file.name):
168
+ return ("上传的不是一个有效的 .zip 文件。", None, {})
169
+
170
+ # 解压文件到临时目录
171
+ with zipfile.ZipFile(zip_file.name, 'r') as zf:
172
+ zf.extractall(temp_dir)
173
+
174
+ project_tree_str = ""
175
+ language_stats = defaultdict(lambda: {'count': 0, 'lines': 0})
176
+ total_files = 0
177
+
178
+ # 遍历解压后的目录和文件
179
+ for root, _, files in os.walk(temp_dir):
180
+ # 计算当前目录深度用于缩进
181
+ level = root.replace(temp_dir, '').count(os.sep)
182
+ indent = ' ' * 4 * level
183
+ project_tree_str += f"{indent}📂 {os.path.basename(root)}/\n"
184
+
185
+ sub_indent = ' ' * 4 * (level + 1)
186
+ for filename in files:
187
+ total_files += 1
188
+ file_path = os.path.join(root, filename)
189
+ try:
190
+ # 读取文件内容,忽略无法解码的二进制文件
191
+ with open(file_path, 'r', encoding='utf-8') as f:
192
+ content = f.read()
193
+ except Exception:
194
+ # 如果是二进制文件等,则标记为 "Binary"
195
+ project_tree_str += f"{sub_indent}📄 {filename} [Binary or Unreadable]\n"
196
+ continue
197
 
198
+ # 调用我们的核心引擎进行分析
199
+ result = precision_detector.detect_with_line_info(content)
200
+ lang = result['language']
201
+
202
+ project_tree_str += f"{sub_indent}📄 {filename} -> [{lang.upper()}]\n"
203
+
204
+ if lang != 'unknown':
205
+ language_stats[lang]['count'] += 1
206
+ language_stats[lang]['lines'] += result['total_lines']
207
+
208
+ if total_files == 0:
209
+ return ("这是一个空的 .zip 文件。", None, {})
210
+
211
+ return project_tree_str, language_stats, language_stats # 返回三次以匹配输出
212
+
213
+ except Exception as e:
214
+ return (f"分析过程中出现错误: {e}", None, {})
215
+ finally:
216
+ # ★★★ 关键:无论成功失败,都必须清理临时目录 ★★★
217
+ shutil.rmtree(temp_dir)
218
+
219
+ # Gradio界面 (已升级为双模式)
220
  def create_enhanced_interface():
221
  with gr.Blocks(title="Capricode Pro Master", css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")) as demo:
222
+ gr.HTML("""<div class='header-section'><div class="app-title">🚀 Capricode Pro Master</div><div class="app-subtitle">精准代码视觉感知系统 (双模式:单文件 & 项目分析)</div></div>""")
223
+
224
+ with gr.Tabs():
225
+ # --- 模式一:单文件分析器 ---
226
+ with gr.TabItem("单文件分析器 (Single File Analyzer)"):
227
+ with gr.Row(equal_height=False):
228
+ with gr.Column(scale=1):
229
+ with gr.Column(elem_classes="pro-card"):
230
+ gr.Markdown("### 📝 代码输入区域")
231
+ code_input = gr.Textbox(label="", placeholder="请在此处粘贴代码...", lines=15, show_label=False)
232
+ with gr.Row():
233
+ detect_btn = gr.Button("🔍 分析代码片段", variant="primary", elem_classes="btn-primary")
234
+ clear_btn = gr.Button("🗑️ 清空")
235
+ with gr.Column(scale=1):
236
+ with gr.Column(elem_classes="pro-card"):
237
+ gr.Markdown("### 🎯 主要检测结果")
238
+ with gr.Row(): detected_language, confidence_score = gr.HTML(label="识别语言"), gr.HTML(label="置信度")
239
+ with gr.Row(): processing_time, total_lines = gr.Textbox(label="处理时间", interactive=False), gr.Textbox(label="代码行数", interactive=False)
240
+ with gr.Column(elem_classes="pro-card"):
241
+ gr.Markdown("### 📊 分析统计"), (stats_display := gr.HTML(value="<div style='text-align: center; color: #64748b;'>等待分析数据...</div>"))
242
+ with gr.Row():
243
+ with gr.Column(scale=1):
244
+ with gr.Column(elem_classes="pro-card"): gr.Markdown("### 📍 行数证据详情"), (line_evidence := gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>检测证据将在此显示</div>"))
245
+ with gr.Column(scale=1):
246
+ with gr.Column(elem_classes="pro-card"): gr.Markdown("### 🏗️ 代码结构分析 (通用)"), (code_preview := gr.JSON(label="结构分析", show_label=False))
247
  with gr.Column(elem_classes="pro-card"):
248
+ gr.Markdown("### 📈 可视化分析")
249
+ with gr.Tabs():
250
+ with gr.TabItem("🔧 语言分布"): language_distribution = gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>语言分布可视化将在此显示</div>")
251
+ with gr.TabItem("📋 代码质量"): code_quality = gr.HTML(value="<div style='text-align: center; color: #64748b; padding: 40px;'>代码质量分析将在此显示</div>")
252
+ with gr.TabItem("🎯 检测详情 (JSON)"): detection_details = gr.JSON(label="详细检测数据", show_label=False)
253
+
254
+ # --- 模式二:项目分析器 ---
255
+ with gr.TabItem("项目分析器 (Project Analyzer)"):
256
+ with gr.Row():
257
+ with gr.Column(scale=1):
258
+ with gr.Column(elem_classes="pro-card"):
259
+ gr.Markdown("### 📁 上传项目压缩包")
260
+ zip_input = gr.File(label="请上传项目的 .zip 文件", file_types=['.zip'], type="file")
261
+ project_analyze_btn = gr.Button("🚀 开始扫描整个项目", variant="primary", elem_classes="btn-primary")
262
+ with gr.Column(scale=2):
263
+ with gr.Column(elem_classes="pro-card"):
264
+ gr.Markdown("### 🌳 项目结构 & 语言识别")
265
+ project_tree_output = gr.Markdown("项目的文件结构树将在这里显示...", label="Project Structure")
266
+
267
  with gr.Column(elem_classes="pro-card"):
268
+ gr.Markdown("### 📊 项目语言统计")
269
+ with gr.Tabs():
270
+ with gr.TabItem("📈 按文件数量分布 (Bar Chart)"):
271
+ lang_dist_plot = gr.Plot(label="Language Distribution (by file count)")
272
+ with gr.TabItem("📋 原始统计数据 (JSON)"):
273
+ project_raw_json_output = gr.JSON(label="Raw Analysis Data")
 
 
 
 
 
 
 
 
 
 
274
 
275
+ # --- 回调函数绑定 ---
276
+
277
+ # 单文件分析器的回调
278
+ def single_file_precision_analyze(code):
279
  if not code or not code.strip(): return DisplayFormatter.format_display_result(precision_detector._empty_result())
280
  return DisplayFormatter.format_display_result(precision_detector.detect_with_line_info(code))
281
 
282
+ single_file_outputs = [detected_language, confidence_score, processing_time, total_lines, stats_display, line_evidence, code_preview, language_distribution, code_quality, detection_details]
283
+ detect_btn.click(fn=single_file_precision_analyze, inputs=[code_input], outputs=single_file_outputs)
284
+ clear_btn.click(fn=DisplayFormatter.get_empty_display_state, outputs=[code_input] + single_file_outputs)
285
+
286
+ # 项目分析器的回调
287
+ def run_project_analysis_and_format(zip_file):
288
+ tree_str, stats, raw_stats = analyze_project_zip(zip_file)
289
+ formatted_tree = DisplayFormatter.format_project_tree(tree_str)
290
+ plot = DisplayFormatter.format_language_distribution_plot(stats)
291
+ return formatted_tree, plot, raw_stats
292
+
293
+ project_analyze_btn.click(
294
+ fn=run_project_analysis_and_format,
295
+ inputs=[zip_input],
296
+ outputs=[project_tree_output, lang_dist_plot, project_raw_json_output]
297
+ )
298
+
299
  return demo
300
 
301
  # 启动应用