pangxiang committed on
Commit
d8bb2ca
·
verified ·
1 Parent(s): 3ae6216

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -219
app.py CHANGED
@@ -1,237 +1,265 @@
1
import math
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
5
 
6
@dataclass
class HeuristicRule:
    """One weighted regular-expression feature used to score a language.

    The detector counts the matches of ``pattern`` in the analysed text and
    adds ``weight`` to the language's score for every match; ``description``
    is the label that shows up in the explanation list.
    """

    pattern: str  # regular expression searched for in the code
    weight: float  # score contributed by each match
    description: str  # human-readable label of the feature
 
 
 
 
 
11
 
12
class MultiLanguageDetector:
    """Lightweight, regex-based detector for the language of a code snippet.

    Every supported language owns a list of ``HeuristicRule`` objects. A
    snippet is scored per language as the sum of ``matches * weight`` over
    that language's rules, and the best-scoring language wins.
    """

    def __init__(self):
        # Feature rules for each of the 15 supported languages.
        self.heuristics: Dict[str, List[HeuristicRule]] = {
            "html": [
                HeuristicRule(r"<!DOCTYPE html>", 5.0, "DOCTYPE声明"),
                HeuristicRule(r"<html\b", 4.0, "html标签"),
                HeuristicRule(r"</html>", 3.0, "html闭合标签"),
                HeuristicRule(r"<(head|body|div|span|p|h1|h2|ul|li|title|meta)>", 2.0, "HTML标签"),
                HeuristicRule(r"</\w+>", 1.5, "闭合标签"),
                HeuristicRule(r"<\w+\s+[^>]*>", 1.2, "带属性标签"),
            ],
            "css": [
                HeuristicRule(r"\.\w+\s*\{", 4.0, "类选择器"),
                HeuristicRule(r"#\w+\s*\{", 3.5, "ID选择器"),
                HeuristicRule(r"\w+\s*\{", 2.5, "标签选择器"),
                HeuristicRule(r"[\w-]+\s*:\s*[^;]+;", 2.0, "属性声明"),
                HeuristicRule(r"@media|@keyframes|@import", 3.0, "CSS规则"),
            ],
            "javascript": [
                HeuristicRule(r"\bfunction\s+\w+\s*\(", 4.0, "函数定义"),
                HeuristicRule(r"\b(const|let|var)\s+\w+\s*=", 3.0, "变量声明"),
                HeuristicRule(r"console\.(log|error|warn)", 2.5, "控制台输出"),
                HeuristicRule(r"document\.(getElementById|querySelector)", 3.0, "DOM操作"),
                HeuristicRule(r"addEventListener\s*\(", 2.5, "事件监听"),
                HeuristicRule(r"=>\s*{?", 2.0, "箭头函数"),
            ],
            "python": [
                HeuristicRule(r"\bdef\s+\w+\s*\(", 4.0, "函数定义"),
                HeuristicRule(r"\bclass\s+\w+", 3.5, "类定义"),
                HeuristicRule(r"\bimport\s+\w+", 3.0, "导入语句"),
                HeuristicRule(r"if __name__ == ['\"]__main__['\"]", 4.5, "主程序入口"),
                HeuristicRule(r"print\s*\([^)]+\)", 2.5, "打印语句"),
                HeuristicRule(r":\s*(#.*)?$", 2.0, "冒号语法"),
            ],
            "java": [
                HeuristicRule(r"\bpublic\s+class\b", 5.0, "公共类"),
                HeuristicRule(r"\bstatic\s+void\s+main\s*\(", 4.5, "主方法"),
                HeuristicRule(r"System\.out\.(print|println)", 3.0, "输出语句"),
                HeuristicRule(r"\bimport\s+java\.", 3.5, "Java导入"),
                HeuristicRule(r"@Override|@Deprecated", 2.5, "Java注解"),
            ],
            "cpp": [
                HeuristicRule(r"#include\s*<[^>]+>", 4.5, "头文件包含"),
                HeuristicRule(r"using\s+namespace\s+std", 3.5, "命名空间"),
                HeuristicRule(r"std::", 3.0, "标准库"),
                HeuristicRule(r"cout\s*<<", 2.5, "输出流"),
                HeuristicRule(r"int\s+main\s*\(", 4.0, "主函数"),
            ],
            "c": [
                HeuristicRule(r"#include\s*<[^>]+>", 4.0, "头文件包含"),
                HeuristicRule(r"int\s+main\s*\(", 4.5, "主函数"),
                HeuristicRule(r"printf\s*\(", 3.0, "格式化输出"),
                HeuristicRule(r"scanf\s*\(", 2.5, "格式化输入"),
                HeuristicRule(r"#define\s+\w+", 3.0, "宏定义"),
            ],
            "php": [
                HeuristicRule(r"<\?php", 5.0, "PHP起始标签"),
                HeuristicRule(r"\$\w+\s*=", 3.0, "变量赋值"),
                HeuristicRule(r"echo\s+[^;]+;", 2.5, "输出语句"),
                HeuristicRule(r"function\s+\w+\s*\(", 3.5, "函数定义"),
                HeuristicRule(r"->\w+\s*\(", 2.5, "方法调用"),
            ],
            "sql": [
                HeuristicRule(r"\bSELECT\b.*\bFROM\b", 4.0, "SELECT查询"),
                HeuristicRule(r"\bINSERT\s+INTO\b", 3.5, "插入语句"),
                HeuristicRule(r"\bUPDATE\b.*\bSET\b", 3.5, "更新语句"),
                HeuristicRule(r"\bCREATE\s+TABLE\b", 3.0, "创建表"),
                HeuristicRule(r"\bWHERE\b", 2.5, "WHERE条件"),
            ],
            "ruby": [
                HeuristicRule(r"def\s+\w+", 4.0, "方法定义"),
                HeuristicRule(r"class\s+\w+", 3.5, "类定义"),
                HeuristicRule(r"puts\s+", 2.5, "输出语句"),
                HeuristicRule(r"do\s*\|[^|]+\|", 2.0, "代码块"),
                HeuristicRule(r"@\w+", 2.0, "实例变量"),
            ],
            "swift": [
                HeuristicRule(r"func\s+\w+\s*\([^)]*\)", 4.0, "函数定义"),
                HeuristicRule(r"var\s+\w+", 3.0, "变量声明"),
                HeuristicRule(r"let\s+\w+", 3.0, "常量声明"),
                HeuristicRule(r"print\s*\([^)]+\)", 2.5, "打印语句"),
                HeuristicRule(r"import\s+\w+", 2.5, "导入语句"),
            ],
            "typescript": [
                HeuristicRule(r"interface\s+\w+", 4.0, "接口定义"),
                HeuristicRule(r"type\s+\w+", 3.5, "类型定义"),
                HeuristicRule(r":\s*\w+[^=];?", 3.0, "类型注解"),
                HeuristicRule(r"<[^>]+>", 2.5, "泛型"),
                HeuristicRule(r"const\s+\w+:\s*\w+", 3.0, "类型变量"),
            ],
            "go": [
                HeuristicRule(r"func\s+\w+\s*\([^)]*\)", 4.0, "函数定义"),
                HeuristicRule(r"package\s+\w+", 4.5, "包声明"),
                HeuristicRule(r"fmt\.Print", 3.0, "格式化输出"),
                HeuristicRule(r":=\s*[^;]+", 3.0, "短变量声明"),
                HeuristicRule(r"go\s+func\s*\([^)]*\)", 3.5, "Go协程"),
            ],
            "rust": [
                HeuristicRule(r"fn\s+\w+\s*\([^)]*\)", 4.0, "函数定义"),
                HeuristicRule(r"let\s+(mut\s+)?\w+", 3.0, "变量声明"),
                # Was r"println!\s*!", which demands a second "!" and so
                # could never match a real `println!(...)` invocation.
                HeuristicRule(r"println!\s*\(", 3.0, "宏输出"),
                HeuristicRule(r"->\s*\w+", 2.5, "返回类型"),
                HeuristicRule(r"impl\s+\w+", 3.0, "实现块"),
            ],
            "kotlin": [
                HeuristicRule(r"fun\s+\w+\s*\([^)]*\)", 4.0, "函数定义"),
                HeuristicRule(r"val\s+\w+", 3.0, "不可变变量"),
                HeuristicRule(r"var\s+\w+", 3.0, "可变变量"),
                HeuristicRule(r"println\s*\([^)]+\)", 2.5, "输出语句"),
                HeuristicRule(r":\s*\w+", 2.5, "类型声明"),
            ],
        }

    def detect_language(self, code: str) -> Dict[str, Any]:
        """Identify the language of ``code``.

        Args:
            code: The text to analyse.

        Returns:
            A dict that always contains ``language`` and ``confidence``.
            Empty input yields ``language == "unknown"``; text without any
            matching feature yields ``language == "text"``; an HTML document
            with embedded CSS/JavaScript yields the mixed-language dict built
            by ``_detect_mixed_languages``; otherwise the dict also carries
            ``score``, ``all_scores``, ``explanation`` and ``top_features``.
        """
        if not code or not code.strip():
            return {
                "language": "unknown",
                "confidence": 0.0,
                "message": "代码为空",
            }

        scores: Dict[str, float] = {}
        explanations: Dict[str, List[str]] = {}
        flags = re.IGNORECASE | re.MULTILINE

        # Score every language: each rule adds (match count * weight).
        for language, rules in self.heuristics.items():
            total_score = 0.0
            notes: List[str] = []
            for rule in rules:
                hits = len(re.findall(rule.pattern, code, flags=flags))
                if hits:
                    feature_score = hits * rule.weight
                    total_score += feature_score
                    notes.append(f"{rule.description}({hits}次,+{feature_score:.1f})")
            scores[language] = total_score
            explanations[language] = notes

        # Mixed documents (HTML + CSS/JS) take precedence over the plain ranking.
        mixed_languages = self._detect_mixed_languages(code, scores)
        if mixed_languages:
            return mixed_languages

        if not scores or max(scores.values()) == 0:
            return {
                "language": "text",
                "confidence": 0.0,
                "message": "未识别到编程语言特征,可能是纯文本",
            }

        best_language = max(scores.items(), key=lambda item: item[1])[0]
        best_score = scores[best_language]
        total_score_sum = sum(scores.values())

        # Confidence is the winner's share of all points, capped below 1.0.
        confidence = best_score / total_score_sum if total_score_sum > 0 else 0.0
        best_notes = explanations.get(best_language, [])

        return {
            "language": best_language,
            "confidence": min(confidence, 0.99),
            "score": best_score,
            "all_scores": {lang: round(score, 2) for lang, score in scores.items() if score > 0},
            "explanation": best_notes,
            "top_features": best_notes[:3],  # first three matched features only
        }

    def _detect_mixed_languages(self, code: str, scores: Dict[str, float]) -> Optional[Dict[str, Any]]:
        """Detect an HTML document that embeds CSS and/or JavaScript.

        Args:
            code: The analysed text (currently unused; kept for the interface).
            scores: Per-language scores computed by ``detect_language``.

        Returns:
            A mixed-language result dict when the HTML score exceeds 10 and
            the CSS or JavaScript score exceeds 5, otherwise ``None``. Only
            the embedded languages that actually crossed the threshold are
            listed in ``mixed_with`` / ``embedded_languages``.
        """
        if scores.get("html", 0) <= 10:
            return None

        embedded = [lang for lang in ("css", "javascript") if scores.get(lang, 0) > 5]
        if not embedded:
            return None

        return {
            "language": "html",
            "confidence": 0.85,
            "is_mixed": True,
            "mixed_with": list(embedded),
            "message": "检测到HTML与CSS/JavaScript混合代码",
            "primary_language": "html",
            "embedded_languages": embedded,
        }
208
-
209
# Module-level detector instance, created once when the module is imported.
language_detector = MultiLanguageDetector()


def detect_code_language(code: str) -> Dict[str, Any]:
    """Detect the language of ``code`` using the shared module-level detector.

    Thin convenience wrapper around ``language_detector.detect_language`` so
    callers do not have to manage a detector instance themselves.

    Args:
        code: The text to analyse.

    Returns:
        The result dict produced by ``MultiLanguageDetector.detect_language``.
    """
    return language_detector.detect_language(code)
219
-
220
- # 测试函数
221
def test_detection():
    """Run a few sample snippets through the detector and print each verdict."""
    samples = (
        ("<html><body>Hello</body></html>", "HTML测试"),
        (".class { color: red; }", "CSS测试"),
        ("function test() { console.log('hello'); }", "JavaScript测试"),
        ("def hello(): print('world')", "Python测试"),
        ("public class Main { public static void main(String[] args) {} }", "Java测试"),
    )

    for snippet, label in samples:
        outcome = detect_code_language(snippet)
        language = outcome['language']
        confidence = outcome['confidence']
        print(f"{label}: {language} (置信度: {confidence:.2f})")


if __name__ == "__main__":
    test_detection()
 
 
 
 
 
 
 
 
237
 
 
1
+ import gradio as gr
2
+ import time
3
+ from language_detector import detect_language_ultra_fast
 
4
 
5
# Custom stylesheet handed to the Gradio Blocks app: colour variables, page
# background, header banner, card/metric panels and badge helpers.
custom_css: str = """
:root {
    --primary: #6366f1;
    --primary-dark: #4f46e5;
    --secondary: #10b981;
    --accent: #f59e0b;
    --dark: #1e293b;
    --darker: #0f172a;
}

.gradio-container {
    background: linear-gradient(135deg, var(--darker) 0%, var(--dark) 100%) !important;
    font-family: 'Segoe UI', system-ui, sans-serif !important;
}

.performance-header {
    background: linear-gradient(135deg, var(--primary) 0%, var(--secondary) 100%) !important;
    padding: 2rem !important;
    border-radius: 12px !important;
    margin-bottom: 1.5rem !important;
    box-shadow: 0 10px 25px rgba(0,0,0,0.3) !important;
}

.performance-header h1 {
    color: white !important;
    font-weight: 700 !important;
    font-size: 2.2rem !important;
    margin-bottom: 0.5rem !important;
    text-shadow: 0 2px 4px rgba(0,0,0,0.3) !important;
}

.performance-card {
    background: rgba(255,255,255,0.05) !important;
    padding: 1.5rem !important;
    border-radius: 12px !important;
    border: 1px solid rgba(255,255,255,0.1) !important;
    margin-bottom: 1rem !important;
}

.performance-metric {
    background: rgba(255,255,255,0.08) !important;
    padding: 1rem !important;
    border-radius: 8px !important;
    border-left: 4px solid var(--primary) !important;
    margin: 0.5rem 0 !important;
}

.language-badge {
    display: inline-block;
    padding: 0.25rem 0.75rem;
    background: var(--primary);
    color: white;
    border-radius: 20px;
    font-size: 0.8rem;
    font-weight: 600;
    margin: 0.1rem;
}

.confidence-high { background: #10b981 !important; }
.confidence-medium { background: #f59e0b !important; }
.confidence-low { background: #ef4444 !important; }
"""
68
+
69
def create_detection_interface():
    """Build (but do not launch) the Gradio UI for the language detector.

    The page consists of a code input box with a detect button, a JSON panel
    for the raw detection result, a Markdown report panel, a static feature
    list and a set of clickable example snippets.

    Returns:
        The ``gr.Blocks`` object created by the ``with gr.Blocks(...)`` context.
    """

    def generate_performance_report(result, detection_time):
        """Render a detection result dict as a Markdown report.

        Args:
            result: Dict returned by the detector. Only optional keys are
                read: ``language``, ``confidence``, ``processing_time_ms``,
                ``is_mixed``, ``mixed_with``, ``features``,
                ``evolution_boost`` and ``all_scores``.
            detection_time: Measured duration in seconds; used when the
                result carries no ``processing_time_ms`` of its own.

        Returns:
            The report as Markdown paragraphs joined by blank lines.
        """
        # Fall back to "unknown" for a missing *or* None language so the
        # ``.upper()`` call below cannot fail.
        lang = result.get('language') or 'unknown'
        confidence = result.get('confidence', 0)
        processing_time = result.get('processing_time_ms', detection_time * 1000)

        # Confidence badge.
        if confidence > 0.8:
            conf_badge = "🟢 高置信度"
        elif confidence > 0.5:
            conf_badge = "🟡 中置信度"
        else:
            conf_badge = "🔴 低置信度"

        # Speed rating, thresholds in milliseconds.
        if processing_time < 1:
            perf_rating = "⚡ 极速"
        elif processing_time < 5:
            perf_rating = "🚀 快速"
        else:
            perf_rating = "🐢 一般"

        report_lines = [
            "## 🎯 检测报告",
            f"**主要语言**: `{lang.upper()}`",
            f"**置信度**: `{confidence:.1%}` {conf_badge}",
            f"**处理时间**: `{processing_time:.2f}ms` {perf_rating}",
        ]

        if result.get('is_mixed'):
            report_lines.append(f"**混合语言**: {', '.join(result.get('mixed_with', []))}")

        if result.get('features'):
            report_lines.append("**识别特征**: " + " | ".join(result['features'][:3]))

        if 'evolution_boost' in result:
            report_lines.append(f"**进化加成**: {result['evolution_boost']}x 使用频率")

        if 'all_scores' in result:
            scores_text = ", ".join([f"{k}:{v}" for k, v in result['all_scores'].items()][:3])
            report_lines.append(f"**语言得分**: {scores_text}")

        return "\n\n".join(report_lines)

    def analyze_code_performance(code):
        """Detect the language of ``code`` and build the accompanying report.

        Returns:
            A ``(result_dict, report_markdown)`` pair for the JSON and
            Markdown output components.
        """
        if not code or not code.strip():
            empty_result = {
                "language": "unknown",
                "confidence": 0.0,
                "message": "请输入代码",
            }
            return empty_result, "❌ 输入为空"

        # perf_counter is the high-resolution monotonic clock intended for
        # measuring short durations; time.time() can be too coarse here.
        started = time.perf_counter()
        result = detect_language_ultra_fast(code)
        detection_time = time.perf_counter() - started

        return result, generate_performance_report(result, detection_time)

    def preview_while_typing(code):
        """Live detection on every edit; only runs once the text exceeds 50 chars."""
        # ``code`` is checked for truthiness first so a None value cannot
        # reach ``len``.
        if code and len(code) > 50:
            return detect_language_ultra_fast(code)
        return {"language": "typing", "confidence": 0}

    html_sample = (
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<head>\n"
        "    <title>测试页面</title>\n"
        "    <style>.test { color: red; }</style>\n"
        "    <script>function test() { console.log('hello'); }</script>\n"
        "</head>\n"
        "<body>\n"
        "    <div>Hello World</div>\n"
        "</body>\n"
        "</html>"
    )
    python_sample = (
        "def fibonacci(n):\n"
        "    if n <= 1:\n"
        "        return n\n"
        "    return fibonacci(n-1) + fibonacci(n-2)\n"
        "\n"
        "print(fibonacci(10))"
    )
    java_sample = (
        'public class Main {\n'
        '    public static void main(String[] args) {\n'
        '        System.out.println("Hello Java!");\n'
        '    }\n'
        '}'
    )
    css_sample = (
        ".container {\n"
        "    display: flex;\n"
        "    justify-content: center;\n"
        "    align-items: center;\n"
        "}"
    )

    # Page layout.
    with gr.Blocks(
        title="Capricode 超高速语言识别",
        css=custom_css,
        theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald"),
    ) as interface:

        # Header banner.
        with gr.Column(elem_classes="performance-header"):
            gr.Markdown(
                "\n"
                "# 🚀 Capricode 超高速代码语言识别\n"
                "> 极致性能 • 自我进化 • 生产就绪\n"
            )

        with gr.Row():
            # Input side.
            with gr.Column(scale=1):
                with gr.Column(elem_classes="performance-card"):
                    gr.Markdown("### 📥 输入代码")
                    code_input = gr.Textbox(
                        label="",
                        placeholder=(
                            "粘贴任意编程语言代码...\n"
                            "🚀 支持: HTML, CSS, JavaScript, Python, Java, C++, 等12+语言"
                        ),
                        lines=15,
                        show_label=False,
                    )
                    detect_btn = gr.Button(
                        "🎯 极速识别",
                        variant="primary",
                        size="lg",
                    )

            # Output side.
            with gr.Column(scale=1):
                with gr.Column(elem_classes="performance-card"):
                    gr.Markdown("### 📊 检测结果")
                    result_json = gr.JSON(
                        label="详细结果",
                        show_label=True,
                    )

                with gr.Column(elem_classes="performance-card"):
                    gr.Markdown("### 📈 性能报告")
                    report_output = gr.Markdown(
                        label="分析报告",
                        value="等待代码分析...",
                    )

        # Static feature list and example snippets.
        with gr.Row():
            with gr.Column(elem_classes="performance-card"):
                gr.Markdown("### ⚡ 性能特性")
                gr.Markdown(
                    "\n"
                    "- **平均响应**: < 1ms\n"
                    "- **内存占用**: < 10MB\n"
                    "- **支持语言**: 12+\n"
                    "- **混合检测**: ✅ 支持\n"
                    "- **自我进化**: ✅ 启用\n"
                    "- **生产就绪**: ✅ 已验证\n"
                )

            with gr.Column(elem_classes="performance-card"):
                gr.Markdown("### 🧪 快速测试")
                gr.Examples(
                    examples=[
                        [html_sample],
                        [python_sample],
                        [java_sample],
                        [css_sample],
                    ],
                    inputs=code_input,
                    label="点击示例快速测试",
                )

        # Event wiring: explicit detection on click ...
        detect_btn.click(
            fn=analyze_code_performance,
            inputs=[code_input],
            outputs=[result_json, report_output],
        )

        # ... and a lightweight live preview while the user types.
        code_input.change(
            fn=preview_while_typing,
            inputs=[code_input],
            outputs=[result_json],
            show_progress="hidden",
        )

    return interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
# Build the application; launching is left to the caller.
def main():
    """Create the detection interface and return it without launching it."""
    return create_detection_interface()


if __name__ == "__main__":
    # Hugging Face Space deployment settings.
    demo = main()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        # Share tunnels are not supported on Hugging Face Spaces, and the
        # Space already has its own public URL, so do not request one.
        share=False,
        show_error=True,
        debug=False,  # keep debug mode off in production
    )
265