sakuragolden committed on
Commit
f036ad4
·
verified ·
1 Parent(s): baccaf7

Upload coderX.py

Browse files
Files changed (1) hide show
  1. coderX.py +289 -0
coderX.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# codegen_gradio.py
#
# Gradio front-end for multi-language code generation via the Hugging Face
# Inference API: generate, translate, explain, refactor, test and document code.
import os
import io
import json
import time
import tempfile
import requests
import gradio as gr
from typing import Tuple, Optional

# ---------------------- Configuration ----------------------
HF_INFERENCE_URL = "https://api-inference.huggingface.co/models"
# Default model (for a strong code-oriented model, let the user enter one in the UI or use bigcode/starcoder)
DEFAULT_HF_MODEL = "bigcode/starcoder" # or "bigcode/starcoder-base" / user can change in UI

# Supported languages and their default file extensions (extensible).
# NOTE(review): "Dockerfile" maps to a bare filename (no dot), and both MATLAB
# and Objective-C map to ".m" — presumably intentional; confirm before relying
# on the extension for language detection.
LANG_EXT = {
    "Python": ".py",
    "JavaScript": ".js",
    "TypeScript": ".ts",
    "Go": ".go",
    "Java": ".java",
    "C": ".c",
    "C++": ".cpp",
    "C#": ".cs",
    "Rust": ".rs",
    "Kotlin": ".kt",
    "Swift": ".swift",
    "Ruby": ".rb",
    "PHP": ".php",
    "Shell": ".sh",
    "PowerShell": ".ps1",
    "HTML": ".html",
    "CSS": ".css",
    "SQL": ".sql",
    "R": ".r",
    "MATLAB": ".m",
    "Scala": ".scala",
    "Haskell": ".hs",
    "Lua": ".lua",
    "Perl": ".pl",
    "Dart": ".dart",
    "Elixir": ".ex",
    "Julia": ".jl",
    "Objective-C": ".m",
    "Assembly": ".s",
    "Dockerfile": "Dockerfile",
    "YAML": ".yml",
    "JSON": ".json",
    "XML": ".xml",
    "Protobuf": ".proto",
    # add more if needed
}

# Language choices shown in the UI dropdown (display order = dict insertion order)
LANG_CHOICES = list(LANG_EXT.keys())

# Simple security blacklist used to reject obviously malicious requests.
# Matching is a case-insensitive substring test (see detect_dangerous), so
# broad entries like "inject" may also hit benign text.
DANGEROUS_KEYWORDS = [
    "rm -rf", "format(", "mkfs", "dd if=", "fork bomb", "shutdown", "reboot", "poweroff",
    "create user", "adduser", "useradd", "passwd", "ssh -i", "cryptominer", "virus", "malware",
    "ransomware", "keylogger", "inject", "exploit", "sqlmap", "metasploit", "reverse shell",
    "nc -e", "wget http", "curl http", "chmod 777 /", "sudo rm -rf /", ">: /dev/sda"
]

# Prompt templates per task; placeholders {lang}, {content} and {src_lang}
# are filled in by build_prompt.
TASK_TEMPLATES = {
    "Generate code from description": "Implement the following functionality in {lang}:\n\n{content}\n\nPlease provide only the code, no extra commentary.",
    "Translate code to another language": "Translate the following code from {src_lang} to {lang}. Keep behavior identical and include necessary imports/dependencies.\n\n```{src_lang}\n{content}\n```",
    "Explain code": "Explain the following {lang} code. Provide a concise explanation of what it does, complexity if applicable, and potential pitfalls or edge cases.\n\n```{lang}\n{content}\n```",
    "Refactor code (improve readability/performance)": "Refactor the following {lang} code for readability and performance. Keep behavior identical, explain briefly what you changed, then provide the refactored code only.\n\n```{lang}\n{content}\n```",
    "Add unit tests": "Write unit tests for the following {lang} code. Use common testing framework for {lang} (e.g., pytest for Python, jest for JS). Provide test code only.\n\n```{lang}\n{content}\n```",
    "Document & comment code": "Add clear inline comments and a top-level docstring explaining the purpose, inputs, outputs, and side effects for this {lang} code. Then provide the commented code only.\n\n```{lang}\n{content}\n```",
    "Optimize for performance": "Optimize the following {lang} code for performance. Keep same external behavior. Explain the optimizations in 2-3 lines, then provide the optimized code only.\n\n```{lang}\n{content}\n```",
    "Add type hints / static types": "Add type annotations or static types to the following {lang} code where appropriate. Make sure the code remains valid.\n\n```{lang}\n{content}\n```",
    "Create CLI tool": "Create a command-line interface (CLI) tool in {lang} that wraps the following functionality: {content}. Provide a complete script with argument parsing and usage example.",
}
78
+
# ---------------------- HF Inference helper ----------------------
def call_hf_inference(model: str, hf_token: str, prompt: str, max_new_tokens: int = 512, temperature: float = 0.2, top_k: Optional[int] = None) -> str:
    """
    Call the Hugging Face Inference API for text generation.

    Parameters:
        model: model id on the Hub, e.g. "bigcode/starcoder".
        hf_token: user's HF API token; an empty/blank token short-circuits with an error string.
        prompt: full text prompt sent as "inputs".
        max_new_tokens / temperature / top_k: generation parameters forwarded to the API.

    Returns:
        The generated text, or a bracketed error string ("[Error] ...", "[HF Error] ...",
        "[HF HTTP Error] ...") so UI callers can surface failures without exception handling.
    """
    if not hf_token or hf_token.strip() == "":
        return "[Error] No Hugging Face token provided. Please paste your HF token in the UI."

    url = f"{HF_INFERENCE_URL}/{model}"
    headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}}
    if top_k is not None:
        payload["parameters"]["top_k"] = top_k

    # Bound before the try so the HTTPError handler can reference it safely
    # (the original used a fragile "'resp' in locals()" check instead).
    resp = None
    try:
        resp = requests.post(url, headers=headers, json=payload, timeout=120)
        resp.raise_for_status()
        data = resp.json()
        # The API returns different shapes depending on model/pipeline; handle each.
        if isinstance(data, list) and len(data) > 0:
            first = data[0]
            if isinstance(first, dict) and "generated_text" in first:
                return first["generated_text"]
            # sometimes it's raw text
            return str(first)
        if isinstance(data, dict):
            if "generated_text" in data:
                return data["generated_text"]
            if "error" in data:
                return "[HF Error] " + str(data["error"])
            return json.dumps(data)
        return str(data)
    except requests.exceptions.HTTPError as e:
        body = resp.text if resp is not None else ""
        return f"[HF HTTP Error] {e} - {body}"
    except Exception as e:
        # Broad catch by design: network/parse failures must not crash the UI thread.
        return f"[HF Error] {e}"
116
+
117
+
# ---------------------- helpers ----------------------
def detect_dangerous(text: str) -> Optional[str]:
    """Return the first blacklisted keyword found in *text* (case-insensitive), or None."""
    lowered = text.lower()
    # Keywords are stored lowercase; scan in list order and stop at the first hit.
    return next((keyword for keyword in DANGEROUS_KEYWORDS if keyword in lowered), None)
125
+
def build_prompt(task: str, lang: str, content: str, src_lang: Optional[str] = None) -> str:
    """Render the prompt template registered for *task*; unknown tasks fall back to the raw content."""
    template = TASK_TEMPLATES.get(task, "{content}")
    filled = template.format(lang=lang, content=content, src_lang=src_lang or "")
    return filled
129
+
def ext_for_language(lang: str) -> str:
    """Map a display-language name to its file extension; unknown languages get ".txt"."""
    try:
        return LANG_EXT[lang]
    except KeyError:
        return ".txt"
132
+
def save_code_to_tempfile(code: str, filename_hint: str = "generated", ext: str = ".txt") -> str:
    """Write *code* (UTF-8) to a fresh temp file named "<hint>_*<ext>" and return its path."""
    handle, tmp_path = tempfile.mkstemp(prefix=f"{filename_hint}_", suffix=ext)
    # Wrap the raw descriptor so it is closed even if the write raises.
    with os.fdopen(handle, "w", encoding="utf-8") as out:
        out.write(code)
    return tmp_path
138
+
# ---------------------- Gradio backends ----------------------
def generate_code_task(task: str, hf_token: str, hf_model: str, language: str, src_language: str, description: str,
                       temperature: float, max_new_tokens: int, top_k: int) -> Tuple[str, Optional[str]]:
    """
    Main generation entry: builds the prompt, calls HF Inference and returns
    (code_str, download_path_or_None).

    On refusal or API failure the first element is a bracketed message and the
    path is None, so the UI never offers an error message as a download.
    """
    # security check: refuse obviously destructive requests up front
    danger = detect_dangerous(description)
    if danger:
        return f"[Refused] Request contains potentially dangerous keyword: '{danger}'. Code generation aborted.", None

    prompt = build_prompt(task, language, description, src_lang=src_language)
    # Some code models do better if prompt includes an instruction header
    instruction = f"# Instruction: {task} for language {language}\n# Begin\n{prompt}\n# End\n"
    gen = call_hf_inference(hf_model, hf_token, instruction, max_new_tokens=max_new_tokens, temperature=temperature, top_k=(None if top_k == 0 else top_k))

    # call_hf_inference signals failure with a bracketed string; don't save those
    # as code. (The original only checked the "[HF" prefix, which let
    # "[Error] ..." messages — e.g. a missing token — be written to a file.)
    if isinstance(gen, str) and (gen.startswith("[HF") or gen.startswith("[Error]")):
        return gen, None

    code = gen.strip()
    # If the model echoed the prompt, cut off the prompt portion
    if instruction.strip() and code.startswith(instruction.strip()):
        code = code[len(instruction.strip()):].strip()

    # If the model wrapped its answer in markdown fences, keep only the code.
    if "```" in code:
        code = _first_fenced_chunk(code)

    # Prepare downloadable file
    ext = ext_for_language(language)
    path = save_code_to_tempfile(code, filename_hint="generated_code", ext=ext)
    return code, path


def _first_fenced_chunk(text: str) -> str:
    """Return the first non-empty ```fenced``` chunk of *text* with any leading
    language tag (e.g. "python") stripped; return *text* unchanged if no
    complete fence is present."""
    parts = text.split("```")
    if len(parts) < 3:
        return text
    # Odd indices of the split fall inside fences.
    for i in range(1, len(parts), 2):
        chunk = parts[i]
        if chunk.strip():
            # Fences usually open as "```python\n..."; drop that tag line
            # (the original kept it, corrupting the extracted code).
            first_line, sep, rest = chunk.partition("\n")
            if sep and rest.strip() and len(first_line.split()) <= 1:
                chunk = rest
            return chunk.strip()
    return text
186
+
# ---------------------- UI components & logic ----------------------
def do_generate(task, hf_token, hf_model, language, src_language, description, temperature, max_new_tokens, top_k):
    """Validate the required text inputs, then delegate to generate_code_task.

    Returns (text, download_path_or_None); validation failures return a
    bracketed error message and None without contacting the API.
    """
    # Required fields checked in a fixed order; blank (whitespace-only) counts as missing.
    checks = (
        (hf_token, "[Error] Please paste your Hugging Face API token in the HF token field."),
        (hf_model, "[Error] Please enter a Hugging Face model name (e.g. bigcode/starcoder)."),
        (description, "[Error] Please provide a description or code to operate on."),
    )
    for value, message in checks:
        if not value or not value.strip():
            return message, None

    return generate_code_task(task, hf_token, hf_model, language, src_language, description, temperature, max_new_tokens, top_k)
199
+
# Helper: simple examples
# Each row is (task, hf_model, target_language, source_language, description,
# temperature, max_new_tokens, top_k) — the same order load_example unpacks.
EXAMPLES = [
    ("Generate code from description", "bigcode/starcoder", "Python", "", "A function that computes the nth Fibonacci number using dynamic programming and returns results as integers.", 0.2, 256, 0),
    ("Translate code to another language", "bigcode/starcoder", "JavaScript", "Python", "def greet(name):\n return f\"Hello, {name}!\"", 0.2, 256, 0),
    ("Add unit tests", "bigcode/starcoder", "Python", "", "def add(a, b):\n return a + b", 0.2, 256, 0),
    ("Explain code", "bigcode/starcoder", "Go", "", 'package main\n\nimport "fmt"\n\nfunc main() {\n fmt.Println("Hello world")\n}', 0.2, 256, 0),
]
207
+
def build_ui():
    """Assemble and return the Gradio Blocks app (inputs column, output column, wiring)."""
    with gr.Blocks(title="Polyglot Code Generator (HF Inference)") as demo:
        gr.Markdown("# 🚀 Polyglot Code Generator\nGenerate, translate, explain, refactor, test and document code in many languages using Hugging Face models.\n\n**Important:** paste your Hugging Face Inference API token below (Settings → Access Tokens on Hugging Face).")

        with gr.Row():
            with gr.Column(scale=3):
                hf_token = gr.Textbox(label="Hugging Face API Token (paste here)", type="password", placeholder="hf_xxx...")
                hf_model = gr.Textbox(label="HF model name", value=DEFAULT_HF_MODEL, placeholder="bigcode/starcoder or bigcode/starcoder-base")
                task = gr.Dropdown(label="Task", choices=list(TASK_TEMPLATES.keys()), value="Generate code from description")
                language = gr.Dropdown(label="Target language", choices=LANG_CHOICES, value="Python")
                src_language = gr.Textbox(label="Source language (for translation)", placeholder="e.g. Python", value="")
                description = gr.Textbox(label="Description / Input code", lines=8, placeholder="Describe the feature or paste the code to transform...")
                temp = gr.Slider(label="temperature", minimum=0.0, maximum=1.0, value=0.2, step=0.05)
                max_tokens = gr.Slider(label="max_new_tokens", minimum=16, maximum=2048, value=512, step=16)
                top_k = gr.Slider(label="top_k (0 = default)", minimum=0, maximum=100, value=0, step=1)
                gen_btn = gr.Button("Generate Code")

                with gr.Accordion("Prompt templates & examples", open=False):
                    gr.Markdown("Choose a task and the app will apply an appropriate prompt template. Examples below can be loaded into the inputs.")
                    example_btns = []
                    for ex in EXAMPLES:
                        b = gr.Button(f"Load example: {ex[0]} → {ex[2]}")
                        example_btns.append((b, ex))

            with gr.Column(scale=2):
                gr.Markdown("### Output")
                code_out = gr.Code(value="", language="python", label="Generated Code / Explanation")
                download_file = gr.File(label="Download generated file (click to download)")

                with gr.Row():
                    copy_btn = gr.Button("Copy to clipboard (browser)")  # copy happens client-side; button is a UI hint only
                    save_btn = gr.Button("Save as file (prepare download)")

                gr.Markdown("### Quick actions")
                explain_btn = gr.Button("Explain this code")  # convenience to re-run with Explain task

        # ---- Wiring ----
        def load_example(example):
            # example mirrors an EXAMPLES row:
            # (task, model, target_lang, src_lang, description, temperature, max_new_tokens, top_k)
            task_v, hf_mod, lang, src_lang, desc, temperature, new_tokens, top_k_v = example
            return hf_mod, task_v, lang, src_lang, desc, temperature, new_tokens, top_k_v

        # Register example buttons.
        # BUG FIX: the original wired `fn=load_example, inputs=None`, so the
        # callback received no argument and every click raised a TypeError; it
        # also passed the removed `_js=None` kwarg and a no-op `.then()`.
        # Bind each row at definition time via a default argument instead.
        for btn, ex in example_btns:
            btn.click(fn=lambda example=ex: load_example(example),
                      inputs=None,
                      outputs=[hf_model, task, language, src_language, description, temp, max_tokens, top_k])

        def prepare_and_generate(hf_token_val, hf_model_val, task_val, language_val, src_language_val, description_val, temp_val, max_tokens_val, top_k_val):
            # Sliders deliver floats; generation params must be ints.
            code, path = do_generate(task_val, hf_token_val, hf_model_val, language_val, src_language_val, description_val, temp_val, int(max_tokens_val), int(top_k_val))
            return code, path

        # BUG FIX: the original listed code_out twice in `outputs` and returned the
        # highlight-language string as a third value, so the generated code was
        # immediately overwritten with e.g. "python". One value per output now.
        gen_btn.click(fn=prepare_and_generate,
                      inputs=[hf_token, hf_model, task, language, src_language, description, temp, max_tokens, top_k],
                      outputs=[code_out, download_file])

        # Save file (prepare download) button: write latest code to temp file and return path
        def save_generated_as_file(code_text, language_val):
            if not code_text or code_text.strip() == "":
                return None
            ext = ext_for_language(language_val)
            path = save_code_to_tempfile(code_text, filename_hint="generated_code", ext=ext)
            return path

        save_btn.click(fn=save_generated_as_file, inputs=[code_out, language], outputs=[download_file])

        # Explain whatever is currently in the output panel
        def quick_explain(hf_token_val, hf_model_val, code_text, language_val, temp_val, max_tokens_val):
            if not code_text or code_text.strip() == "":
                return "[Error] No code to explain."
            # reuse TASK_TEMPLATES explanation
            prompt = build_prompt("Explain code", language_val, code_text)
            return call_hf_inference(hf_model_val, hf_token_val, f"# Instruction: Explain code\n{prompt}\n", max_new_tokens=int(max_tokens_val), temperature=float(temp_val))

        explain_btn.click(fn=quick_explain, inputs=[hf_token, hf_model, code_out, language, temp, max_tokens], outputs=[code_out])

        gr.Markdown("---\n**Notes & safety**: This app calls the Hugging Face Inference API; keep your token private. The app refuses obviously destructive requests by simple keyword checks. Do not use generated code in production without review.")

    return demo
286
+
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside a container.
    app = build_ui()
    app.launch(server_name="0.0.0.0", share=False)