Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -56,6 +56,70 @@ def log_request(instruction: str, generated_code: str, tokens_generated: int, la
|
|
| 56 |
except Exception as e:
|
| 57 |
print(f"Logging failed: {e}")
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
def load_model():
|
| 60 |
"""Lazy load model on first request"""
|
| 61 |
global tokenizer, model
|
|
@@ -85,12 +149,19 @@ def load_model():
|
|
| 85 |
|
| 86 |
|
| 87 |
def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.7):
|
| 88 |
-
"""Generate code from instruction with monitoring"""
|
| 89 |
start_time = time.time()
|
| 90 |
|
| 91 |
if not instruction.strip():
|
| 92 |
return "Please enter an instruction."
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
generated_code = ""
|
| 95 |
tokens_generated = 0
|
| 96 |
error = None
|
|
@@ -136,6 +207,9 @@ def generate_code(instruction: str, max_tokens: int = 256, temperature: float =
|
|
| 136 |
else:
|
| 137 |
generated_code = generated.strip()
|
| 138 |
|
|
|
|
|
|
|
|
|
|
| 139 |
except Exception as e:
|
| 140 |
error = str(e)
|
| 141 |
generated_code = f"Error: {error}\n\nPlease try again."
|
|
@@ -172,6 +246,17 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
|
|
| 172 |
**Performance**: 76% syntax correctness | **BLEU Score: 16.83** (+53% improvement over baseline 11.00)
|
| 173 |
|
| 174 |
**Note**: First request may take 1-2 minutes as the model loads on HuggingFace servers. Subsequent requests are instant!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
"""
|
| 176 |
)
|
| 177 |
|
|
|
|
| 56 |
except Exception as e:
|
| 57 |
print(f"Logging failed: {e}")
|
| 58 |
|
| 59 |
+
# Safety filters - Layer 1: Input Validation
# Lower-case phrases; an instruction containing any of them is rejected
# before it reaches the model. Multi-word phrases use single spaces.
DANGEROUS_KEYWORDS = [
    "delete all files", "rm -rf", "shutil.rmtree",
    "sql injection", "drop table", "truncate table",
    "keylogger", "backdoor", "exploit",
    "hack into", "steal password", "crack password",
    "ddos", "denial of service", "fork bomb",
    "malware", "ransomware", "trojan",
]


def validate_input(instruction: str) -> tuple:
    """
    Check an instruction against DANGEROUS_KEYWORDS.

    Matching is case-insensitive and whitespace-insensitive: the instruction
    is lower-cased and every run of whitespace (spaces, tabs, newlines) is
    collapsed to a single space before the keywords are searched for, so
    "DROP   TABLE" or "rm\\t-rf" are caught just like "drop table" / "rm -rf".

    Args:
        instruction: Raw user instruction text.

    Returns:
        (is_valid, error_message): ``(True, "")`` when no keyword is found,
        otherwise ``(False, message)`` where the message names the first
        keyword (in list order) that matched.
    """
    # Collapse whitespace so multi-word keywords cannot be bypassed by
    # inserting extra spaces, tabs or line breaks between the words.
    normalized = " ".join(instruction.lower().split())

    for keyword in DANGEROUS_KEYWORDS:
        if keyword in normalized:
            message = (
                "⚠️ Safety Filter: Request blocked. Your instruction contains "
                f"potentially unsafe content related to '{keyword}'."
                "\n\nPlease rephrase your request to focus on legitimate "
                "programming tasks."
            )
            return False, message

    return True, ""
|
| 81 |
+
|
| 82 |
+
# Safety filters - Layer 2: Output Filtering
# Each entry is (pattern, reason). The pattern is a literal code fragment;
# the reason is the short label shown to the user when it is detected.
DANGEROUS_PATTERNS = [
    ("os.remove", "file deletion"),
    ("shutil.rmtree", "directory deletion"),
    ("os.unlink", "file deletion"),
    ("DROP TABLE", "database destruction"),
    ("TRUNCATE TABLE", "database destruction"),
    ("DELETE FROM", "database deletion"),
    ("eval(", "arbitrary code execution"),
    ("exec(", "arbitrary code execution"),
    ("__import__", "dynamic imports"),
    ("os.system", "system command execution"),
    ("subprocess.call", "system command execution"),
    ("subprocess.run", "system command execution"),
]


def filter_dangerous_code(code: str) -> str:
    """
    Replace generated code with a safety warning if it contains a dangerous pattern.

    Every entry of DANGEROUS_PATTERNS is searched for case-insensitively.
    Compared with a plain substring test, a pattern must start at an
    identifier boundary (so ``do_retrieval(`` is not mistaken for ``eval(``),
    whitespace between the words of a multi-word pattern may be any run of
    whitespace (``DROP   TABLE``), and a pattern ending in ``(`` also matches
    when whitespace precedes the parenthesis (``eval (``).

    Args:
        code: Generated source code to inspect.

    Returns:
        ``code`` unchanged when nothing matches; otherwise a commented
        warning block naming the first matching pattern (in list order)
        and its reason.
    """
    # Function-scope import: the file's top-level import block is not
    # guaranteed to provide `re`.
    import re

    for pattern, reason in DANGEROUS_PATTERNS:
        # Escape each word literally and allow any whitespace run between words.
        regex = r"\s+".join(re.escape(word) for word in pattern.split())
        # Call-style patterns: tolerate whitespace before the opening parenthesis.
        if regex.endswith(r"\("):
            regex = regex[:-2] + r"\s*\("
        # Require an identifier boundary on the left so the pattern cannot
        # match the tail of a longer, unrelated name.
        regex = r"(?<![A-Za-z0-9_])" + regex

        if re.search(regex, code, re.IGNORECASE):
            return f"""# ⚠️ SAFETY FILTER ACTIVATED
#
# Code generation blocked: Potentially dangerous pattern detected ({reason})
# Pattern: {pattern}
#
# This is a safety feature to prevent generating code that could:
# - Delete files or data
# - Execute arbitrary system commands
# - Compromise system security
#
# Please rephrase your request with safer requirements.
# For educational purposes, consult official documentation or security resources.
"""

    return code
|
| 122 |
+
|
| 123 |
def load_model():
|
| 124 |
"""Lazy load model on first request"""
|
| 125 |
global tokenizer, model
|
|
|
|
| 149 |
|
| 150 |
|
| 151 |
def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.7):
|
| 152 |
+
"""Generate code from instruction with monitoring and safety filters"""
|
| 153 |
start_time = time.time()
|
| 154 |
|
| 155 |
if not instruction.strip():
|
| 156 |
return "Please enter an instruction."
|
| 157 |
|
| 158 |
+
# Layer 1: Input validation
|
| 159 |
+
is_valid, validation_error = validate_input(instruction)
|
| 160 |
+
if not is_valid:
|
| 161 |
+
# Log blocked request
|
| 162 |
+
log_request(instruction, validation_error, 0, time.time() - start_time, "BLOCKED_BY_SAFETY_FILTER")
|
| 163 |
+
return validation_error
|
| 164 |
+
|
| 165 |
generated_code = ""
|
| 166 |
tokens_generated = 0
|
| 167 |
error = None
|
|
|
|
| 207 |
else:
|
| 208 |
generated_code = generated.strip()
|
| 209 |
|
| 210 |
+
# Layer 2: Output filtering for dangerous patterns
|
| 211 |
+
generated_code = filter_dangerous_code(generated_code)
|
| 212 |
+
|
| 213 |
except Exception as e:
|
| 214 |
error = str(e)
|
| 215 |
generated_code = f"Error: {error}\n\nPlease try again."
|
|
|
|
| 246 |
**Performance**: 76% syntax correctness | **BLEU Score: 16.83** (+53% improvement over baseline 11.00)
|
| 247 |
|
| 248 |
**Note**: First request may take 1-2 minutes as the model loads on HuggingFace servers. Subsequent requests are instant!
|
| 249 |
+
|
| 250 |
+
---
|
| 251 |
+
|
| 252 |
+
### 🛡️ Safety Features
|
| 253 |
+
|
| 254 |
+
This demo includes production-grade safety filters:
|
| 255 |
+
- **Input Validation**: Blocks requests with potentially dangerous keywords
|
| 256 |
+
- **Output Filtering**: Prevents generation of code that could delete files, execute arbitrary commands, or compromise security
|
| 257 |
+
- **Production Monitoring**: All requests are logged for quality tracking (privacy-respecting, no personal data stored)
|
| 258 |
+
|
| 259 |
+
⚠️ **AI-Generated Code Disclaimer**: Always review generated code before use. AI models can make mistakes.
|
| 260 |
"""
|
| 261 |
)
|
| 262 |
|