Spaces:

DarthBihan
/

ai-code-security-backend

Running

App Files Files Community

Bihan-Banerjee committed 20 days ago

Commit

c109b62

1 Parent(s): 7374940

initial commit

Browse files

Files changed (17) hide show

.env +4 -0
DockerFile +25 -0
__pycache__/model.cpython-313.pyc +0 -0
__pycache__/schemas.cpython-313.pyc +0 -0
app.py +254 -0
model.py +133 -0
models/__pycache__/reviews.cpython-313.pyc +0 -0
models/history.py +10 -0
models/reviews.py +7 -0
requirements.txt +32 -0
routes/__pycache__/auth.cpython-313.pyc +0 -0
routes/__pycache__/reviews.cpython-313.pyc +0 -0
routes/auth.py +45 -0
routes/reviews.py +31 -0
schemas.py +11 -0
temp/test.py +0 -0
user_model.py +10 -0

.env ADDED Viewed

	@@ -0,0 +1,4 @@

+MONGO_URI=mongodb+srv://terimarzi:8pKpTnnZdqPBIAE8@securecode.irgi2mi.mongodb.net/
+SECRET_KEY=T_u_H_i_B_a_T_a
+JWT_SECRET=supersecretkey
+USE_REDIS=false

DockerFile ADDED Viewed

	@@ -0,0 +1,25 @@

+# Base image: Python 3.9
+FROM python:3.9
+# All following instructions run relative to /app inside the image
+WORKDIR /app
+# Copy only requirements.txt first and install from it, so this layer depends
+# on the dependency list alone and not on the rest of the source tree
+COPY requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy the rest of the backend code
+# NOTE(review): this copies the entire build context. The same commit adds
+# .env and __pycache__/ files, so they end up in the image unless a
+# .dockerignore (not part of this commit) excludes them -- confirm.
+COPY . .
+# Create a non-root user with UID 1000 and switch to it for the remaining
+# steps and at runtime (security requirement for HF Spaces)
+RUN useradd -m -u 1000 user
+USER user
+# Point HOME at the new user's home directory and put its local bin first on PATH
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Expose port 7860 (required by Hugging Face)
+EXPOSE 7860
+# Command to run the application
+# gunicorn serves the `app` object from the `app` module, bound to all
+# interfaces on port 7860, with a 120-second worker timeout
+CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app", "--timeout", "120"]

__pycache__/model.cpython-313.pyc ADDED Viewed

Binary file (6.9 kB). View file

__pycache__/schemas.cpython-313.pyc ADDED Viewed

Binary file (870 Bytes). View file

app.py ADDED Viewed

	@@ -0,0 +1,254 @@

+from flask import Flask, request, jsonify
+from flask_cors import CORS
+from flask_jwt_extended import JWTManager, jwt_required, get_jwt_identity
+from routes.auth import auth_bp
+from dotenv import load_dotenv
+from model import enhance_code
+import tempfile
+import os
+import subprocess
+import json
+from pymongo import MongoClient
+from routes.reviews import reviews_bp
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address
+from datetime import datetime
+from datetime import timedelta
+from bson import ObjectId
+from models.reviews import reviews_collection
+from schemas import ScanRequest
+from pydantic import ValidationError
+from flask_compress import Compress
+from flask_caching import Cache
+import hashlib
+load_dotenv()
+# Create the Flask application and enable response compression and CORS.
+app = Flask(__name__)
+Compress(app)
+CORS(app)
+# Pick the cache backend from the environment: Redis when USE_REDIS is "true"
+# (case-insensitive), otherwise the in-process SimpleCache. Both use a default
+# entry timeout of 3600 seconds.
+if os.getenv("USE_REDIS", "false").lower() == "true":
+    cache = Cache(config={
+        "CACHE_TYPE": "RedisCache",
+        "CACHE_REDIS_URL": os.getenv("REDIS_URL", "redis://localhost:6379/0"),
+        "CACHE_DEFAULT_TIMEOUT": 3600
+    })
+else:
+    cache = Cache(config={
+        "CACHE_TYPE": "SimpleCache",
+        "CACHE_DEFAULT_TIMEOUT": 3600
+    })
+# Bind the cache to the application; a single init_app call is sufficient.
+cache.init_app(app)
+def files_hash(files):
+    """Return a SHA-256 hex digest identifying a set of submitted files.
+
+    Args:
+        files: iterable of dicts with string values under "filename" and "content".
+
+    Returns:
+        A 64-character hex string. The digest does not depend on the order of
+        ``files`` because entries are hashed in filename order.
+
+    Every field is written as an 8-byte big-endian length followed by its
+    bytes. A bare separator is not enough: content may itself contain the
+    separator, so two different file sets could otherwise produce the same
+    byte stream and therefore share a cache key.
+    """
+    h = hashlib.sha256()
+    for f in sorted(files, key=lambda x: x["filename"]):
+        for field in (f["filename"], f["content"]):
+            encoded = field.encode()
+            h.update(len(encoded).to_bytes(8, "big"))
+            h.update(encoded)
+    return h.hexdigest()
+# MongoDB connection string, taken from the environment (None when unset).
+app.config['MONGO_URI'] = os.getenv("MONGO_URI")
+# NOTE(review): both keys below are read from JWT_SECRET; the .env added in this
+# commit also defines a separate SECRET_KEY that is never read here -- confirm intent.
+# NOTE(review): when JWT_SECRET is unset, tokens are signed with the hard-coded
+# literal 'super-secret-key' -- consider failing at startup instead.
+app.config['SECRET_KEY'] = os.getenv('JWT_SECRET') or 'super-secret-key'
+app.config["JWT_SECRET_KEY"] = os.getenv('JWT_SECRET') or 'super-secret-key'
+# Access tokens expire after 60 minutes.
+app.config["JWT_ACCESS_TOKEN_EXPIRES"] = timedelta(minutes=60)
+# Registered without a url_prefix; the blueprint's own rule is "/api/reviews".
+# NOTE(review): this module later declares a POST /api/reviews view as well;
+# the blueprint is registered first and appears to take precedence -- confirm.
+app.register_blueprint(reviews_bp)
+jwt = JWTManager(app)
+# Module-level Mongo client and the collections used by the views in this file.
+client = MongoClient(app.config['MONGO_URI'])
+db = client["codewhisperer"]
+users = db["users"]
+enhance_history = db["enhance_history"]
+scan_history = db["scan_history"]
+# Auth routes are mounted under /api (the blueprint defines /register and /login).
+app.register_blueprint(auth_bp, url_prefix="/api")
+# Rate limiting keyed by the client's remote address; 20 requests per minute
+# unless a view sets its own limit.
+limiter = Limiter(
+    key_func=get_remote_address,
+    default_limits=["20 per minute"]
+)
+limiter.init_app(app)
+@app.route('/api/scan', methods=['POST'])
+@limiter.limit("5/minute")
+@jwt_required()
+def scan_code():
+    """Scan the submitted files with bandit (Python) or semgrep (JavaScript).
+
+    Expects a JSON object matching ScanRequest (``files`` plus an optional
+    ``language``). Results are cached per user, language and file contents,
+    and every fresh scan is stored in the ``scan_history`` collection.
+
+    Returns:
+        200 with ``{"result": ...}`` (plus ``"cached": True`` on a cache hit);
+        400 for an invalid body, an unsupported language or an unsafe filename;
+        500 when the scanner fails, its output is not JSON, or anything else raises.
+    """
+    try:
+        # silent=True yields None for a missing or malformed JSON body, so bad
+        # requests get a 400 below instead of reaching the generic 500 handler.
+        data = request.get_json(silent=True)
+        app.logger.info(f"Incoming request: {data}")
+        if not isinstance(data, dict):
+            return jsonify({"error": "Request body must be a JSON object"}), 400
+        try:
+            req = ScanRequest(**data)
+        except ValidationError as e:
+            app.logger.error(f"Validation error: {e.errors()}")
+            return jsonify({"error": e.errors()}), 400
+        files = [f.dict() for f in req.files]
+        language = req.language.lower()
+        # Reject unsupported languages before touching the cache or the disk.
+        if language not in ("python", "javascript"):
+            return jsonify({"error": "Unsupported language"}), 400
+        username = get_jwt_identity()
+        # The language is part of the key: the same files scanned as another
+        # language must not be answered with the other scanner's cached output.
+        key = f"scan:{username}:{language}:{files_hash(files)}"
+        cached = cache.get(key)
+        if cached is not None:
+            return jsonify({"result": cached, "cached": True})
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root = os.path.realpath(temp_dir)
+            for f in files:
+                # Filenames are client-controlled: resolve each one and refuse
+                # anything (absolute paths, "..") that lands outside the temp dir.
+                path = os.path.realpath(os.path.join(root, f["filename"]))
+                if path == root or os.path.commonpath([root, path]) != root:
+                    return jsonify({"error": f"Invalid filename: {f['filename']}"}), 400
+                # Allow nested names such as "pkg/mod.py".
+                os.makedirs(os.path.dirname(path), exist_ok=True)
+                with open(path, "w", encoding="utf-8") as code_file:
+                    code_file.write(f["content"])
+            if language == "python":
+                scan_command = ["python", "-m", "bandit", "-r", temp_dir, "-f", "json"]
+            else:
+                scan_command = ["python", "-m", "semgrep", "--config=p/javascript", "--json", temp_dir]
+            app.logger.info(f"Running: {' '.join(scan_command)}")
+            result = subprocess.run(scan_command, capture_output=True, text=True)
+            app.logger.info(f"stdout: {result.stdout[:500]}")
+            app.logger.info(f"stderr: {result.stderr}")
+            # Exit codes 0, 1 and 2 are accepted; anything else is reported as a failure.
+            if result.returncode not in (0, 1, 2):
+                return jsonify({"error": result.stderr}), 500
+            try:
+                output_json = json.loads(result.stdout)
+            except Exception as e:
+                return jsonify({"error": f"JSON parse failed: {str(e)}", "raw": result.stdout}), 500
+            cache.set(key, output_json)
+            scan_history.insert_one({
+                "username": username,
+                "language": language,
+                "files": files,
+                "result": output_json,
+                "timestamp": datetime.utcnow().isoformat()
+            })
+            return jsonify({"result": output_json})
+    except Exception as e:
+        app.logger.error(f"Unexpected error: {str(e)}")
+        return jsonify({"error": str(e)}), 500
+@app.route("/api/health")
+def health():
+    """Liveness probe: always answers with the fixed JSON body {"status": "ok"}."""
+    payload = {"status": "ok"}
+    return jsonify(payload)
+@app.route('/api/enhance', methods=['POST'])
+@limiter.limit("5/minute")
+@jwt_required()
+def enhance():
+    """Run the code enhancer on the submitted snippet and store the outcome.
+
+    Expects a JSON object with ``code`` (string) and an optional ``language``
+    ("python" by default; "python" and "javascript" are accepted).
+
+    Returns:
+        200 with the dict produced by ``enhance_code``;
+        400 for a missing/invalid body, non-string fields, an unsupported
+        language or empty code; 500 when anything else raises.
+    """
+    try:
+        # silent=True yields None for a missing or malformed JSON body, so bad
+        # requests are answered with a 400 instead of an AttributeError-driven 500.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            return jsonify({"error": "Request body must be a JSON object"}), 400
+        code = data.get("code", "")
+        language = data.get("language", "python")
+        # Both values are used with str methods below; reject other JSON types up front.
+        if not isinstance(code, str) or not isinstance(language, str):
+            return jsonify({"error": "code and language must be strings"}), 400
+        language = language.lower()
+        username = get_jwt_identity()
+        if language not in ["python", "javascript"]:
+            return jsonify({"error": "Unsupported language"}), 400
+        if not code.strip():
+            return jsonify({"error": "No code provided"}), 400
+        # enhance_code returns a dict (enhanced_code, diff, candidates, explanations).
+        result = enhance_code(code, language)
+        # Save to history, including candidates and explanations.
+        enhance_history.insert_one({
+            "username": username,
+            "code": code,
+            "language": language,
+            "enhanced_code": result["enhanced_code"],
+            "diff": result["diff"],
+            "candidates": result.get("candidates", []),
+            "explanations": result.get("explanations", []),
+            "timestamp": datetime.utcnow().isoformat()
+        })
+        return jsonify(result), 200
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@app.route('/api/history', methods=['GET'])
+@limiter.limit("10/minute")
+@jwt_required()
+def history():
+    """Return the caller's enhance and scan records, sorted by timestamp descending.
+
+    Returns:
+        200 with ``{"enhance": [...], "scan": [...]}``, or 500 with the
+        exception text when anything raises.
+    """
+    def clean(record, record_type):
+        # Flatten a stored document into the JSON shape sent to the client;
+        # the ObjectId becomes a string and "result" is filled for scans only.
+        is_scan = record_type == "scan"
+        return {
+            "id": str(record.get("_id")),
+            "language": record.get("language"),
+            "code": record.get("code"),
+            "enhanced_code": record.get("enhanced_code"),
+            "diff": record.get("diff"),
+            "candidates": record.get("candidates", []),
+            "explanations": record.get("explanations", []),
+            "result": record.get("result") if is_scan else None,
+            "timestamp": record.get("timestamp"),
+        }
+    try:
+        username = get_jwt_identity()
+        newest_first = ("timestamp", -1)
+        # Fetch both histories before building the response.
+        enhance_records = list(enhance_history.find({"username": username}).sort(*newest_first))
+        scan_records = list(scan_history.find({"username": username}).sort(*newest_first))
+        payload = {
+            "enhance": [clean(r, "enhance") for r in enhance_records],
+            "scan": [clean(r, "scan") for r in scan_records],
+        }
+        return jsonify(payload)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@app.route("/api/reviews", methods=["POST"])
+@limiter.limit("5/minute")
+def submit_review():
+    """Store a review submitted as JSON and return its generated id.
+
+    Expects ``name``, ``email``, ``rating`` and ``review``; ``date`` is optional
+    and defaults to the current UTC time in ISO format.
+
+    Returns:
+        201 with a confirmation message and the inserted id;
+        400 when the body is missing/invalid or any required field is falsy;
+        500 when anything else raises.
+
+    NOTE(review): the reviews blueprint registered earlier in this module also
+    serves POST /api/reviews and appears to take precedence -- confirm whether
+    this view is still reachable.
+    """
+    try:
+        # silent=True yields None for a missing or malformed JSON body, which is
+        # reported as a validation failure instead of crashing on data.get().
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            return jsonify({"error": "All fields are required"}), 400
+        name = data.get("name")
+        email = data.get("email")
+        rating = data.get("rating")
+        review = data.get("review")
+        date = data.get("date", datetime.utcnow().isoformat())
+        if not all([name, email, rating, review]):
+            return jsonify({"error": "All fields are required"}), 400
+        review_doc = {
+            "name": name,
+            "email": email,
+            "rating": rating,
+            "review": review,
+            "date": date,
+        }
+        result = reviews_collection.insert_one(review_doc)
+        return jsonify({
+            "message": "Review submitted successfully",
+            "id": str(result.inserted_id)
+        }), 201
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+if __name__ == '__main__':
+    # Local development entry point; the container starts the app through gunicorn.
+    # Debug mode enables the interactive debugger, which must not be reachable
+    # while listening on 0.0.0.0, so it is opt-in via FLASK_DEBUG=true.
+    debug_enabled = os.getenv("FLASK_DEBUG", "false").lower() == "true"
+    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)

model.py ADDED Viewed

	@@ -0,0 +1,133 @@

+from transformers import (
+    AutoTokenizer,
+    AutoModelForSeq2SeqLM,
+    AutoModelForCausalLM
+)
+import difflib, re, torch
+# Models and their types.
+# Keys are model identifiers passed to from_pretrained; the value selects which
+# AutoModel class loads the model below and which generation path run_model takes.
+MODEL_CONFIGS = {
+    "Salesforce/codet5-base": "seq2seq",        # CodeT5
+    "EleutherAI/gpt-neo-1.3B": "causal",        # GPT-Neo
+    "microsoft/CodeGPT-small-py": "causal",     # CodeGPT-small (Python)
+}
+# Load tokenizers and models.
+# This loop runs at import time, so importing this module loads every model
+# listed in MODEL_CONFIGS before any function here can be called.
+tokenizers, models = {}, {}
+for name, mtype in MODEL_CONFIGS.items():
+    tokenizers[name] = AutoTokenizer.from_pretrained(name)
+    if mtype == "seq2seq":
+        models[name] = AutoModelForSeq2SeqLM.from_pretrained(name)
+    else:
+        # Every type other than "seq2seq" is loaded as a causal language model.
+        models[name] = AutoModelForCausalLM.from_pretrained(name)
+# Rule-based fixes.
+# Maps an insecure snippet to (replacement text, human-readable reason).
+# NOTE(review): the "pickle.load(" rule swaps the call prefix for a comment
+# marker, which comments out the rest of that line (including the arguments
+# and closing parenthesis) -- confirm this is the intended output.
+SECURE_REPLACEMENTS = {
+    "hashlib.md5": ("hashlib.sha256", "MD5 is weak, replaced with SHA-256."),
+    "hashlib.sha1": ("hashlib.sha256", "SHA1 is weak, replaced with SHA-256."),
+    "eval(": ("ast.literal_eval(", "Unsafe eval removed, replaced with safe literal_eval."),
+    "pickle.load(": ("# pickle.load removed", "pickle.load is unsafe, consider json/safe loaders."),
+}
+def rule_based_patch(code: str):
+    """Apply the SECURE_REPLACEMENTS rules to ``code``.
+
+    A rule only fires where the snippet starts a name of its own, i.e. it is
+    not preceded by an identifier character or a dot. Plain substring matching
+    would also rewrite ``ast.literal_eval(`` (already safe), ``my_eval(`` and
+    method calls such as ``model.eval()``, corrupting the code.
+
+    Args:
+        code: source text to patch.
+
+    Returns:
+        ``(patched_code, explanations)`` where ``explanations`` holds one
+        ``{"change", "reason"}`` dict per rule that matched at least once.
+    """
+    explanations = []
+    patched = code
+    for bad, (good, reason) in SECURE_REPLACEMENTS.items():
+        pattern = re.compile(r"(?<![\w.])" + re.escape(bad))
+        # A callable replacement inserts ``good`` literally (no backslash/group expansion).
+        patched, hits = pattern.subn(lambda _match, good=good: good, patched)
+        if hits:
+            explanations.append({"change": f"{bad} → {good}", "reason": reason})
+    return patched, explanations
+def preserve_structure(original: str, enhanced: str):
+    """Re-add imports and function signatures that the model output dropped.
+
+    Args:
+        original: the source text before enhancement.
+        enhanced: the model's output.
+
+    Returns:
+        ``enhanced`` with any missing original import lines prepended in their
+        original order, and a stub (signature plus a review comment) prepended
+        for every original ``def`` whose name no longer appears.
+    """
+    final_code = enhanced
+    original_lines = original.splitlines()
+    # Match both "import x" and "from x import y"; the trailing space keeps
+    # ordinary names such as "important" from being treated as imports.
+    original_imports = [
+        line for line in original_lines
+        if line.strip().startswith(("import ", "from "))
+    ]
+    # Each missing line is prepended, so walk the list backwards to keep the
+    # restored imports in the order they had in the original.
+    for imp in reversed(original_imports):
+        if imp not in final_code:
+            final_code = imp + "\n" + final_code
+    original_defs = [line for line in original_lines if line.strip().startswith("def ")]
+    for d in original_defs:
+        # Compare on the text before the parameter list ("def name").
+        if d.split("(")[0] not in final_code:
+            final_code = d + "\n    # [!] Function body missing, please review\n" + final_code
+    return final_code
+def create_diff(original: str, enhanced: str):
+    """Return a structured line diff for frontend rendering.
+
+    Args:
+        original: text before the change.
+        enhanced: text after the change.
+
+    Returns:
+        A list of ``{"type", "content"}`` dicts in unified-diff order, where
+        ``type`` is "add", "remove" or "context" and ``content`` is the line
+        without its one-character diff marker. File headers and ``@@`` hunk
+        headers are omitted. The list is empty when the texts are identical.
+    """
+    diff_lines = difflib.unified_diff(
+        original.splitlines(), enhanced.splitlines(),
+        fromfile="Original", tofile="Enhanced", lineterm=""
+    )
+    kinds = {"+": "add", "-": "remove", " ": "context"}
+    formatted = []
+    for index, line in enumerate(diff_lines):
+        # The first two lines are the "--- Original" / "+++ Enhanced" headers.
+        # Skipping them by position (not by prefix) keeps real content that
+        # starts with "--" or "++" from being mistaken for a header.
+        if index < 2:
+            continue
+        kind = kinds.get(line[:1])
+        if kind is None:
+            # Only "@@ ... @@" hunk headers start with another character.
+            continue
+        formatted.append({"type": kind, "content": line[1:]})
+    return formatted
+def postprocess_code(code: str):
+    """Normalise model output: trim it, drop a leading/trailing triple quote,
+    turn each tab into four spaces and strip trailing whitespace per line."""
+    unquoted = re.sub(r'^"""|"""$', '', code.strip())
+    cleaned = []
+    for raw_line in unquoted.splitlines():
+        cleaned.append(raw_line.replace("\t", "    ").rstrip())
+    return "\n".join(cleaned)
+def run_model(model_name, code, language):
+    """Generate a candidate fix for ``code`` with one of the loaded models.
+
+    Args:
+        model_name: key into ``tokenizers``, ``models`` and ``MODEL_CONFIGS``.
+        code: source text to embed in the prompt.
+        language: language name embedded in the prompt.
+
+    Returns:
+        The decoded text of the first generated sequence, special tokens removed.
+
+    Raises:
+        KeyError: if ``model_name`` was not loaded.
+    """
+    tokenizer = tokenizers[model_name]
+    model = models[model_name]
+    mtype = MODEL_CONFIGS[model_name]
+    prompt = f"fix {language} code: {code}"
+    if mtype == "seq2seq":
+        # Prompt is truncated to 512 tokens; decoding uses 4-beam search.
+        inputs = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=512)
+        outputs = model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
+        return tokenizer.decode(outputs[0], skip_special_tokens=True)
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+    # Decoding is deterministic (do_sample=False), so the sampling-only
+    # temperature/top_p arguments are not passed: they have no effect here.
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=256,
+        do_sample=False
+    )
+    # NOTE(review): for causal models the generated sequence normally begins
+    # with the prompt tokens, so this text likely starts with the prompt
+    # itself -- confirm whether the prompt should be sliced off before decoding.
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+def enhance_code(code: str, language: str):
+    """Patch ``code`` with the rule table, then ask every model for a candidate.
+
+    Args:
+        code: source text to enhance.
+        language: language name forwarded to the models' prompt.
+
+    Returns:
+        A dict with ``enhanced_code`` (the chosen candidate), ``diff`` (against
+        the original ``code``), ``candidates`` (at most three per-model results,
+        failures included) and ``explanations``. If the whole pipeline raises,
+        the original code with an error comment appended is returned instead.
+    """
+    try:
+        patched_code, rule_explanations = rule_based_patch(code)
+        candidates = []
+        successful = []
+        for m in MODEL_CONFIGS.keys():
+            try:
+                enhanced = run_model(m, patched_code, language)
+                enhanced = postprocess_code(enhanced)
+                enhanced = preserve_structure(code, enhanced)
+                candidate = {"model": m, "code": enhanced}
+                successful.append(candidate)
+            except Exception as e:
+                candidate = {"model": m, "code": f"# [!] Failed: {str(e)}"}
+            candidates.append(candidate)
+        # Pick the longest candidate, but never prefer a failure placeholder
+        # over real model output; fall back to placeholders only if all failed.
+        best = max(successful or candidates, key=lambda c: len(c["code"]))
+        diff = create_diff(code, best["code"])
+        explanations = rule_explanations + [
+            {"change": "Model improvements", "reason": "Best candidate chosen among ensemble"}
+        ]
+        return {
+            "enhanced_code": best["code"],
+            "diff": diff,
+            "candidates": candidates[:3],
+            "explanations": explanations
+        }
+    except Exception as e:
+        fallback = code + f"\n# [!] Enhancer crashed: {str(e)}"
+        return {
+            "enhanced_code": fallback,
+            "diff": create_diff(code, fallback),
+            "candidates": [],
+            "explanations": [{"change": "Error", "reason": str(e)}]
+        }

models/__pycache__/reviews.cpython-313.pyc ADDED Viewed

Binary file (489 Bytes). View file

models/history.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from pymongo import MongoClient
+import os
+from dotenv import load_dotenv
+# Load variables from a local .env file into the environment before reading MONGO_URI.
+load_dotenv()
+# Module-level Mongo client built from the MONGO_URI environment variable.
+client = MongoClient(os.getenv("MONGO_URI"))
+db = client["codewhisperer"]
+# Collection handles for enhance and scan records.
+# NOTE(review): app.py creates its own client and the same two handles on the
+# same database name; this module looks redundant -- confirm before removing.
+enhance_history = db["enhance_history"]
+scan_history = db["scan_history"]

models/reviews.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from pymongo import MongoClient
+from datetime import datetime
+import os
+# Connect using MONGO_URI, falling back to mongodb://localhost:27017 when unset.
+# NOTE(review): this module does not call load_dotenv() itself, so MONGO_URI must
+# already be in the environment when it is imported -- confirm the import order.
+client = MongoClient(os.getenv("MONGO_URI", "mongodb://localhost:27017"))
+# NOTE(review): the database is "securecode" here, while routes/reviews.py
+# stores reviews in "securecode_ai" -- confirm which one is intended.
+db = client["securecode"]
+reviews_collection = db["reviews"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,32 @@

+# Flask dependencies
+flask==2.3.3
+flask-cors==4.0.0
+flask-jwt-extended==4.5.3
+flask-limiter==3.5.0
+flask-compress==1.13
+flask-caching==2.1.0
+python-dotenv==1.0.0
+# Database
+pymongo==4.5.0
+# Validation
+pydantic>=2.9.2
+# AI/ML dependencies
+torch>=2.0.0
+transformers>=4.30.0
+# Security scanning tools
+bandit>=1.7.5
+semgrep>=1.45.0
+# Optional: for better performance
+accelerate>=0.20.0
+safetensors>=0.3.0
+bcrypt>=4.0.1
+python-dotenv
+gunicorn

routes/__pycache__/auth.cpython-313.pyc ADDED Viewed

Binary file (2.41 kB). View file

routes/__pycache__/reviews.cpython-313.pyc ADDED Viewed

Binary file (1.96 kB). View file

routes/auth.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from flask import Blueprint, request, jsonify
+from flask_jwt_extended import create_access_token
+import bcrypt
+from pymongo import MongoClient
+import os
+from dotenv import load_dotenv
+# Load variables from a local .env file into the environment before reading MONGO_URI.
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI")
+# Blueprint holding the /register and /login views defined below.
+auth_bp = Blueprint('auth', __name__)
+# Module-level Mongo client and the collection that stores user accounts.
+client = MongoClient(MONGO_URI)
+db = client["codewhisperer"]
+users = db["users"]
+@auth_bp.route("/register", methods=["POST"])
+def register():
+    """Create a user account and return an access token for it.
+
+    Expects a JSON object with non-empty string ``username``, ``email`` and
+    ``password``. The password is stored as a bcrypt hash.
+
+    Returns:
+        201 with ``{"token": ...}`` on success;
+        400 when the body is invalid or the username is already taken.
+    """
+    # silent=True yields None for a missing or malformed JSON body.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Request body must be a JSON object"}), 400
+    username = data.get("username")
+    email = data.get("email")
+    password = data.get("password")
+    # Require plain strings: a missing field must not raise KeyError, and a JSON
+    # object in place of a string must never reach the Mongo query below.
+    if not all(isinstance(value, str) and value for value in (username, email, password)):
+        return jsonify({"error": "username, email and password are required"}), 400
+    if users.find_one({"username": username}):
+        return jsonify({"error": "User already exists"}), 400
+    hashed_pw = bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt())
+    users.insert_one({"username": username, "email": email, "password": hashed_pw})
+    token = create_access_token(identity=username)
+    return jsonify({"token": token}), 201
+@auth_bp.route("/login", methods=["POST"])
+def login():
+    """Check a username/password pair and return an access token.
+
+    Expects a JSON object with non-empty string ``username`` and ``password``.
+
+    Returns:
+        200 with ``{"token": ...}`` on success;
+        400 when the body is invalid;
+        401 when the user is unknown or the password does not match.
+    """
+    # silent=True yields None for a missing or malformed JSON body.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({"error": "Request body must be a JSON object"}), 400
+    username = data.get("username")
+    password = data.get("password")
+    # Require plain strings: a missing field must not raise KeyError, and a JSON
+    # object in place of a string must never reach the Mongo query below.
+    if not all(isinstance(value, str) and value for value in (username, password)):
+        return jsonify({"error": "username and password are required"}), 400
+    user = users.find_one({"username": username})
+    # Same response for an unknown user and a wrong password.
+    if not user or not bcrypt.checkpw(password.encode("utf-8"), user["password"]):
+        return jsonify({"error": "Invalid credentials"}), 401
+    token = create_access_token(identity=username)
+    return jsonify({"token": token})

routes/reviews.py ADDED Viewed

	@@ -0,0 +1,31 @@

+from flask import Blueprint, request, jsonify
+from pymongo import MongoClient
+from datetime import datetime
+import os
+reviews_bp = Blueprint("reviews", __name__)
+# Module-level Mongo client built from the MONGO_URI environment variable.
+# NOTE(review): this module does not call load_dotenv() itself, so MONGO_URI must
+# already be in the environment when it is imported -- confirm the import order.
+client = MongoClient(os.getenv("MONGO_URI"))
+# NOTE(review): the database is "securecode_ai" here, while models/reviews.py
+# uses "securecode" for its reviews collection -- confirm which one is intended.
+db = client["securecode_ai"]
+reviews_collection = db["reviews"]
+@reviews_bp.route("/api/reviews", methods=["POST"])
+def submit_review():
+    """Store a review submitted as JSON, stamped with the current UTC time.
+
+    Expects ``name``, ``email``, ``rating`` and ``review``, all present and truthy.
+
+    Returns:
+        201 with a confirmation message;
+        400 when the body is missing/invalid or a required field is absent or falsy;
+        500 when anything else raises.
+    """
+    try:
+        # silent=True yields None for a missing or malformed JSON body, which is
+        # reported as a validation failure instead of a 500 with the exception text.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            return jsonify({"error": "All fields are required"}), 400
+        required_fields = ["name", "email", "rating", "review"]
+        if not all(data.get(field) for field in required_fields):
+            return jsonify({"error": "All fields are required"}), 400
+        new_review = {
+            "name": data["name"],
+            "email": data["email"],
+            "rating": data["rating"],
+            "review": data["review"],
+            "date": datetime.utcnow().isoformat()
+        }
+        reviews_collection.insert_one(new_review)
+        return jsonify({"message": "Review submitted successfully!"}), 201
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500

schemas.py ADDED Viewed

	@@ -0,0 +1,11 @@

+# backend/schemas.py
+from pydantic import BaseModel
+from typing import List, Optional
+class FileModel(BaseModel):
+    """One submitted file: a name and its text, both required strings."""
+    filename: str
+    content: str
+class ScanRequest(BaseModel):
+    """Request body for a scan: the files to scan and their language."""
+    files: List[FileModel]
+    # Defaults to "python" when the client omits the field.
+    language: str = "python"

temp/test.py ADDED Viewed

Binary file (34 Bytes). View file

user_model.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from pymongo import MongoClient
+from dotenv import load_dotenv
+import os
+# Load variables from a local .env file into the environment before reading MONGO_URI.
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI")
+# Module-level Mongo client and a handle on the "users" collection.
+# NOTE(review): routes/auth.py builds the same client and collection handle
+# itself; this module looks redundant -- confirm before removing.
+client = MongoClient(MONGO_URI)
+db = client["codewhisperer"]
+users = db["users"]