jayansh21 committed on
Commit
98a28e4
·
verified ·
1 Parent(s): dfb2008

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +10 -12
  2. app.py +67 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,10 @@
1
- ---
2
- title: Codesheriff Inference
3
- emoji: 📚
4
- colorFrom: red
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 6.9.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: CodeSheriff Inference
3
+ emoji: 🔍
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: "4.44.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
 
 
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CodeSheriff Inference Space
3
+
4
+ A lightweight Gradio app that loads the fine-tuned CodeBERT classifier
5
+ and exposes a /predict API endpoint. Called remotely by the Render backend.
6
+ """
7
+
8
+ import gradio as gr
9
+ import torch
10
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
11
+
12
+ MODEL_ID = "jayansh21/codesheriff-bug-classifier"
13
+ NUM_LABELS = 5
14
+ MAX_LENGTH = 512
15
+ LABEL_NAMES = {
16
+ 0: "Clean",
17
+ 1: "Null Reference Risk",
18
+ 2: "Type Mismatch",
19
+ 3: "Security Vulnerability",
20
+ 4: "Logic Flaw",
21
+ }
22
+
23
+ print("Loading CodeSheriff classifier …")
24
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
25
+ model = AutoModelForSequenceClassification.from_pretrained(
26
+ MODEL_ID, num_labels=NUM_LABELS
27
+ )
28
+ model.eval()
29
+ print("Model loaded ✅")
30
+
31
+
32
+ def predict(code_snippet: str) -> dict:
33
+ """Classify a code snippet and return label, confidence, label_id."""
34
+ if not code_snippet or not code_snippet.strip():
35
+ return {"label": "Clean", "confidence": 0.0, "label_id": 0}
36
+
37
+ encoding = tokenizer(
38
+ code_snippet,
39
+ truncation=True,
40
+ padding="max_length",
41
+ max_length=MAX_LENGTH,
42
+ return_tensors="pt",
43
+ )
44
+ with torch.no_grad():
45
+ outputs = model(**encoding)
46
+
47
+ probs = torch.softmax(outputs.logits, dim=-1).squeeze(0)
48
+ label_id = int(torch.argmax(probs).item())
49
+ confidence = float(probs[label_id].item())
50
+
51
+ return {
52
+ "label": LABEL_NAMES.get(label_id, f"Unknown({label_id})"),
53
+ "confidence": round(confidence, 4),
54
+ "label_id": label_id,
55
+ }
56
+
57
+
58
+ demo = gr.Interface(
59
+ fn=predict,
60
+ inputs=gr.Textbox(label="Code Snippet", lines=5, placeholder="Paste code here …"),
61
+ outputs=gr.JSON(label="Classification"),
62
+ title="🔍 CodeSheriff Bug Classifier",
63
+ description="Fine-tuned CodeBERT model for detecting common bug patterns.",
64
+ api_name="predict",
65
+ )
66
+
67
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio