Kishoreuses5 committed on
Commit
7e042dd
·
verified ·
1 Parent(s): b1a8eda

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ from transformers import AutoTokenizer, AutoModel
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+
7
# Load the CodeT5+ embedding checkpoint once at import time so every request
# reuses the same tokenizer and model.
model_name = "Salesforce/codet5p-110m-embedding"

# Per the model card, this checkpoint is loaded with trust_remote_code=True for
# both the tokenizer and the model (the architecture is defined by code in the
# hub repository rather than in the transformers package).
# NOTE(review): trust_remote_code executes Python downloaded from the hub;
# consider pinning `revision=` to a reviewed commit.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
12
+
13
def get_embedding(code):
    """Return the embedding of *code* as a 2-D NumPy array (one row per input).

    The text is tokenized with the module-level ``tokenizer`` (truncated to
    512 tokens) and run through the module-level ``model`` with gradients
    disabled.

    Args:
        code: Source code text to embed.

    Returns:
        A NumPy array holding the embedding, with a leading batch dimension
        so it can be stacked with other results and passed to
        ``cosine_similarity``.
    """
    inputs = tokenizer(
        code,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        outputs = model(**inputs)

    if isinstance(outputs, torch.Tensor):
        # The CodeT5+ embedding checkpoint is documented (model card) to
        # return the pooled embedding tensor directly, so there is no
        # `last_hidden_state` attribute to read.
        embedding = outputs
    else:
        # Standard encoder output: average the token states into one vector.
        embedding = outputs.last_hidden_state.mean(dim=1)

    # Guarantee a (batch, dim) layout even if the model dropped the batch axis.
    if embedding.dim() == 1:
        embedding = embedding.unsqueeze(0)
    return embedding.numpy()
24
+
25
# In-memory store shared by every call to analyze(); it grows by one entry per
# accepted submission and is lost when the process exits.
student_codes = []  # embeddings of stored submissions, in submission order
raw_codes = []  # original text of each stored submission, same order


def analyze(code):
    """Compare *code* with all earlier submissions and return a text report.

    The submission is embedded with ``get_embedding``, compared by cosine
    similarity against every stored embedding, and then appended to the
    store so later submissions are compared against it too.

    Args:
        code: The submitted source code text.

    Returns:
        A newline-terminated report: either a notice that this is the first
        stored submission, or the best similarity score, the index of the
        closest earlier submission, and a verdict line. Empty or
        whitespace-only input returns a short notice and is NOT stored.
    """
    # Blank submissions carry no signal; storing them would make every later
    # blank submission score ~1.0 against them and be flagged as plagiarism.
    if not code or not code.strip():
        return "No code submitted. Paste some code to analyze.\n"

    emb = get_embedding(code)

    if not student_codes:
        parts = ["First submission stored. No comparison yet.\n"]
    else:
        sims = cosine_similarity(emb, np.vstack(student_codes))[0]
        idx = int(np.argmax(sims))
        max_sim = float(sims[idx])

        if max_sim > 0.9:
            verdict = "⚠ Very high similarity — likely same approach / plagiarism\n"
        elif max_sim > 0.7:
            verdict = "🔁 Same structure / same algorithm\n"
        elif max_sim > 0.5:
            verdict = "🟡 Partially similar approach\n"
        else:
            verdict = "🟢 Unique solution style\n"

        parts = [
            f"Most similar previous submission score: {max_sim:.3f}\n",
            f"Most similar code index: {idx}\n\n",
            verdict,
        ]

    # Store only after comparing so a submission never matches itself.
    student_codes.append(emb)
    raw_codes.append(code)

    return "".join(parts)
60
+
61
# Build the single-input, single-output UI around analyze() and start serving.
code_input = gr.Textbox(lines=10, label="Student Python Code")
report_output = gr.Textbox(lines=12, label="Code Similarity / Approach Analysis")

demo = gr.Interface(
    fn=analyze,
    inputs=code_input,
    outputs=report_output,
    title="CodeT5p — Code Similarity & Approach Clustering",
)
demo.launch()