Spaces:
Build error
Build error
Commit
·
9ac8247
1
Parent(s):
00e9766
added classifier implementation and associated files
Browse files- app.py +45 -4
- cr_tokenizer.json +0 -0
- requirements.txt +3 -0
app.py
CHANGED
|
@@ -1,7 +1,48 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tensorflow as tf
|
| 4 |
+
from tokenizers import Tokenizer
|
| 5 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
| 6 |
|
| 7 |
+
# Serialized artifacts produced at training time; both paths are resolved
# relative to the process working directory.
TOKENIZER_PATH = "cr_tokenizer.json"
MODEL_PATH = "crv3.keras"

# Loaded once at import so every request reuses the same objects.
tokenizer = Tokenizer.from_file(TOKENIZER_PATH)
model = tf.keras.models.load_model(MODEL_PATH)
|
| 10 |
|
| 11 |
+
# Tokenization function
|
| 12 |
+
def tokenize_java_code(code: str, max_length=100):
    """Encode Java source into a fixed-length batch of token ids.

    Args:
        code: Java source text to encode with the module-level ``tokenizer``.
        max_length: Target sequence length passed to ``pad_sequences`` as
            ``maxlen``; shorter sequences are padded at the end.

    Returns:
        A 2-D NumPy array with a single row, ready to be passed to
        ``model.predict``.
    """
    token_ids = tokenizer.encode(code).ids
    batch = pad_sequences([token_ids], maxlen=max_length, padding="post")
    # Take the single padded row and restore the leading batch axis.
    row = batch[0]
    return np.array(row).reshape(1, -1)
|
| 17 |
+
|
| 18 |
+
# Prediction function
|
| 19 |
+
def _read_uploaded_code(uploaded):
    """Return the text content of an uploaded Java file.

    Accepts raw ``bytes``/``bytearray`` (what a ``gr.File(type="binary")``
    input hands to the callback) as well as file-like objects exposing
    ``read()``, so both calling conventions work.

    Raises:
        TypeError: If the payload is neither bytes, text, nor file-like.
    """
    data = uploaded.read() if hasattr(uploaded, "read") else uploaded
    if isinstance(data, (bytes, bytearray)):
        # Replace undecodable bytes instead of failing: source files are not
        # guaranteed to be UTF-8 encoded.
        return bytes(data).decode("utf-8", errors="replace")
    if isinstance(data, str):
        return data
    raise TypeError(
        f"Unsupported upload payload of type {type(data).__name__}"
    )


def classify_code(input_text, input_file):
    """Classify Java code as readable or unreadable.

    Args:
        input_text: Java source pasted by the user; may be ``None`` or empty.
        input_file: Uploaded Java file as bytes or a file-like object, or
            ``None`` when nothing was uploaded. When present it takes
            precedence over ``input_text``.

    Returns:
        ``"Readable"`` when the model score is above 0.5, ``"Unreadable"``
        otherwise, or a prompt message when no code was supplied.
    """
    if input_file is not None:
        code = _read_uploaded_code(input_file)
    else:
        # An untouched textbox can arrive as None rather than "".
        code = input_text or ""

    if not code.strip():  # Nothing to classify
        return "Please provide a Java code snippet."

    # Tokenize and predict; the model returns one score per batch row.
    tokenized_code = tokenize_java_code(code)
    prediction = model.predict(tokenized_code)[0][0]

    return "Readable" if prediction > 0.5 else "Unreadable"
|
| 36 |
+
|
| 37 |
+
# Create Gradio interface
|
| 38 |
+
# Two alternative inputs: pasted source text or an uploaded .java file.
code_box = gr.Textbox(
    lines=10,
    placeholder="Paste Java code here...",
    label="Java Code Snippet",
)
file_upload = gr.File(type="binary", label="Upload Java File (.java)")

# Wire the classifier into a simple form UI and start serving it.
demo = gr.Interface(
    fn=classify_code,
    inputs=[code_box, file_upload],
    outputs=gr.Text(label="Readability Prediction"),
    title="Java Code Readability Classifier",
    description="Upload a Java file or paste a Java code snippet to check if it's readable or unreadable.",
    allow_flagging="never",
)
demo.launch()
|
cr_tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
tensorflow
|
| 3 |
+
tokenizers
|