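# app.py -- revision 7d05134 ("Update app.py") by aabdoo234, 4.06 kB.
# (Header reconstructed from web file-viewer residue: avatar caption,
# "verified" badge, and "raw" / "history blame" navigation links.)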
import gradio as gr
import numpy as np
from tensorflow.keras.models import load_model, save_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.utils import to_categorical
import json
# Load the pre-trained model and tokenizer
model = load_model("code_language_cnn.keras")

# tokenizer.json holds the JSON text that tokenizer_from_json() parses.
with open("tokenizer.json", "r", encoding="utf-8") as f:
    tokenizer_data = f.read()
tokenizer = tokenizer_from_json(tokenizer_data)

# Every snippet is padded/truncated to this many tokens before it reaches
# the model.
max_sequence_length = 500
# Class names; the position of a name in this list is used as its class index.
languages = ["C", "C++", "JAVA", "Python"]

# Restore feedback gathered by earlier runs. A missing file simply means no
# feedback yet; a truncated or otherwise invalid file (for example after an
# interrupted write) must not prevent the app from starting.
try:
    with open("feedback.json", "r", encoding="utf-8") as f:
        feedback_data = json.load(f)
except FileNotFoundError:
    feedback_data = []
except json.JSONDecodeError as exc:
    print(f"Ignoring unreadable feedback.json: {exc}")
    feedback_data = []

if not isinstance(feedback_data, list):
    # provide_feedback() appends to this object, so it has to be a list.
    print("Ignoring feedback.json: expected a JSON list of feedback entries")
    feedback_data = []
def predict_language(code_snippet):
    """Classify a code snippet and report the confidence for every language.

    Args:
        code_snippet: Source code text to classify.

    Returns:
        A ``(predicted_language, confidence_scores)`` tuple.
        ``predicted_language`` is the entry of ``languages`` with the highest
        model score; ``confidence_scores`` maps each language name to its
        score formatted as a percentage string with two decimals.
    """
    token_ids = tokenizer.texts_to_sequences([code_snippet])
    model_input = pad_sequences(
        token_ids,
        maxlen=max_sequence_length,
        padding='post',
        truncating='post',
    )

    # A single snippet was submitted, so only the first row is of interest.
    scores = model.predict(model_input)[0]

    confidence_scores = {}
    for index, language in enumerate(languages):
        confidence_scores[language] = f"{scores[index] * 100:.2f}%"

    best_index = np.argmax(scores)
    return languages[best_index], confidence_scores
def provide_feedback(code_snippet, predicted_language, feedback, correct_language=None):
    """Record the user's verdict on a prediction and persist it to disk.

    The entry is appended to the module-level ``feedback_data`` list and the
    whole list is rewritten to ``feedback.json``. When the prediction was
    marked "Incorrect" *and* the user supplied the right language, retraining
    is requested afterwards.

    Args:
        code_snippet: The code the prediction was made for.
        predicted_language: The language the model predicted.
        feedback: "Correct" or "Incorrect", as chosen in the UI.
        correct_language: The language picked by the user; only used when
            ``feedback`` is "Incorrect". May be ``None`` because the UI
            treats it as optional.

    Returns:
        The acknowledgement message shown to the user.
    """
    global feedback_data

    is_incorrect = feedback == "Incorrect"
    feedback_entry = {
        "code": code_snippet,
        "predicted_language": predicted_language,
        "feedback": feedback,
        # A confirmed prediction is its own ground truth.
        "correct_language": correct_language if is_incorrect else predicted_language,
    }
    feedback_data.append(feedback_entry)

    # Save feedback to file
    with open("feedback.json", "w", encoding="utf-8") as f:
        json.dump(feedback_data, f, indent=4)

    # retrain_model() works from the accumulated feedback_data (which already
    # contains this entry), so it is called without arguments. An "Incorrect"
    # verdict without a corrected language adds nothing the model could learn
    # from, so it does not trigger retraining.
    if is_incorrect and correct_language is not None:
        retrain_model()

    return "Thank you for your feedback!"
def retrain_model(code_snippet=None, correct_language=None, *, min_incorrect=10):
    """Fine-tune the model on the accumulated feedback once enough has arrived.

    Nothing happens until ``feedback_data`` holds at least ``min_incorrect``
    entries whose ``"feedback"`` is "Incorrect". When the threshold is met,
    every entry with a usable label is tokenized, padded and fed to
    ``model.fit``; afterwards the in-memory feedback list is reset.

    Args:
        code_snippet: Ignored. Accepted so that callers passing the latest
            snippet positionally keep working; that snippet is already part
            of ``feedback_data``.
        correct_language: Ignored, for the same reason as ``code_snippet``.
        min_incorrect: Minimum number of "Incorrect" entries required before
            retraining is attempted.

    Returns:
        None.
    """
    # feedback_data is rebound below, so it must be declared global; without
    # the declaration the name would be local and unreadable here.
    global feedback_data

    # Entries are dicts, so the verdict has to be read from their "feedback"
    # field rather than compared against the entry itself.
    incorrect_count = sum(
        1 for entry in feedback_data if entry.get("feedback") == "Incorrect"
    )
    if incorrect_count < min_incorrect:
        return

    # Only entries whose label is a known language can be one-hot encoded;
    # this skips "Incorrect" feedback submitted without a corrected language.
    usable = [
        entry for entry in feedback_data
        if entry.get("correct_language") in languages
    ]
    if not usable:
        return

    feedback_texts = [entry["code"] for entry in usable]
    label_ids = [languages.index(entry["correct_language"]) for entry in usable]

    # Tokenize and pad the new data
    seq = tokenizer.texts_to_sequences(feedback_texts)
    padded_seq = pad_sequences(
        seq, maxlen=max_sequence_length, padding='post', truncating='post'
    )

    # Convert labels to categorical (one-hot encoding)
    labels = to_categorical(label_ids, num_classes=len(languages))

    # Retrain the model
    model.fit(padded_seq, labels, epochs=2, batch_size=32, verbose=1)

    # Clear the feedback data after retraining. Only the in-memory list is
    # reset here; feedback.json is not rewritten by this function.
    feedback_data = []

    # NOTE: the retrained model is not saved to disk by this function.
    print("Model retrained")
# Gradio callback: runs the prediction when the "Predict" button is clicked
def interface_func(code_snippet):
    """Run a prediction for the UI.

    Args:
        code_snippet: Text entered in the code input box.

    Returns:
        The ``(predicted_language, confidence_scores)`` pair produced by
        ``predict_language``.
    """
    return predict_language(code_snippet)
# Assemble the Gradio UI: a prediction section followed by a feedback section.
with gr.Blocks() as demo:
    gr.Markdown("### Programming Language Detection with Feedback")

    # --- Prediction widgets ---
    code_input = gr.Textbox(label="Enter Code Snippet")
    predict_button = gr.Button("Predict")
    predicted_label = gr.Label(label="Predicted Language")
    confidence_output = gr.JSON(label="Confidence Scores")

    # --- Feedback widgets ---
    feedback_dropdown = gr.Radio(
        ["Correct", "Incorrect"],
        label="Was the prediction correct?",
    )
    correct_language_dropdown = gr.Dropdown(
        languages,
        label="If incorrect, select the correct language (optional)",
    )
    feedback_button = gr.Button("Submit Feedback")
    feedback_message = gr.Label(label="Feedback Status")

    # "Predict" sends the snippet to the model and fills both result widgets.
    predict_button.click(
        fn=interface_func,
        inputs=[code_input],
        outputs=[predicted_label, confidence_output],
    )

    # "Submit Feedback" forwards the snippet, the shown prediction and the
    # user's verdict to provide_feedback and displays its acknowledgement.
    feedback_button.click(
        fn=provide_feedback,
        inputs=[
            code_input,
            predicted_label,
            feedback_dropdown,
            correct_language_dropdown,
        ],
        outputs=[feedback_message],
    )

# Start the app; share=True asks Gradio for a public share link.
demo.launch(share=True)