import json import random from pathlib import Path import gradio as gr import numpy as np from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline # Constants MIN_WORDS = 50 MAX_WORDS = 500 SAMPLE_JSON_PATH = Path('samples.json') # Load models def load_model(model_name): tokenizer = AutoTokenizer.from_pretrained(model_name) model = AutoModelForSequenceClassification.from_pretrained(model_name) return pipeline('text-classification', model=model, tokenizer=tokenizer, truncation=True, max_length=512, top_k=4) classifier = load_model("ziadmostafa/MGT-Detection_deberta-base") # Load sample essays with open(SAMPLE_JSON_PATH, 'r') as f: demo_essays = json.load(f) # Global variable to store the current essay index current_essay_index = None TEXT_CLASS_MAPPING = { 'LABEL_0': 'Human-Written', 'LABEL_2': 'Machine-Generated' } def process_result(text): result = classifier(text)[0] labels = [TEXT_CLASS_MAPPING[x['label']] for x in result if x['label'] in TEXT_CLASS_MAPPING] scores = list(np.array([x['score'] for x in result if x['label'] in TEXT_CLASS_MAPPING])) final_results = dict(zip(labels, scores)) # Return only the label with the highest score return max(final_results, key=final_results.get) def update_result(name): if name == '': return "" return process_result(name) def active_button(input_text): if not (50 <= len(input_text.split()) <= 500): return gr.Button("Check Origin", variant="primary", interactive=False) return gr.Button("Check Origin", variant="primary", interactive=True) def clear_inputs(): return "", gr.Button("Check Origin", variant="primary", interactive=False) def count_words(text): return f'{len(text.split())}/500 words (Minimum 50 words)' css = """ body, .gradio-container { font-family: Arial, sans-serif; } .gr-input, .gr-textarea { } .class-intro { padding: 15px; margin-bottom: 20px; border-radius: 5px; } .class-intro h2 { margin-top: 0; } .class-intro p { margin-bottom: 5px; } """ class_intro_html = """
Human-Written: Original text created by humans.
Machine-Generated: Text created by AI from basic prompts, without style instructions.