from flask import Flask, request, jsonify, render_template_string from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM import torch from langdetect import detect import re import logging # Silence Flask dev server logs logging.getLogger("werkzeug").setLevel(logging.ERROR) app = Flask(__name__) # ====================================================== # Grammar correction model (NO Java, CPU only) # ====================================================== GRAMMAR_MODEL = "vennify/t5-base-grammar-correction" grammar_tokenizer = AutoTokenizer.from_pretrained(GRAMMAR_MODEL) grammar_model = AutoModelForSeq2SeqLM.from_pretrained(GRAMMAR_MODEL) grammar_model.eval() def grammar_check(text): if not text.strip(): return text inputs = grammar_tokenizer( "grammar: " + text, return_tensors="pt", truncation=True ) with torch.no_grad(): outputs = grammar_model.generate( **inputs, max_length=256 ) return grammar_tokenizer.decode( outputs[0], skip_special_tokens=True ) # ====================================================== # Text generation model (CPU) # ====================================================== CHAT_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" chat_tokenizer = AutoTokenizer.from_pretrained(CHAT_MODEL) chat_model = AutoModelForCausalLM.from_pretrained( CHAT_MODEL, dtype=torch.float32 ) chat_model.eval() def generate_text(prompt): if not prompt.strip(): return "" inputs = chat_tokenizer( prompt, return_tensors="pt" ) with torch.no_grad(): output = chat_model.generate( **inputs, max_new_tokens=150, do_sample=True, temperature=0.7, top_p=0.9 ) return chat_tokenizer.decode( output[0], skip_special_tokens=True ) # ====================================================== # Helpers # ====================================================== def detect_language(text): try: return detect(text) except: return "unknown" def ai_text_detector(text): words = text.split() if len(words) < 30: return "Not enough text to analyze" avg_len = sum(len(w) for w in words) / len(words) repetition = len(set(words)) / len(words) score = 0 if avg_len > 5.5: score += 1 if repetition < 0.55: score += 1 if re.search( r"\b(in conclusion|overall|moreover|furthermore)\b", text.lower() ): score += 1 return "Likely AI-generated" if score >= 2 else "Likely human-written" # ====================================================== # UI (HTML + CSS + JS) # ====================================================== HTML_PAGE = """