Spaces:
Running
Running
| """ | |
| 🧬 Advanced AI Text Humanizer | |
| ───────────────────────────── | |
| Multi-model ensemble humanization pipeline for Hugging Face Spaces. | |
| Uses state-of-the-art LLMs with multiple rewriting strategies, | |
| style transfer, readability optimization, and AI-detection evasion. | |
| Models Used (in ensemble pipeline): | |
| 1. meta-llama/Llama-3.3-70B-Instruct — Primary rewriter | |
| 2. mistralai/Mistral-7B-Instruct-v0.3 — Secondary rewriter | |
| 3. HuggingFaceH4/zephyr-7b-beta — Style transfer | |
| 4. facebook/bart-large-cnn — Paraphrase refinement | |
| 5. SentenceTransformers for similarity scoring | |
| Author: Advanced Humanizer Pipeline | |
| Space Hardware: GPU A100 (paid config) | |
| """ | |
| import gradio as gr | |
# Temporary workaround for Gradio/Jinja template cache crash on Spaces:
# wrap jinja2's LRUCache.get so that a TypeError raised during the lookup is
# treated as a cache miss (the caller's `default` is returned) instead of
# propagating.
try:
    from jinja2.utils import LRUCache as _LRUCache
    # Keep a handle on the unpatched method so the wrapper can delegate to it.
    _orig_lru_get = _LRUCache.get
    def _safe_lru_get(self, key, default=None):
        """Delegate to the original ``LRUCache.get``; return ``default`` on TypeError."""
        try:
            return _orig_lru_get(self, key, default)
        except TypeError:
            return default
    # Install the wrapper on the class so every LRUCache instance uses it.
    _LRUCache.get = _safe_lru_get
    print("β jinja2 LRUCache patch applied")
except Exception as e:
    # Best-effort: any failure while importing or patching is reported and
    # start-up continues without the patch.
    print(f"β οΈ jinja2 patch not applied: {e}")
| import os | |
| import re | |
| import json | |
| import time | |
| import random | |
| import logging | |
| import hashlib | |
| import textwrap | |
| import difflib | |
| from typing import Optional, List, Dict, Tuple, Any | |
| from dataclasses import dataclass, field | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from collections import Counter | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| from transformers import ( | |
| AutoTokenizer, | |
| AutoModelForCausalLM, | |
| AutoModelForSeq2SeqLM, | |
| pipeline, | |
| TextGenerationPipeline, | |
| set_seed, | |
| ) | |
| from transformers.generation.utils import GenerationConfig | |
| from huggingface_hub import InferenceClient | |
| import requests | |
| from sentence_transformers import SentenceTransformer | |
| import nltk | |
| from nltk.tokenize import sent_tokenize, word_tokenize | |
| from nltk.corpus import stopwords | |
| from readability import Readability | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Download NLTK data | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
# NLTK files each resource under a category directory. Looking everything up
# under "tokenizers/" made the check miss for the corpus and the tagger, so
# they were re-downloaded on every start. Map each resource to its category.
_NLTK_RESOURCE_CATEGORIES = {
    "punkt": "tokenizers",
    "punkt_tab": "tokenizers",
    "stopwords": "corpora",
    "averaged_perceptron_tagger": "taggers",
}
for nltk_resource, nltk_category in _NLTK_RESOURCE_CATEGORIES.items():
    try:
        nltk.data.find(f"{nltk_category}/{nltk_resource}")
    except LookupError:
        # Not installed locally: fetch it (quietly) once.
        nltk.download(nltk_resource, quiet=True)
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Configuration | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
# Hugging Face access token taken from the environment ("" when unset).
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Run on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Process-wide logging setup plus the module's named logger.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("humanizer")
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Data Classes | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
@dataclass
class HumanizationResult:
    """Outcome of one humanization run, with before/after metrics.

    The class must be a dataclass: the pipeline builds it with keyword
    arguments, which a plain annotated class does not accept.
    """

    # Input text exactly as received, and the final rewritten text.
    original: str
    humanized: str
    # Registry key of the primary model, and the prompt mode actually used.
    model_used: str
    mode: str
    # Number of distinct lowercase words present in only one of the two texts.
    changes_made: int
    # Similarity between original and humanized text, in [0, 1].
    similarity_score: float
    # Readability metric dictionaries for the input and the output.
    readability_before: Dict[str, float]
    readability_after: Dict[str, float]
    # Estimated probability (0-1) that the text is AI-generated.
    ai_probability_before: float
    ai_probability_after: float
    # Wall-clock seconds spent in the pipeline.
    processing_time: float
    # Labels of the transformations that were applied, in order.
    strategies_applied: List[str]
    word_count_before: int
    word_count_after: int
    # "perplexity" entries of the readability dictionaries (0 when absent).
    perplexity_before: float
    perplexity_after: float
@dataclass
class PipelineConfig:
    """Settings for one humanization run.

    Declared as a dataclass so callers can override individual fields by
    keyword (``PipelineConfig(mode="casual")``) while the rest keep the
    defaults below.
    """

    # Prompt template key; unknown values fall back to "balanced" in the pipeline.
    mode: str = "balanced"
    intensity: float = 0.5
    preserve_meaning: bool = True
    # Toggles for the individual pre-/post-processing passes.
    add_imperfections: bool = True
    vary_sentence_length: bool = True
    add_transitions: bool = True
    remove_patterns: bool = True
    add_personal_touch: bool = True
    # Sampling parameters forwarded to the generation call.
    temperature: float = 0.7
    top_p: float = 0.9
    max_tokens: int = 2048
    # When True, secondary models get a chance to refine the primary output.
    ensemble: bool = True
    use_all_models: bool = True
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Model Registry | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
# Catalogue of the models the pipeline knows about, keyed by a short internal
# name. Each entry carries the Hub repository id, a display name, the model
# family ("chat" or "seq2seq"), a maximum length and a short description.
MODEL_REGISTRY = dict(
    llama_3_70b=dict(
        id="meta-llama/Llama-3.3-70B-Instruct",
        name="Llama 3.3 70B Instruct",
        type="chat",
        max_length=8192,
        description="Primary powerhouse model for deep rewriting",
    ),
    mistral_7b=dict(
        id="mistralai/Mistral-7B-Instruct-v0.3",
        name="Mistral 7B Instruct v0.3",
        type="chat",
        max_length=32768,
        description="Fast and creative secondary model",
    ),
    zephyr_7b=dict(
        id="HuggingFaceH4/zephyr-7b-beta",
        name="Zephyr 7B Beta",
        type="chat",
        max_length=4096,
        description="Excellent style transfer capabilities",
    ),
    phi_3_mini=dict(
        id="microsoft/Phi-3-mini-128k-instruct",
        name="Phi-3 Mini 128K",
        type="chat",
        max_length=128000,
        description="Lightweight model for quick passes",
    ),
    bart_paraphrase=dict(
        id="facebook/bart-large-cnn",
        name="BART Large CNN",
        type="seq2seq",
        max_length=1024,
        description="Specialized paraphrasing model",
    ),
    gemma_2_27b=dict(
        id="google/gemma-2-27b-it",
        name="Gemma 2 27B IT",
        type="chat",
        max_length=8192,
        description="Google's instruction-tuned model",
    ),
)
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # AI Detection Model | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
class AIDetector:
    """Estimates probability that text is AI-generated.

    A RoBERTa sequence-classification detector is used when it can be loaded;
    otherwise (or on any inference error, or for blank text) a regex and
    statistics based heuristic is used instead.
    """

    # Regexes for the heuristic; every match adds 0.1 to the score.
    _AI_INDICATORS = (
        r"\b(In conclusion|Furthermore|Moreover|Additionally|It's important to note|Delve|Tapestry|Testament|Landscape|Realm|Harness|Leverage)\b",
        r"\b(very|really|quite|extremely|significantly)\b",
        r"\b(as an AI|language model|I don't have|I cannot)\b",
        r"[.,]{2,}",
        r"\b(first|second|third|finally|in summary)\b",
    )

    # Label names (lower-cased) that mark the "AI-generated" class.
    _AI_LABEL_NAMES = frozenset({"fake", "ai", "machine", "generated", "ai-generated"})

    def __init__(self):
        """Try to load the detector model; record the outcome in ``self.loaded``."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Use a lightweight detector
        self.model_name = "roberta-base-openai-detector"
        self.ai_label_index = 0
        self.loaded = False
        try:
            # The detector is a classifier, so it needs the sequence
            # classification head. Imported here because the file-level
            # transformers import does not include this class.
            from transformers import AutoModelForSequenceClassification

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.model_name
            )
            self.model.to(self.device)
            self.model.eval()
            self.ai_label_index = self._find_ai_label_index(self.model.config)
            self.loaded = True
            logger.info(f"AI Detector loaded: {self.model_name}")
        except Exception as e:
            logger.warning(f"AI Detector failed to load: {e}")
            self.loaded = False

    @staticmethod
    def _find_ai_label_index(config) -> int:
        """Return the class index whose label names the AI-generated class.

        Falls back to 0 when the config has no recognisable label.
        NOTE(review): index 0 is assumed to be "Fake" for this detector —
        confirm against the model card.
        """
        id2label = getattr(config, "id2label", None) or {}
        for idx, label in id2label.items():
            if str(label).strip().lower() in AIDetector._AI_LABEL_NAMES:
                return int(idx)
        return 0

    def detect(self, text: str) -> float:
        """Returns probability (0-1) that text is AI-generated."""
        if not self.loaded or not text.strip():
            return self._heuristic_detect(text)
        try:
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(self.device) for k, v in inputs.items()}
            with torch.no_grad():
                logits = self.model(**inputs).logits
            # logits is (batch, num_classes); take the single batch row.
            probs = torch.softmax(logits, dim=-1)[0]
            if probs.shape[-1] < 2:
                return 0.5
            ai_prob = probs[getattr(self, "ai_label_index", 0)].item()
            return min(max(ai_prob, 0.0), 1.0)
        except Exception as e:
            logger.error(f"Detection error: {e}")
            return self._heuristic_detect(text)

    def _heuristic_detect(self, text: str) -> float:
        """Fallback heuristic AI detection.

        Returns 0.5 for blank text; otherwise a score clamped to [0, 1].
        """
        if not text.strip():
            return 0.5
        score = 0.0
        sentences = sent_tokenize(text)
        if sentences:
            avg_len = sum(len(s.split()) for s in sentences) / len(sentences)
            # A mean sentence length in the 15-25 word band adds 0.2.
            if 15 < avg_len < 25:
                score += 0.2
        for pattern in self._AI_INDICATORS:
            score += len(re.findall(pattern, text, re.IGNORECASE)) * 0.1
        # Check for low burstiness (uniform complexity)
        words = text.split()
        if len(words) > 10 and np.var([len(w) for w in words]) < 3.0:
            score += 0.15
        return min(max(score, 0.0), 1.0)
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Readability Analyzer | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
class ReadabilityAnalyzer:
    """Comprehensive readability analysis."""

    @staticmethod
    def analyze(text: str) -> Dict[str, float]:
        """Compute readability and length statistics for ``text``.

        Declared static because it takes no ``self`` yet is called on an
        instance. Returns ``{}`` for blank text and ``{"error": <message>}``
        when the analysis as a whole fails. Each library metric is
        best-effort: a metric that cannot be computed is left out.
        """
        if not text.strip():
            return {}
        try:
            r = Readability(text)
            results = {}
            # (method on the Readability object, ((result key, attribute), ...))
            # NOTE(review): assumes py-readability-metrics, where the Flesch
            # reading-ease metric is `flesch()` and exposes `grade_levels`.
            metric_specs = (
                ("flesch", (("flesch_reading_ease", "score"), ("grade_level", "grade_levels"))),
                ("flesch_kincaid", (("flesch_kincaid_grade", "grade_level"),)),
                ("gunning_fog", (("gunning_fog", "grade_level"),)),
                ("smog", (("smog_index", "grade_level"),)),
            )
            for method_name, outputs in metric_specs:
                try:
                    metric = getattr(r, method_name)()
                    for key, attr in outputs:
                        results[key] = getattr(metric, attr)
                except Exception as metric_err:
                    # Deliberately non-fatal; keep whatever was collected.
                    logger.debug(f"Readability metric {method_name} skipped: {metric_err}")

            words = text.split()
            sentences = sent_tokenize(text)
            results["word_count"] = len(words)
            results["sentence_count"] = len(sentences)
            results["avg_words_per_sentence"] = (
                results["word_count"] / max(results["sentence_count"], 1)
            )
            results["avg_word_length"] = np.mean([len(w) for w in words]) if words else 0
            # Burstiness (variation in sentence length)
            sent_lengths = [len(s.split()) for s in sentences]
            if len(sent_lengths) > 1:
                results["burstiness"] = np.std(sent_lengths)
                # Sentence-length statistic (exp of the negated mean log
                # length); it is not a language-model perplexity.
                results["perplexity"] = np.exp(
                    -np.mean([np.log(max(l, 1)) for l in sent_lengths])
                )
            else:
                results["burstiness"] = 0
                results["perplexity"] = 1
            return results
        except Exception as e:
            logger.error(f"Readability analysis error: {e}")
            return {"error": str(e)}
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Similarity Scorer | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
class SimilarityScorer:
    """Measures semantic similarity between texts."""

    def __init__(self):
        """Try to load the sentence-embedding model; record the outcome in ``self.loaded``."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        try:
            self.model = SentenceTransformer(
                "sentence-transformers/all-MiniLM-L6-v2",
                device=self.device,
            )
            self.loaded = True
            logger.info("Similarity scorer loaded")
        except Exception as e:
            logger.warning(f"Similarity scorer failed: {e}")
            self.loaded = False

    def score(self, text1: str, text2: str) -> float:
        """Return a similarity in [0, 1] for the two texts.

        Uses cosine similarity of the embeddings when the model is loaded,
        and word-set overlap otherwise or when embedding fails.
        """
        if not self.loaded:
            return self._simple_similarity(text1, text2)
        try:
            embeddings = self.model.encode([text1, text2], convert_to_numpy=True)
            denom = float(np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1]))
            if denom == 0.0:
                # A zero vector has no direction; avoid a NaN from 0/0.
                return 0.0
            sim = float(np.dot(embeddings[0], embeddings[1])) / denom
            return max(0.0, min(1.0, sim))
        except Exception as e:
            logger.error(f"Similarity scoring error: {e}")
            return self._simple_similarity(text1, text2)

    @staticmethod
    def _simple_similarity(t1: str, t2: str) -> float:
        """Jaccard overlap of the lower-cased word sets; 0.0 if either is empty."""
        words1 = set(t1.lower().split())
        words2 = set(t2.lower().split())
        if not words1 or not words2:
            return 0.0
        return len(words1 & words2) / len(words1 | words2)
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Prompt Templates | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
# Prompt pairs per writing mode. "system" sets the persona; "user" is a
# str.format template whose only placeholder is {text}. Mis-encoded
# characters in the prompt text have been restored (em dash, "cliché") so the
# models are not fed garbled bytes.
PROMPT_TEMPLATES = {
    "casual": {
        "system": """You are an expert at rewriting AI-generated text to sound like it was written by a real, casual human. Your writing has these characteristics:
- Uses contractions naturally (don't, can't, it's, I'm)
- Varies sentence length significantly (some very short, some longer)
- Occasionally starts sentences with "And", "But", "So"
- Uses colloquial expressions and mild interjections
- Has natural imperfections — not every sentence is grammatically perfect
- Sounds conversational, like explaining something to a friend
- Uses specific examples and personal-feeling language
- Avoids overly formal transitions and academic phrasing
- Writes with personality and occasional humor
- Uses rhetorical questions naturally""",
        "user": """Rewrite the following text to sound completely human and casual. Make it sound like a real person wrote it naturally. Preserve the core meaning and information, but completely transform the style.
RULES:
1. DO NOT use phrases like "In conclusion", "Furthermore", "Moreover", "Additionally", "It's important to note"
2. DO NOT use overly formal academic language
3. DO NOT make every sentence the same length
4. DO use contractions frequently
5. DO vary your sentence structure
6. DO add natural transitions that humans actually use
7. DO make it sound like someone speaking casually but intelligently
Original text:
{text}""",
    },
    "professional": {
        "system": """You are an expert professional writer who makes AI text sound authentically human. Your professional writing:
- Uses precise, industry-appropriate language without being robotic
- Varies sentence structure and length naturally
- Includes subtle personal insights and perspective
- Uses professional but warm tone
- Avoids cliché AI phrases and patterns
- Writes with authority but approachability
- Uses specific data points and concrete examples
- Has natural paragraph flow""",
        "user": """Rewrite the following text to sound like it was written by a seasoned professional in the field. Make it sound authentically human while maintaining professionalism.
RULES:
1. Remove any robotic or template-sounding phrases
2. Add subtle professional personality
3. Use specific, concrete language
4. Vary sentence structure naturally
5. Maintain the core information and accuracy
6. Sound authoritative but approachable
7. Avoid AI-typical transition words
Original text:
{text}""",
    },
    "creative": {
        "system": """You are a creative writer who excels at making text sound deeply human and engaging. Your writing:
- Uses vivid imagery and sensory details
- Employs metaphor and analogy naturally
- Has strong narrative flow
- Varies rhythm and pacing
- Shows personality and voice
- Uses creative sentence structures
- Includes unexpected but fitting word choices
- Feels alive and dynamic""",
        "user": """Transform the following text into something that reads like it was written by a talented creative human writer. Make it engaging, vivid, and full of personality while preserving the core message.
RULES:
1. Add vivid imagery and sensory details where appropriate
2. Use metaphor and creative comparisons
3. Vary rhythm — mix short punchy sentences with longer flowing ones
4. Show, don't just tell
5. Make it emotionally engaging
6. Avoid any AI-sounding clichés
7. Write with unmistakable human voice and style
Original text:
{text}""",
    },
    "academic": {
        "system": """You are an academic writer who makes scholarly text sound authentically human. Your academic writing:
- Uses precise scholarly language without being mechanical
- Shows genuine intellectual curiosity
- Includes nuanced arguments and counterpoints
- Uses natural academic transitions
- Varies sentence complexity
- Shows the author's analytical voice
- Cites reasoning naturally
- Avoids formulaic academic AI patterns""",
        "user": """Rewrite the following academic text to sound like it was written by a thoughtful human scholar. Make it sound like genuine intellectual writing, not AI-generated academic prose.
RULES:
1. Remove formulaic academic AI phrases
2. Show genuine analytical thinking
3. Use natural scholarly transitions
4. Include nuanced perspectives
5. Vary sentence complexity naturally
6. Sound like a real academic with a distinct voice
7. Maintain academic rigor while sounding human
Original text:
{text}""",
    },
    "balanced": {
        "system": """You are an expert at making AI-generated text sound completely human. You analyze the input text and rewrite it with these human characteristics:
- Natural sentence variation (mix of short, medium, and long sentences)
- Authentic voice and personality
- Natural imperfections (occasional fragments, starting with conjunctions)
- Realistic transitions (not formulaic)
- Appropriate use of contractions
- Specific and concrete language instead of vague generalizations
- Natural paragraph structure
- Human-like word choice and phrasing
- Appropriate level of formality based on context""",
        "user": """Rewrite the following text to make it sound 100% human-written. The goal is to preserve all the original information and meaning while completely transforming how it reads — it should pass as authentic human writing.
RULES:
1. NEVER use: "In conclusion", "Furthermore", "Moreover", "Additionally", "It's important to note", "Delve", "Tapestry", "Testament"
2. Vary sentence length significantly — include some very short sentences
3. Use contractions naturally
4. Add subtle personality and voice
5. Use specific, concrete language
6. Start some sentences with "And", "But", "So", "Because"
7. Make it read like a smart human wrote it naturally
Original text:
{text}""",
    },
}
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Text Analysis Utilities | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
class TextAnalyzer:
    """Comprehensive text analysis utilities.

    Both helpers are static: they take no ``self`` but are called on an
    instance by the engine.
    """

    @staticmethod
    def detect_ai_patterns(text: str) -> List[Dict[str, Any]]:
        """Detect common AI writing patterns.

        Returns one dictionary per finding with a "type", a "severity" and
        either the matched "phrase" or a human-readable "detail".
        """
        patterns = []
        ai_phrases = [
            "in conclusion", "furthermore", "moreover", "additionally",
            "it's important to note", "it is important to note",
            "delve into", "delve deep", "tapestry", "testament to",
            "in the realm of", "in today's world", "in today's digital",
            "ever-evolving", "rapidly evolving", "fast-paced",
            "harness the power", "leverage", "utilize",
            "a testament to", "a rich tapestry", "navigate the landscape",
            "foster a sense", "fosters a deeper", "pivotal role",
            "shed light on", "play a crucial role", "plays a vital role",
            "it's worth noting", "it is worth noting",
            "notably", "crucially", "significantly",
            "in essence", "in summary", "to summarize",
        ]
        text_lower = text.lower()
        # Plain substring search, one finding per listed phrase that occurs.
        patterns.extend(
            {"type": "ai_phrase", "phrase": phrase, "severity": "medium"}
            for phrase in ai_phrases
            if phrase in text_lower
        )
        # Check for overly uniform sentence lengths
        sentences = sent_tokenize(text)
        if len(sentences) > 3:
            lengths = [len(s.split()) for s in sentences]
            std_dev = np.std(lengths)
            if std_dev < 3:
                patterns.append({
                    "type": "uniform_sentences",
                    "detail": f"Low sentence length variation (std={std_dev:.1f})",
                    "severity": "high",
                })
        # Check for lack of contractions
        contraction_count = len(re.findall(r"\b\w+'\w+\b", text))
        word_count = len(text.split())
        if word_count > 50 and contraction_count < 3:
            patterns.append({
                "type": "no_contractions",
                "detail": f"Only {contraction_count} contractions in {word_count} words",
                "severity": "medium",
            })
        # Check for repetitive sentence starters
        starters = [s.split()[0].lower() if s.split() else "" for s in sentences]
        for starter, count in Counter(starters).items():
            if count > len(sentences) * 0.3 and len(starter) > 2:
                patterns.append({
                    "type": "repetitive_start",
                    "detail": f"'{starter}' starts {count}/{len(sentences)} sentences",
                    "severity": "medium",
                })
        return patterns

    @staticmethod
    def get_diff_html(original: str, humanized: str) -> str:
        """Generate HTML diff showing changes.

        Words are compared as whitespace-separated tokens. Removed words are
        wrapped in a red struck-through span and added words in a green span.
        All text is HTML-escaped because it comes from user input and model
        output and is rendered as markup.
        """
        from html import escape

        removed_open = '<span style="background:#ffcccc;text-decoration:line-through">'
        added_open = '<span style="background:#ccffcc">'
        orig_words = original.split()
        human_words = humanized.split()
        matcher = difflib.SequenceMatcher(None, orig_words, human_words)
        parts = []
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == "equal":
                parts.extend(escape(w, quote=False) for w in orig_words[i1:i2])
                continue
            # "replace" emits the removed run followed by the added run.
            if tag in ("replace", "delete"):
                parts.append(
                    removed_open + escape(" ".join(orig_words[i1:i2]), quote=False) + "</span>"
                )
            if tag in ("replace", "insert"):
                parts.append(
                    added_open + escape(" ".join(human_words[j1:j2]), quote=False) + "</span>"
                )
        return " ".join(parts)
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Model Manager | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
class ModelManager:
    """Manages loading and inference of all models.

    ``pipelines`` maps a registry key to either a local transformers pipeline
    or, when local loading failed, a Hugging Face ``InferenceClient``.
    """

    def __init__(self):
        self.models = {}
        self.pipelines = {}
        self.tokenizers = {}
        self.loaded = False

    def load_models(self, model_keys: Optional[List[str]] = None):
        """Load specified models into memory.

        Keys missing from ``MODEL_REGISTRY`` are skipped. A model that cannot
        be loaded locally is replaced by an Inference API client when one can
        be created. ``self.loaded`` is set once every key has been attempted.
        """
        if model_keys is None:
            model_keys = ["llama_3_70b", "mistral_7b", "bart_paraphrase"]
        for key in model_keys:
            model_info = MODEL_REGISTRY.get(key)
            if model_info is None:
                continue
            try:
                logger.info(f"Loading model: {model_info['name']}...")
                self._load_local(key, model_info)
                logger.info(f"β Loaded: {model_info['name']}")
            except Exception as e:
                logger.error(f"β Failed to load {model_info['name']}: {e}")
                # Try HF Inference API as fallback
                try:
                    self.pipelines[key] = InferenceClient(
                        model=model_info["id"],
                        token=HF_TOKEN or None,
                    )
                    logger.info(f"β Using Inference API for: {model_info['name']}")
                except Exception as e2:
                    logger.error(f"β Inference API also failed for {key}: {e2}")
        self.loaded = True
        logger.info(f"Model loading complete. Loaded: {list(self.pipelines.keys())}")

    def _load_local(self, key: str, model_info: Dict[str, Any]) -> None:
        """Load tokenizer, model and pipeline for one registry entry."""
        # An unset token arrives as ""; pass None so no empty token is sent.
        token = HF_TOKEN or None
        if model_info["type"] == "chat":
            model_cls = AutoModelForCausalLM
            task = "text-generation"
            # NOTE(review): trust_remote_code runs code shipped in the model
            # repository — confirm it is needed for these models.
            extra = {"trust_remote_code": True}
        elif model_info["type"] == "seq2seq":
            model_cls = AutoModelForSeq2SeqLM
            task = "text2text-generation"
            extra = {}
        else:
            raise ValueError(f"Unsupported model type: {model_info['type']}")

        tokenizer = AutoTokenizer.from_pretrained(model_info["id"], token=token, **extra)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = model_cls.from_pretrained(
            model_info["id"],
            torch_dtype=torch.float16,
            device_map="auto",
            token=token,
            **extra,
        )
        pipe = pipeline(
            task,
            model=model,
            tokenizer=tokenizer,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        self.models[key] = model
        self.tokenizers[key] = tokenizer
        self.pipelines[key] = pipe

    @staticmethod
    def _extract_generated_text(result: Any) -> str:
        """Pull the generated string out of a text-generation pipeline result.

        Accepts a list of result dicts or a single dict. "generated_text" may
        be a string or a list of chat messages, in which case the last
        message's content is used. Returns "" for any other shape.
        """
        if isinstance(result, list):
            if not result:
                return ""
            result = result[0]
        if not isinstance(result, dict):
            return ""
        output = result.get("generated_text", "")
        if isinstance(output, str):
            return output.strip()
        if isinstance(output, list) and output:
            return (output[-1].get("content", "") or "").strip()
        return ""

    def generate(
        self,
        model_key: str,
        prompt: str,
        system_prompt: str = "",
        temperature: float = 0.7,
        top_p: float = 0.9,
        max_tokens: int = 2048,
    ) -> str:
        """Generate text using specified model.

        Returns the stripped model output, or "" when the model is not
        loaded, produces nothing usable, or raises (the error is logged).
        """
        if model_key not in self.pipelines:
            logger.error(f"Model {model_key} not loaded")
            return ""
        pipe = self.pipelines[model_key]
        model_info = MODEL_REGISTRY.get(model_key, {})
        try:
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append({"role": "user", "content": prompt})

            # The API fallback must be checked first: a fallback client for
            # a "chat" model would otherwise be called like a local pipeline.
            if isinstance(pipe, InferenceClient):
                response = pipe.chat_completion(
                    messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                )
                return (response.choices[0].message.content or "").strip()

            if model_info.get("type") == "chat" or isinstance(pipe, TextGenerationPipeline):
                result = pipe(
                    messages,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    do_sample=True,
                    return_full_text=False,
                )
                return self._extract_generated_text(result)

            # Seq2seq pipeline
            result = pipe(
                prompt,
                max_length=min(max_tokens + len(prompt.split()), 1024),
                temperature=temperature,
                do_sample=True,
            )
            if isinstance(result, list) and len(result) > 0:
                return result[0]["generated_text"].strip()
        except Exception as e:
            logger.error(f"Generation error with {model_key}: {e}")
            return ""
        return ""
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| # Humanization Engine | |
| # βββββββββββββββββββββββββββββββββββββββββββββ | |
| class HumanizationEngine: | |
| """Core humanization engine with multi-model ensemble.""" | |
def __init__(self):
    """Create the engine's components; rewriting models are loaded later by ``initialize``."""
    # The manager starts empty; its models are loaded by load_models().
    self.model_manager = ModelManager()
    # The detector and the similarity scorer try to load their models in
    # their own constructors, so these two lines may be slow.
    self.ai_detector = AIDetector()
    self.readability = ReadabilityAnalyzer()
    self.similarity = SimilarityScorer()
    self.analyzer = TextAnalyzer()
    # Flipped to True by initialize() once the rewriting models are loaded.
    self.initialized = False
| def initialize(self): | |
| """Initialize all components.""" | |
| if self.initialized: | |
| return | |
| logger.info("Initializing Humanization Engine...") | |
| self.model_manager.load_models() | |
| self.initialized = True | |
| logger.info("β Engine initialized") | |
def humanize(
    self,
    text: str,
    config: PipelineConfig,
) -> HumanizationResult:
    """Main humanization pipeline.

    Steps: measure the input, apply the enabled pre-processing passes,
    rewrite with the primary model, optionally let up to two secondary
    models refine the result (a refinement is kept only when the detector
    scores it lower), apply the enabled post-processing passes, then
    measure the output.

    Args:
        text: Text to rewrite.
        config: Switches and sampling parameters for this run.

    Returns:
        A ``HumanizationResult`` with the rewritten text and metrics.

    Raises:
        RuntimeError: If no model is available or the primary model returns
            no output.
    """
    start_time = time.time()
    strategies = []
    # Longest text (in characters) handed to a secondary model. Longer text
    # is not refined at all: refining a truncated copy and then adopting it
    # would silently drop the tail of the document.
    refine_char_limit = 3000
    pipelines = self.model_manager.pipelines

    # Pre-analysis
    ai_prob_before = self.ai_detector.detect(text)
    readability_before = self.readability.analyze(text)
    word_count_before = len(text.split())

    # Get appropriate prompt template
    mode = config.mode if config.mode in PROMPT_TEMPLATES else "balanced"
    template = PROMPT_TEMPLATES[mode]

    # Apply pre-processing transformations
    processed_text = text
    if config.remove_patterns:
        processed_text = self._remove_ai_patterns(processed_text, strategies)
    if config.add_imperfections:
        processed_text = self._add_human_imperfections(processed_text, strategies)

    # Primary model generation: first available model in preference order,
    # otherwise whichever model was loaded first.
    prompt = template["user"].format(text=processed_text)
    system_prompt = template["system"]
    primary_model = next(
        (k for k in ("llama_3_70b", "gemma_2_27b", "mistral_7b") if k in pipelines),
        next(iter(pipelines), None),
    )
    if primary_model is None:
        raise RuntimeError("No models available for humanization")
    humanized_text = self.model_manager.generate(
        model_key=primary_model,
        prompt=prompt,
        system_prompt=system_prompt,
        temperature=config.temperature,
        top_p=config.top_p,
        max_tokens=config.max_tokens,
    )
    if not humanized_text:
        raise RuntimeError("Model returned empty output")
    strategies.append(f"primary_rewrite_{primary_model}")

    # Ensemble: secondary model refinement
    if config.ensemble and len(pipelines) > 1:
        # Use up to 2 secondary models
        secondary_models = [k for k in pipelines if k != primary_model][:2]
        # Score the current text once; it is re-scored only when replaced.
        ai_prob_current = self.ai_detector.detect(humanized_text)
        for sec_model in secondary_models:
            if len(humanized_text) > refine_char_limit:
                strategies.append(f"ensemble_skipped_{sec_model}")
                continue
            refine_prompt = f"""Take this text and make it sound even MORE human. Add natural flow, vary sentence rhythm, and ensure it reads like authentic human writing. Don't change the meaning.
Text to refine:
{humanized_text}"""
            refined = self.model_manager.generate(
                model_key=sec_model,
                prompt=refine_prompt,
                system_prompt="You are an expert editor who makes text sound deeply human. Your edits are subtle but transformative.",
                temperature=config.temperature * 0.8,
                top_p=config.top_p,
                max_tokens=config.max_tokens,
            )
            adopted = False
            # Ignore empty output and output under half the current length.
            if refined and len(refined) > len(humanized_text) * 0.5:
                # Compare quality - choose the better one
                ai_prob_refined = self.ai_detector.detect(refined)
                if ai_prob_refined < ai_prob_current:
                    humanized_text = refined
                    ai_prob_current = ai_prob_refined
                    adopted = True
            if adopted:
                strategies.append(f"ensemble_refined_{sec_model}")
            else:
                strategies.append(f"ensemble_attempted_{sec_model}")

    # Post-processing
    if config.vary_sentence_length:
        humanized_text = self._vary_sentence_structure(humanized_text, strategies)
    if config.add_transitions:
        humanized_text = self._improve_transitions(humanized_text, strategies)
    if config.add_personal_touch:
        humanized_text = self._add_personal_elements(humanized_text, strategies)

    # Post-analysis
    ai_prob_after = self.ai_detector.detect(humanized_text)
    readability_after = self.readability.analyze(humanized_text)
    word_count_after = len(humanized_text.split())
    similarity_score = self.similarity.score(text, humanized_text)
    processing_time = time.time() - start_time

    # Count changes: distinct lowercase words present in only one text.
    orig_words = set(text.lower().split())
    new_words = set(humanized_text.lower().split())
    changes = len(orig_words.symmetric_difference(new_words))

    return HumanizationResult(
        original=text,
        humanized=humanized_text,
        model_used=primary_model,
        mode=mode,
        changes_made=changes,
        similarity_score=similarity_score,
        readability_before=readability_before,
        readability_after=readability_after,
        ai_probability_before=ai_prob_before,
        ai_probability_after=ai_prob_after,
        processing_time=processing_time,
        strategies_applied=strategies,
        word_count_before=word_count_before,
        word_count_after=word_count_after,
        perplexity_before=readability_before.get("perplexity", 0),
        perplexity_after=readability_after.get("perplexity", 0),
    )
| def _remove_ai_patterns(self, text: str, strategies: List[str]) -> str: | |
| """Remove common AI writing patterns.""" | |
| replacements = { | |
| r"\bIn conclusion\b": "So", | |
| r"\bFurthermore\b": "Plus", | |
| r"\bMoreover\b": "Also", | |
| r"\bAdditionally\b": "On top of that", | |
| r"\bIt's important to note\b": "Keep in mind", | |
| r"\bIt is important to note\b": "Keep in mind", | |
| r"\bdelve into\b": "look into", | |
| r"\bdelve deep\b": "dig into", | |
| r"\btapestry\b": "mix", | |
| r"\btestament to\b": "shows", | |
| r"\bin the realm of\b": "in", | |
| r"\bin today's world\b": "these days", | |
| r"\bever-evolving\b": "changing", | |
| r"\brapidly evolving\b": "fast-changing", | |
| r"\bharness the power of\b": "use", | |
| r"\bleverage\b": "use", | |
| r"\butilize\b": "use", | |
| r"\bpivotal role\b": "big role", | |
| r"\bshed light on\b": "explain", | |
| r"\bfoster a sense of\b": "create", | |
| r"\bin essence\b": "Basically", | |
| r"\bin summary\b": "To wrap up", | |
| } | |
| for pattern, replacement in replacements.items(): | |
| if re.search(pattern, text, re.IGNORECASE): | |
| text = re.sub(pattern, replacement, text, flags=re.IGNORECASE) | |
| strategies.append(f"replaced_ai_pattern_{pattern}") | |
| return text | |
def _add_human_imperfections(self, text: str, strategies: List[str]) -> str:
    """Occasionally start a sentence with a conjunction.

    Every sentence after the first has a 15% chance of being prefixed with a
    randomly chosen conjunction. The sentence's first letter is lower-cased
    to follow the conjunction, except when the sentence opens with the
    pronoun "I".

    Args:
        text: The text to modify.
        strategies: Running log of applied strategies;
            ``added_conjunction_starts`` is appended (in place) only when at
            least one sentence was actually changed.

    Returns:
        The sentences re-joined with single spaces when something changed,
        otherwise the input text unchanged.
    """
    sentences = sent_tokenize(text)
    if len(sentences) < 2:
        return text

    conjunctions = ["And", "But", "So", "Because", "Though"]
    changed = False
    for i, sent in enumerate(sentences):
        if i == 0 or random.random() >= 0.15:
            continue
        conj = random.choice(conjunctions)
        first_word = sent.split(" ", 1)[0] if sent else ""
        starts_with_pronoun = (
            first_word.rstrip(",.;:!?") == "I"
            or first_word.startswith(("I'", "I\u2019"))
        )
        if sent and not starts_with_pronoun:
            # Keep the whole sentence, lower-casing only its first character.
            sent = sent[0].lower() + sent[1:]
        sentences[i] = f"{conj} {sent}"
        changed = True

    if not changed:
        return text
    strategies.append("added_conjunction_starts")
    return " ".join(sentences)
def _vary_sentence_structure(self, text: str, strategies: List[str]) -> str:
    """Break up some long sentences to vary sentence length.

    A sentence of more than 25 words has a 40% chance of being split at the
    first connector word found within five words of its midpoint. The first
    half is closed with a period; the second half starts with the
    (capitalised) connector, or with the following word when the connector is
    a stand-alone comma. Sentences without a usable connector are kept as-is.

    Args:
        text: The text to modify; texts with fewer than three sentences are
            returned unchanged.
        strategies: Running log of applied strategies;
            ``varied_sentence_structure`` is appended (in place) only when at
            least one sentence was actually split.

    Returns:
        The sentences re-joined with single spaces when something was split,
        otherwise the input text unchanged.
    """
    sentences = sent_tokenize(text)
    if len(sentences) < 3:
        return text

    connectors = {",", "and", "but", "which", "that", "where", "when"}
    new_sentences = []
    did_split = False
    for sent in sentences:
        words = sent.split()
        if len(words) > 25 and random.random() < 0.4:
            mid = len(words) // 2
            # Look for a split point in a small window around the midpoint.
            for i in range(mid - 5, mid + 5):
                if not (0 < i < len(words) and words[i] in connectors):
                    continue
                head = " ".join(words[:i]).rstrip(" ,;:")
                tail_words = words[i + 1:] if words[i] == "," else words[i:]
                tail = " ".join(tail_words).strip()
                if not head or not tail:
                    continue
                if head[-1] not in ".!?":
                    # Terminate the first half so it reads as its own sentence.
                    head += "."
                new_sentences.append(head)
                new_sentences.append(tail[0].upper() + tail[1:])
                did_split = True
                break
            else:
                # No connector in the window: keep the sentence whole.
                new_sentences.append(sent)
        else:
            new_sentences.append(sent)

    if not did_split:
        return text
    strategies.append("varied_sentence_structure")
    return " ".join(new_sentences)
def _improve_transitions(self, text: str, strategies: List[str]) -> str:
    """Insert one conversational transition roughly a third of the way in.

    A randomly chosen transition phrase is prefixed to the sentence at index
    ``len(sentences) // 3``. That sentence's first letter is lower-cased to
    follow the phrase, except when it opens with the pronoun "I".

    Args:
        text: The text to modify; texts with fewer than four sentences are
            returned unchanged.
        strategies: Running log of applied strategies;
            ``improved_transitions`` is appended (in place) when a transition
            is inserted.

    Returns:
        The sentences re-joined with single spaces.
    """
    human_transitions = [
        "Here's the thing:", "The thing is,", "Look,",
        "Honestly,", "Real talk,", "Here's what I mean:",
        "What this means is:", "Put simply,", "The way I see it,",
        "At the end of the day,", "When you think about it,",
    ]
    sentences = sent_tokenize(text)
    if len(sentences) < 4:
        return text

    # With at least four sentences this index is always >= 1 and in range.
    insert_pos = len(sentences) // 3
    transition = random.choice(human_transitions)
    target = sentences[insert_pos]
    first_word = target.split(" ", 1)[0] if target else ""
    starts_with_pronoun = (
        first_word.rstrip(",.;:!?") == "I"
        or first_word.startswith(("I'", "I\u2019"))
    )
    if target and not starts_with_pronoun:
        target = target[0].lower() + target[1:]
    sentences[insert_pos] = f"{transition} {target}"

    strategies.append("improved_transitions")
    return " ".join(sentences)
def _add_personal_elements(self, text: str, strategies: List[str]) -> str:
    """Prefix the fifth sentence with a first-person phrase.

    Only texts with more than four sentences are modified. The target
    sentence's first letter is lower-cased to follow the phrase, except when
    it opens with the pronoun "I".

    Args:
        text: The text to modify.
        strategies: Running log of applied strategies;
            ``added_personal_elements`` is appended (in place) only when a
            phrase was actually inserted.

    Returns:
        The sentences re-joined with single spaces when a phrase was
        inserted, otherwise the input text unchanged.
    """
    personal_phrases = [
        "I've found that", "From my experience,", "I think",
        "It seems like", "I'd say", "If you ask me,",
        "In my view,", "What I've noticed is",
    ]
    sentences = sent_tokenize(text)
    if len(sentences) <= 4:
        # Too short to have a fifth sentence; nothing is inserted.
        return text

    insert_pos = min(4, len(sentences) - 1)
    phrase = random.choice(personal_phrases)
    target = sentences[insert_pos]
    first_word = target.split(" ", 1)[0] if target else ""
    starts_with_pronoun = (
        first_word.rstrip(",.;:!?") == "I"
        or first_word.startswith(("I'", "I\u2019"))
    )
    if target and not starts_with_pronoun:
        target = target[0].lower() + target[1:]
    sentences[insert_pos] = f"{phrase} {target}"

    strategies.append("added_personal_elements")
    return " ".join(sentences)
def batch_humanize(
    self,
    texts: List[str],
    config: PipelineConfig,
    progress=gr.Progress(),
) -> List[HumanizationResult]:
    """Humanize each text in turn, never aborting the batch on a failure.

    Args:
        texts: The input texts, processed in order.
        config: Pipeline settings passed to ``humanize`` for every text.
        progress: Progress callback, invoked once per text before that text
            is processed.

    Returns:
        One result per input text, in input order. A text whose processing
        raised is represented by a placeholder result whose ``humanized``
        field carries the error message and whose ``model_used`` is
        ``"error"``.
    """
    total = len(texts)
    outcomes = []
    for index, item in enumerate(texts):
        position = index + 1
        progress(position / total, desc=f"Processing {position}/{total}")
        try:
            outcome = self.humanize(item, config)
        except Exception as e:
            logger.error(f"Error processing text {index}: {e}")
            # Zeroed-out placeholder so the output stays aligned with the input.
            outcome = HumanizationResult(
                original=item,
                humanized=f"[Error: {str(e)}]",
                model_used="error",
                mode=config.mode,
                changes_made=0,
                similarity_score=0,
                readability_before={},
                readability_after={},
                ai_probability_before=0,
                ai_probability_after=0,
                processing_time=0,
                strategies_applied=[],
                word_count_before=len(item.split()),
                word_count_after=0,
                perplexity_before=0,
                perplexity_after=0,
            )
        outcomes.append(outcome)
    return outcomes
# ─────────────────────────────────────────────
# Gradio UI Builder
# ─────────────────────────────────────────────
class HumanizerApp:
    """Gradio application for the humanizer.

    Owns a ``HumanizationEngine`` and wires it to a four-tab Gradio UI
    (single text, batch, analysis, side-by-side comparison).
    """

    def __init__(self):
        self.engine = HumanizationEngine()
        self.theme = self._build_theme()

    @staticmethod
    def _build_theme():
        """Build the custom Gradio theme (emerald/blue hues, Inter font)."""
        from gradio.themes import Default

        return Default(
            primary_hue="emerald",
            secondary_hue="blue",
            font=gr.themes.GoogleFont("Inter"),
        )

    def build_interface(self) -> gr.Blocks:
        """Build the complete Gradio interface and register event handlers.

        Returns:
            The assembled ``gr.Blocks`` app, not yet launched.
        """
        with gr.Blocks(
            theme=self.theme,
            title="𧬠Advanced AI Text Humanizer",
            css=self._get_custom_css(),
        ) as app:
            gr.Markdown("""
# 𧬠Advanced AI Text Humanizer
### Transform AI-generated text into authentic human writing using multi-model ensemble
**Powered by:** Llama 3.3 70B β’ Mistral 7B β’ Gemma 2 27B β’ Zephyr 7B β’ BART
""")
            with gr.Tabs():
                # Tab 1: Single Text
                with gr.Tab("π Single Text"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            input_text = gr.Textbox(
                                label="π Input Text",
                                placeholder="Paste your AI-generated text here...",
                                lines=12,
                                max_lines=50,
                            )
                            with gr.Row():
                                humanize_btn = gr.Button(
                                    "β¨ Humanize Text",
                                    variant="primary",
                                    size="lg",
                                )
                                clear_btn = gr.Button("ποΈ Clear", variant="secondary")
                            with gr.Accordion("βοΈ Advanced Settings", open=False):
                                with gr.Row():
                                    mode = gr.Dropdown(
                                        choices=[
                                            ("π― Balanced", "balanced"),
                                            ("π Casual", "casual"),
                                            ("πΌ Professional", "professional"),
                                            ("π¨ Creative", "creative"),
                                            ("π Academic", "academic"),
                                        ],
                                        value="balanced",
                                        label="Writing Mode",
                                    )
                                    intensity = gr.Slider(
                                        minimum=0.1,
                                        maximum=1.0,
                                        value=0.5,
                                        step=0.1,
                                        label="Intensity",
                                    )
                                with gr.Row():
                                    temperature = gr.Slider(
                                        minimum=0.1,
                                        maximum=1.5,
                                        value=0.7,
                                        step=0.1,
                                        label="Temperature",
                                    )
                                    top_p = gr.Slider(
                                        minimum=0.1,
                                        maximum=1.0,
                                        value=0.9,
                                        step=0.05,
                                        label="Top-P",
                                    )
                                with gr.Row():
                                    ensemble = gr.Checkbox(
                                        value=True,
                                        label="π Ensemble Mode",
                                    )
                                    preserve_meaning = gr.Checkbox(
                                        value=True,
                                        label="π― Preserve Meaning",
                                    )
                                with gr.Row():
                                    add_imperfections = gr.Checkbox(
                                        value=True,
                                        label="β¨ Add Imperfections",
                                    )
                                    vary_sentence_length = gr.Checkbox(
                                        value=True,
                                        label="π Vary Sentence Length",
                                    )
                                with gr.Row():
                                    add_transitions = gr.Checkbox(
                                        value=True,
                                        label="π Add Transitions",
                                    )
                                    add_personal_touch = gr.Checkbox(
                                        value=True,
                                        label="π Add Personal Touch",
                                    )
                        with gr.Column(scale=1):
                            output_text = gr.Textbox(
                                label="π Humanized Output",
                                placeholder="Humanized text will appear here...",
                                lines=12,
                                max_lines=50,
                            )
                            with gr.Row():
                                copy_btn = gr.Button("π Copy", variant="secondary")
                                download_btn = gr.Button("πΎ Download", variant="secondary")
                # Tab 2: Batch Processing
                with gr.Tab("π¦ Batch Processing"):
                    gr.Markdown("### Process multiple texts at once")
                    batch_input = gr.Textbox(
                        label="π Input Texts (one per line, separated by ---)",
                        placeholder="Text 1...\n---\nText 2...\n---\nText 3...",
                        lines=15,
                        max_lines=100,
                    )
                    batch_btn = gr.Button("π Batch Humanize", variant="primary", size="lg")
                    batch_output = gr.Dataframe(
                        headers=["Original", "Humanized", "AI Score Before", "AI Score After", "Similarity"],
                        label="Results",
                    )
                # Tab 3: Analysis Dashboard
                with gr.Tab("π Analysis"):
                    with gr.Row():
                        with gr.Column():
                            analysis_input = gr.Textbox(
                                label="π Text to Analyze",
                                lines=8,
                            )
                            analyze_btn = gr.Button("π Analyze", variant="primary")
                        with gr.Column():
                            ai_score_gauge = gr.Number(
                                label="AI Probability Score",
                            )
                            readability_output = gr.JSON(label="Readability Metrics")
                # Tab 4: Comparison View
                with gr.Tab("π Side-by-Side Comparison"):
                    compare_input = gr.Textbox(
                        label="π Input Text",
                        lines=8,
                    )
                    compare_btn = gr.Button("π Compare", variant="primary")
                    with gr.Row():
                        compare_original = gr.Textbox(
                            label="Original",
                            lines=12,
                        )
                        compare_humanized = gr.Textbox(
                            label="Humanized",
                            lines=12,
                        )
                    # elem_id gives the "#diff-view" rule in the custom CSS a target.
                    diff_output = gr.HTML(label="π Diff View", elem_id="diff-view")
            # Results panel (shared by the single-text and comparison tabs)
            with gr.Accordion("π Detailed Results", open=True):
                with gr.Row():
                    with gr.Column():
                        stats_json = gr.JSON(label="π Processing Statistics")
                    with gr.Column():
                        # NOTE(review): no handler writes to this plot yet.
                        ai_reduction = gr.Plot(label="π AI Detection Reduction")
                strategies_output = gr.Textbox(
                    label="π οΈ Strategies Applied",
                    lines=3,
                )
            # Footer
            gr.Markdown("""
---
### π‘ Tips for Best Results
- **Balanced mode** works great for most use cases
- **Higher intensity** = more aggressive rewriting
- **Ensemble mode** uses multiple models for best quality
- For short texts (<100 words), try **Casual** or **Creative** mode
- For long texts (>500 words), use **Professional** or **Academic** mode
- Adjust **Temperature** for more/less creative output
""")
            # Event handlers
            humanize_btn.click(
                fn=self._handle_humanize,
                inputs=[
                    input_text, mode, intensity, temperature, top_p,
                    ensemble, preserve_meaning, add_imperfections,
                    vary_sentence_length, add_transitions, add_personal_touch,
                ],
                outputs=[output_text, stats_json, strategies_output],
            )
            # Clearing also resets the strategies box so no stale result lingers.
            clear_btn.click(
                fn=lambda: ("", "", {}, ""),
                inputs=[],
                outputs=[input_text, output_text, stats_json, strategies_output],
            )
            batch_btn.click(
                fn=self._handle_batch,
                inputs=[batch_input, mode, intensity, temperature, top_p, ensemble],
                outputs=[batch_output],
            )
            analyze_btn.click(
                fn=self._handle_analyze,
                inputs=[analysis_input],
                outputs=[ai_score_gauge, readability_output],
            )
            compare_btn.click(
                fn=self._handle_compare,
                inputs=[compare_input, mode, intensity, temperature, top_p, ensemble],
                outputs=[compare_original, compare_humanized, diff_output, stats_json],
            )
            copy_btn.click(
                fn=self._copy_text,
                inputs=[output_text],
                outputs=[],
            )
            download_btn.click(
                fn=self._download_text,
                inputs=[output_text],
                outputs=[],
            )
        return app

    def _build_config(self, mode, intensity, temperature, top_p, ensemble,
                      preserve_meaning, add_imperfections, vary_sentence_length,
                      add_transitions, add_personal_touch) -> PipelineConfig:
        """Build a PipelineConfig from UI inputs.

        ``max_tokens`` is derived from the intensity:
        ``int(intensity * 2048) + 512``.
        """
        return PipelineConfig(
            mode=mode,
            intensity=intensity,
            temperature=temperature,
            top_p=top_p,
            ensemble=ensemble,
            preserve_meaning=preserve_meaning,
            add_imperfections=add_imperfections,
            vary_sentence_length=vary_sentence_length,
            add_transitions=add_transitions,
            add_personal_touch=add_personal_touch,
            max_tokens=int(intensity * 2048) + 512,
        )

    def _handle_humanize(self, text, mode, intensity, temperature, top_p,
                         ensemble, preserve_meaning, add_imperfections,
                         vary_sentence_length, add_transitions, add_personal_touch):
        """Humanize one text.

        Returns:
            A ``(humanized_text, stats_dict, strategies_text)`` tuple matching
            the outputs wired to the Humanize button.
        """
        # Validate before initializing the engine so empty input does no work.
        if not text or not text.strip():
            return "Please enter some text to humanize.", {}, ""
        self.engine.initialize()
        config = self._build_config(
            mode, intensity, temperature, top_p, ensemble,
            preserve_meaning, add_imperfections, vary_sentence_length,
            add_transitions, add_personal_touch,
        )
        result = self.engine.humanize(text, config)
        stats = {
            "π€ Model Used": MODEL_REGISTRY.get(result.model_used, {}).get("name", result.model_used),
            "π Mode": result.mode,
            "β±οΈ Processing Time": f"{result.processing_time:.2f}s",
            "π Word Count": f"{result.word_count_before} β {result.word_count_after}",
            "π Changes Made": result.changes_made,
            "π― Semantic Similarity": f"{result.similarity_score:.1%}",
            "π€ AI Score Before": f"{result.ai_probability_before:.1%}",
            "π€ AI Score After": f"{result.ai_probability_after:.1%}",
            "π AI Reduction": f"{(result.ai_probability_before - result.ai_probability_after):.1%}",
            "π Avg Words/Sentence (Before)": f"{result.readability_before.get('avg_words_per_sentence', 0):.1f}",
            "π Avg Words/Sentence (After)": f"{result.readability_after.get('avg_words_per_sentence', 0):.1f}",
            "π Burstiness (After)": f"{result.readability_after.get('burstiness', 0):.1f}",
        }
        strategies = "\n".join(f"β {s}" for s in result.strategies_applied)
        return result.humanized, stats, strategies

    def _handle_batch(self, batch_input, mode, intensity, temperature, top_p, ensemble,
                      progress=gr.Progress()):
        """Humanize several texts separated by ``---``.

        If no non-empty segment is found, the input is split on newlines as a
        fallback. All post-processing options are enabled for batch runs.

        The ``progress`` parameter lets Gradio inject its progress tracker
        into this event handler; it is forwarded to the engine.

        Returns:
            A list of rows for the five-column results table; long texts are
            truncated to 200 characters plus ``...``.
        """
        raw = batch_input or ""
        texts = [t.strip() for t in raw.split("---") if t.strip()]
        if not texts:
            texts = [line.strip() for line in raw.strip().split("\n") if line.strip()]
        if not texts:
            # Pad to the table's five columns so the row shape matches the headers.
            return [["No input provided", "", "", "", ""]]
        self.engine.initialize()
        config = self._build_config(
            mode, intensity, temperature, top_p, ensemble,
            True, True, True, True, True,
        )
        results = self.engine.batch_humanize(texts, config, progress=progress)
        table = []
        for r in results:
            table.append([
                (r.original[:200] + "...") if len(r.original) > 200 else r.original,
                (r.humanized[:200] + "...") if len(r.humanized) > 200 else r.humanized,
                f"{r.ai_probability_before:.1%}",
                f"{r.ai_probability_after:.1%}",
                f"{r.similarity_score:.1%}",
            ])
        return table

    def _handle_analyze(self, text):
        """Return ``(ai_score, readability_metrics)`` for the given text.

        Blank input yields ``(0.0, {})`` without touching the engine.
        """
        if not text or not text.strip():
            return 0.0, {}
        self.engine.initialize()
        ai_score = self.engine.ai_detector.detect(text)
        readability = self.engine.readability.analyze(text)
        return ai_score, readability

    def _handle_compare(self, text, mode, intensity, temperature, top_p, ensemble):
        """Humanize a text and return it side by side with the original.

        All post-processing options are enabled for comparison runs.

        Returns:
            An ``(original, humanized, diff_html, stats_dict)`` tuple matching
            the outputs wired to the Compare button.
        """
        if not text or not text.strip():
            return "", "Please enter some text to humanize.", "", {}
        self.engine.initialize()
        config = self._build_config(
            mode, intensity, temperature, top_p, ensemble,
            True, True, True, True, True,
        )
        result = self.engine.humanize(text, config)
        diff_html = self.engine.analyzer.get_diff_html(result.original, result.humanized)
        stats = {
            # Fall back to the raw model key, as the single-text handler does.
            "π€ Model": MODEL_REGISTRY.get(result.model_used, {}).get("name", result.model_used),
            "β±οΈ Time": f"{result.processing_time:.2f}s",
            "π Words": f"{result.word_count_before} β {result.word_count_after}",
            "π€ AI Score": f"{result.ai_probability_before:.1%} β {result.ai_probability_after:.1%}",
        }
        return result.original, result.humanized, diff_html, stats

    def _copy_text(self, text):
        """Placeholder for the Copy button; currently does nothing.

        NOTE(review): no client-side JS is attached to the button in
        ``build_interface``, so clicking Copy has no visible effect.
        """
        return None

    def _download_text(self, text):
        """Placeholder for the Download button; currently does nothing.

        NOTE(review): no file is produced and the button has no outputs.
        """
        return None

    @staticmethod
    def _get_custom_css():
        """Return the custom CSS string passed to ``gr.Blocks``."""
        return """
        .gradio-container {
            max-width: 1400px !important;
        }
        .main-text textarea {
            font-size: 15px !important;
            line-height: 1.6 !important;
        }
        #diff-view {
            font-family: 'Inter', sans-serif;
            font-size: 14px;
            line-height: 1.8;
            padding: 20px;
            background: #f8f9fa;
            border-radius: 8px;
        }
        .stat-card {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 15px;
            border-radius: 10px;
            text-align: center;
        }
        .footer {
            text-align: center;
            padding: 20px;
            color: #666;
            font-size: 12px;
        }
        """
# ─────────────────────────────────────────────
# Launch
# ─────────────────────────────────────────────
def main():
    """Build the humanizer UI and serve it.

    The server binds to all interfaces on the port given by the ``PORT``
    environment variable (7860 when unset), with request queuing enabled.
    """
    logger.info("Starting DeepHumanizer...")
    humanizer = HumanizerApp()
    demo = humanizer.build_interface()
    queued = demo.queue(default_concurrency_limit=2)
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", 7860)),
        "share": True,
        "show_error": True,
    }
    queued.launch(**launch_options)


# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()