tyfsadik commited on
Commit
1702db7
·
verified ·
1 Parent(s): b2bb7ef

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +425 -0
app.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py - Advanced Deep Humanizer for Hugging Face Spaces
2
+ # Optimized for A100/H100 GPUs - Premium Configuration
3
+
4
+ import gradio as gr
5
+ import torch
6
+ import random
7
+ import re
8
+ import json
9
+ from transformers import (
10
+ AutoModelForCausalLM,
11
+ AutoTokenizer,
12
+ pipeline,
13
+ BitsAndBytesConfig
14
+ )
15
+ from typing import List, Dict, Tuple
16
+ import numpy as np
17
+ from dataclasses import dataclass
18
+ import spaces # Hugging Face Spaces utility for GPU management
19
+
20
@dataclass
class HumanizationConfig:
    """Tunable knobs for one humanization run.

    The sampling fields feed ``model.generate``; the remaining flags steer
    prompt construction and post-processing inside ``DeepHumanizer``.
    """
    temperature: float = 0.8            # sampling temperature for generate()
    top_p: float = 0.92                 # nucleus-sampling cutoff
    repetition_penalty: float = 1.15    # discourages verbatim repetition
    max_length: int = 4096              # NOTE(review): defined but never read in this file — confirm intent
    style_intensity: str = "medium"     # light, medium, aggressive
    preserve_meaning: bool = True       # surfaced verbatim in the system prompt
    add_imperfections: bool = True      # enables the "imperfections" prompt section
    burstiness_factor: float = 0.3      # Variation in sentence length
    perplexity_target: float = 25.0     # Human text usually 15-30
32
class DeepHumanizer:
    """Wraps a 4-bit-quantized instruct LLM plus heuristic text analysis to
    rewrite AI-generated text so it reads more like human writing."""

    def __init__(self):
        # Premium model; swapping this id is the only change needed to try an
        # alternative such as "Qwen/Qwen2.5-72B-Instruct" or
        # "deepseek-ai/DeepSeek-V3".
        self.model_id = "meta-llama/Llama-3.3-70B-Instruct"

        self.tokenizer = None
        self.model = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # NOTE(review): loading happens eagerly here, so constructing this
        # class at module import time downloads/loads the full 70B model.
        self.initialize_model()
42
    def initialize_model(self):
        """Initialize 70B model with 4-bit quantization for single A100 80GB.

        Populates ``self.tokenizer`` and ``self.model``; called once from
        ``__init__``. Requires the ``bitsandbytes`` and ``flash-attn``
        packages at runtime — TODO confirm both are in the Space's
        requirements.txt.
        """
        print(f"Initializing {self.model_id} on {self.device}...")

        # 4-bit NF4 quantization so the 70B checkpoint fits on one 80GB card;
        # double quantization shaves extra memory off the quant constants.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_id,
            trust_remote_code=True,
            padding_side="left"  # left padding suits decoder-only generation
        )

        # Some checkpoints ship without a pad token; reuse EOS so batched
        # tokenization and generate() have a valid pad id.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # device_map="auto" lets accelerate shard/place layers automatically.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16,
            attn_implementation="flash_attention_2"  # Speed optimization
        )

        self.model.eval()  # inference only; disables dropout
        print("Model loaded successfully")
76
    def calculate_perplexity(self, text: str) -> float:
        """Calculate perplexity of *text* under the loaded model.

        Lower values mean the model finds the text more predictable
        (more AI-like); human prose typically scores higher.
        """
        encodings = self.tokenizer(text, return_tensors="pt")
        # NOTE(review): with device_map="auto" the first layer may live on a
        # different device than self.device — confirm this .to() is correct
        # for multi-GPU placements.
        input_ids = encodings.input_ids.to(self.device)

        with torch.no_grad():
            # Passing the inputs as labels yields mean cross-entropy loss;
            # exp(loss) is the standard perplexity definition.
            outputs = self.model(input_ids, labels=input_ids)
            loss = outputs.loss

        perplexity = torch.exp(loss).item()
        return perplexity
88
+ def analyze_text_patterns(self, text: str) -> Dict:
89
+ """Analyze writing patterns to identify AI characteristics"""
90
+ sentences = re.split(r'(?<=[.!?])\s+', text)
91
+ words = text.split()
92
+
93
+ # Calculate burstiness (variation in sentence length)
94
+ if len(sentences) > 1:
95
+ sent_lengths = [len(s.split()) for s in sentences]
96
+ burstiness = np.std(sent_lengths) / (np.mean(sent_lengths) + 1e-8)
97
+ else:
98
+ burstiness = 0
99
+
100
+ # Common AI patterns
101
+ ai_patterns = [
102
+ r'\b(delve|leverage|utilize|facilitate|optimize)\b',
103
+ r'\b(In conclusion|Furthermore|Moreover|Additionally)\b',
104
+ r'\b(It is important to note that|It should be noted that)\b',
105
+ r'(\b\w+\b)\s+\1', # Repetition
106
+ ]
107
+
108
+ pattern_matches = sum(len(re.findall(p, text, re.I)) for p in ai_patterns)
109
+
110
+ return {
111
+ "burstiness": burstiness,
112
+ "avg_sentence_length": np.mean([len(s.split()) for s in sentences]) if sentences else 0,
113
+ "ai_markers": pattern_matches,
114
+ "formality_score": self._estimate_formality(text)
115
+ }
116
+
117
+ def _estimate_formality(self, text: str) -> float:
118
+ """Estimate formality level 0-1"""
119
+ formal_words = r'\b(therefore|thus|hence|consequently|furthermore|moreover|nevertheless)\b'
120
+ informal_words = r'\b(so|but|anyway|actually|basically|like|you know)\b'
121
+
122
+ formal_count = len(re.findall(formal_words, text, re.I))
123
+ informal_count = len(re.findall(informal_words, text, re.I))
124
+
125
+ total = formal_count + informal_count
126
+ if total == 0:
127
+ return 0.5
128
+ return formal_count / total
129
+
130
    def generate_humanization_prompt(self, text: str, config: HumanizationConfig,
                                     style: str, analysis: Dict) -> str:
        """Generate the system+user prompt from config, style, and analysis.

        Combines a per-style voice instruction, optional imperfection
        guidance, and analysis-driven de-AI-ification into a single
        ChatML-formatted prompt string.

        NOTE(review): the <|im_start|>/<|im_end|> markers are ChatML, but the
        configured model is Llama 3.3 — confirm against the model's actual
        chat template (tokenizer.apply_chat_template would be safer).
        """

        # Optional block asking the model to add human-style imperfections.
        imperfections_guide = ""
        if config.add_imperfections:
            imperfections_guide = """
- Include natural imperfections: occasional fragments, starting sentences with conjunctions (But, And, So)
- Vary punctuation usage naturally (em-dashes, occasional ellipses...)
- Add conversational fillers where appropriate (well, actually, you know what I mean)
- Break formal structure with rhetorical questions or personal asides
"""

        # One voice instruction per UI style choice; unknown styles fall back
        # to "casual" below.
        style_prompts = {
            "casual": "Make it sound like a knowledgeable friend explaining over coffee. Use contractions, everyday vocabulary, personal anecdotes potential.",
            "professional": "Keep it business-appropriate but warm. Like a smart colleague in a Slack message—not too stiff, not too loose.",
            "academic": "Scholarly but accessible. Reduce robotic transitions but keep the rigor. Like a passionate professor speaking, not writing a textbook.",
            "creative": "Vivid, varied sentence structures, rhythmic flow. Occasional metaphors, emotional undertones, unpredictable phrasing.",
            "reddit": "Authentic internet voice. Like a high-karma r/depthhub or r/explainlikeimfive comment. Informative but colloquial.",
            "twitter": "Sharp, punchy, tweet-thread style. Short sentences mixed with longer explanatory ones. Personality-forward."
        }

        style_instruction = style_prompts.get(style, style_prompts["casual"])

        # If the analysis found several AI-marker phrases, explicitly ban them.
        if analysis["ai_markers"] > 3:
            de_ai_instruction = "CRITICAL: Remove all AI-signaling phrases (delve, leverage, moreover, it is important to note). "
        else:
            de_ai_instruction = ""

        # NOTE(review): humanize() later extracts the reply by splitting the
        # decoded output on the literal word "assistant", which depends on
        # this prompt ending with "<|im_start|>assistant".
        prompt = f"""<|im_start|>system
You are an elite linguistic surgeon specializing in humanization of AI-generated text. Your task is to transform robotic, predictable text into authentic human writing that bypasses AI detection through natural variation and cognitive authenticity.

{style_instruction}
{de_ai_instruction}{imperfections_guide}

TECHNICAL REQUIREMENTS:
- Target perplexity: {config.perplexity_target} (human range)
- Burstiness factor: Inject {int(config.burstiness_factor * 100)}% variation in sentence length
- Maintain core meaning: {config.preserve_meaning}
- Output ONLY the rewritten text, no explanations, no markdown code blocks

HUMANIZATION LAYERS:
1. Lexical variation: Replace generic AI terms with context-specific vocabulary
2. Syntactic diversity: Mix simple, compound, complex sentences irregularly
3. Semantic noise: Add slight ambiguity or subjective framing where appropriate
4. Pragmatic markers: Include hesitation, self-correction, natural flow disruptions
5. Cognitive fingerprint: Inject personal stance or mild opinion<|im_end|>
<|im_start|>user
Transform this text into deeply human writing:

{text}<|im_end|>
<|im_start|>assistant"""

        return prompt
186
    @spaces.GPU(duration=120)  # HF Spaces: allocate a GPU for up to 120s per call
    def humanize(self, text: str, style: str = "casual", intensity: str = "medium",
                 creativity: float = 0.8, add_typos: bool = False,
                 target_reading_level: str = "default") -> Tuple[str, Dict]:
        """
        Main humanization pipeline with multi-step refinement.

        Steps: analyze input -> build prompt -> sample a rewrite from the
        model -> intensity-based post-processing -> optional typos ->
        compute before/after metrics. Returns (humanized_text, metrics).

        NOTE(review): target_reading_level is accepted but never used.
        """
        config = HumanizationConfig(
            temperature=creativity,
            style_intensity=intensity,
            add_imperfections=intensity in ["medium", "aggressive"]
        )

        # Step 1: Analysis of the input's AI-ness (burstiness, markers, ...).
        analysis = self.analyze_text_patterns(text)

        # Step 2: Initial rewrite via the LLM.
        prompt = self.generate_humanization_prompt(text, config, style, analysis)

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                # Word-count * 3 is a rough token budget ("generous buffer").
                # NOTE(review): this is 0 for whitespace-only input — confirm
                # callers pre-filter empty text (process_text does).
                max_new_tokens=len(text.split()) * 3,
                temperature=config.temperature,
                top_p=config.top_p,
                repetition_penalty=config.repetition_penalty,
                do_sample=True,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the assistant's response. NOTE(review): splitting on
        # the literal "assistant" breaks if the word occurs in the text —
        # slicing off the prompt by input token length would be safer.
        humanized = decoded.split("assistant")[-1].strip()

        # Step 3: Post-processing based on intensity.
        if intensity == "aggressive":
            humanized = self._inject_aggressive_variation(humanized)
        elif intensity == "light":
            humanized = self._light_touch(humanized)

        # Step 4: Optional imperfections (typos only in aggressive mode).
        if add_typos and intensity == "aggressive":
            humanized = self._add_natural_typos(humanized)

        # Step 5: Metrics calculation (before/after perplexity & burstiness).
        final_analysis = {
            "original_perplexity": round(self.calculate_perplexity(text), 2),
            "humanized_perplexity": round(self.calculate_perplexity(humanized), 2),
            "burstiness_change": round(self.analyze_text_patterns(humanized)["burstiness"] - analysis["burstiness"], 2),
            "human_score": self._calculate_human_score(humanized),
            "processing_style": style,
            "intensity": intensity
        }

        return humanized, final_analysis
245
+ def _inject_aggressive_variation(self, text: str) -> str:
246
+ """Add high-level human variation"""
247
+ # Randomly combine sentences with conjunctions
248
+ text = re.sub(r'\.\s+([A-Z])', lambda m: f", and {m.group(1).lower()}" if random.random() > 0.7 else f". {m.group(1)}", text)
249
+
250
+ # Add occasional fragments
251
+ sentences = text.split('. ')
252
+ if len(sentences) > 3 and random.random() > 0.5:
253
+ idx = random.randint(1, len(sentences)-2)
254
+ sentences[idx] = sentences[idx].split(',')[0] # Make first part a fragment
255
+ return '. '.join(sentences)
256
+
257
+ def _light_touch(self, text: str) -> str:
258
+ """Minimal changes, just polish"""
259
+ # Remove common AI transitions
260
+ text = re.sub(r'\b(In conclusion|To summarize|Overall),\s*', '', text, flags=re.I)
261
+ return text
262
+
263
+ def _add_natural_typos(self, text: str) -> str:
264
+ """Add believable human typos (use sparingly)"""
265
+ # Very subtle: duplicate letters occasionally
266
+ words = text.split()
267
+ for i in range(len(words)):
268
+ if random.random() > 0.98 and len(words[i]) > 4:
269
+ words[i] = words[i][:2] + words[i][1] + words[i][2:]
270
+ return ' '.join(words)
271
+
272
+ def _calculate_human_score(self, text: str) -> int:
273
+ """Estimate likelihood of passing as human 0-100"""
274
+ score = 70 # Base
275
+
276
+ # Check for AI markers
277
+ ai_markers = len(re.findall(r'\b(leverage|delve|utilize|facilitate|optimize)\b', text, re.I))
278
+ score -= ai_markers * 5
279
+
280
+ # Check variation
281
+ sentences = re.split(r'(?<=[.!?])\s+', text)
282
+ if len(sentences) > 1:
283
+ lengths = [len(s) for s in sentences]
284
+ variation = np.std(lengths) / np.mean(lengths)
285
+ if variation > 0.3: # Good burstiness
286
+ score += 15
287
+
288
+ # Check contractions
289
+ if len(re.findall(r"\b\w+'\w+\b", text)) > 0:
290
+ score += 10
291
+
292
+ return min(100, max(0, score))
293
+
294
# Initialize singleton used by the Gradio callback below.
# NOTE(review): constructing DeepHumanizer loads the full 70B model at
# import time; with @spaces.GPU the heavy work is normally expected inside
# the decorated call — confirm eager loading here is intended.
humanizer = DeepHumanizer()
297
# Gradio Interface
def process_text(text, style, intensity, creativity, add_imperfections, comparison_mode):
    """Gradio callback: run the humanizer and format the three outputs.

    Returns (humanized_text, metrics_markdown, comparison_markdown) — all
    strings, matching the output components (Textbox, Markdown, Markdown).
    """
    if not text.strip():
        # Fixed: previously returned a dict ({}) for the Markdown output;
        # all three outputs must be strings.
        return "", "", ""

    humanized, metrics = humanizer.humanize(
        text=text,
        style=style,
        intensity=intensity,
        creativity=creativity,
        # Fixed: the checkbox sends a bool, so the old comparison
        # (add_imperfections == "Aggressive") was always False and typos
        # could never be enabled.
        add_typos=bool(add_imperfections)
    )

    # Format metrics display. The burstiness delta uses the "+" format flag
    # so a negative change renders as "-0.12" instead of the old "+-0.12".
    metrics_md = f"""
### 📊 Analysis Results

| Metric | Value | Status |
|--------|-------|--------|
| **Human Likelihood Score** | {metrics['human_score']}/100 | {'🟢 Human' if metrics['human_score'] > 80 else '🟡 Unclear' if metrics['human_score'] > 60 else '🔵 AI'} |
| **Perplexity Change** | {metrics['original_perplexity']} → {metrics['humanized_perplexity']} | {'🟢 Good Variation' if metrics['humanized_perplexity'] > metrics['original_perplexity'] else '⚠️ Check needed'} |
| **Burstiness Delta** | {metrics['burstiness_change']:+.2f} | {'🟢 Natural Flow' if metrics['burstiness_change'] > 0 else '⚠️ Monotonous'} |
"""

    if comparison_mode:
        # Original text is truncated to 500 chars to keep the panel compact.
        comparison = f"""
**Original ({len(text.split())} words):**
{text[:500]}{'...' if len(text) > 500 else ''}

---
**Humanized ({len(humanized.split())} words):**
{humanized}
"""
        return humanized, metrics_md, comparison

    return humanized, metrics_md, ""
334
# Custom CSS injected into gr.Blocks for a premium feel.
# NOTE(review): .metric-card is defined but no component in this file
# assigns that class — confirm it is still needed.
css = """
.gradio-container {
font-family: 'Inter', sans-serif;
}
.metric-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 8px;
padding: 16px;
color: white;
}
"""
347
# UI definition. Two-column layout: inputs/controls on the left, outputs on
# the right; components are wired to process_text via submit_btn.click below.
with gr.Blocks(css=css, title="Deep Humanizer Pro", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🧠 Deep Humanizer Pro
### Advanced AI-to-Human Text Transformation using Llama 3.3 70B
*Elite-grade humanization with linguistic analysis and adversarial pattern disruption*
""")

    with gr.Row():
        with gr.Column(scale=1):
            # Left column: source text plus all tuning controls.
            input_text = gr.Textbox(
                label="Input Text (AI-generated)",
                placeholder="Paste your AI-generated content here...",
                lines=10
            )

            with gr.Row():
                style = gr.Dropdown(
                    choices=["casual", "professional", "academic", "creative", "reddit", "twitter"],
                    value="casual",
                    label="Voice Style"
                )
                intensity = gr.Radio(
                    choices=["light", "medium", "aggressive"],
                    value="medium",
                    label="Humanization Intensity"
                )

            with gr.Row():
                # Slider feeds HumanizationConfig.temperature directly.
                creativity = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.8, step=0.1,
                    label="Creativity (Temperature)"
                )
                add_imperfections = gr.Checkbox(
                    label="Add Natural Imperfections",
                    value=True
                )

            comparison_mode = gr.Checkbox(
                label="Show Side-by-Side Comparison",
                value=False
            )

            submit_btn = gr.Button("🚀 Humanize Text", variant="primary")

        with gr.Column(scale=1):
            # Right column: humanized output plus metric/comparison panels.
            output_text = gr.Textbox(
                label="Humanized Output",
                lines=10,
                show_copy_button=True
            )
            metrics_display = gr.Markdown()
            comparison_display = gr.Markdown()

    # Clickable example rows fill (input_text, style, intensity).
    gr.Examples(
        examples=[
            ["Artificial Intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think like humans and mimic their actions. The term may also be applied to any machine that exhibits traits associated with a human mind such as learning and problem-solving.", "casual", "medium"],
            ["In conclusion, it is important to note that leveraging cutting-edge technologies can facilitate optimal outcomes for stakeholders.", "professional", "aggressive"],
        ],
        inputs=[input_text, style, intensity],
        label="Try these examples"
    )

    # Wire the button: six inputs map positionally onto process_text's params.
    submit_btn.click(
        fn=process_text,
        inputs=[input_text, style, intensity, creativity, add_imperfections, comparison_mode],
        outputs=[output_text, metrics_display, comparison_display]
    )

    gr.Markdown("""
### 🛠️ Technical Specifications
- **Model**: Llama 3.3 70B Instruct (4-bit quantized)
- **Architecture**: Flash Attention 2 + Gradient Checkpointing
- **Analysis**: Perplexity scoring, burstiness calculation, AI marker detection
- **GPU**: Optimized for A100/H100 (80GB VRAM)
""")
424
# Launch the UI only when run directly as a script.
if __name__ == "__main__":
    demo.launch()