File size: 4,911 Bytes
25b6780 77438f0 901db22 77438f0 8bccb6d 77438f0 f60b4ac 77513d0 77438f0 442962e 77438f0 442962e 77438f0 6fbb945 77438f0 f60b4ac 77438f0 f60b4ac 73f597e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 442962e 77438f0 442962e 77438f0 442962e 77438f0 442962e 9b56add 77438f0 442962e 77438f0 77513d0 77438f0 6fbb945 77438f0 442962e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import Literal, Optional
from llama_cpp import Llama
import re
# ==================== MODEL CONFIG ====================
# HuggingFace repo and quantized GGUF file for the CPU-friendly Phi-3.1 Mini.
MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
MODEL_FILE = "Phi-3.1-mini-4k-instruct-IQ2_M.gguf"
print("🚀 Loading Phi-3.1 Mini (Human Authorship Restorer)...")
# Downloads (or reuses the cached copy of) the model and loads it at import
# time, so the first request does not pay the load cost.
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_threads=4,
    n_ctx=1024, # safer for HF Spaces
    n_batch=128,
    n_gpu_layers=0,  # CPU-only inference
    verbose=False,
)
print("✅ Model loaded")
# ==================== FASTAPI ====================
app = FastAPI(title="AI Humanizer – Author Voice Restorer")
# NOTE(review): fully permissive CORS (any origin/method/header) — acceptable
# for a public demo, but confirm before exposing authenticated endpoints.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ==================== REQUEST ====================
class HumanizeRequest(BaseModel):
    """Request body for POST /api/humanize."""
    # Text to rewrite; length bounds (1–3000 chars) enforced by pydantic.
    text: str = Field(..., min_length=1, max_length=3000)
    # Paper section; selects the tone guidance used when building the prompt.
    section: Literal[
        "abstract",
        "introduction",
        "methodology",
        "results",
        "discussion"
    ]
    # Optional free-form author context, injected verbatim into the prompt.
    author_notes: Optional[str] = None
# ==================== SECTION-AWARE STYLE ====================
# Per-section tone instructions spliced into the prompt. Keys mirror the
# Literal values accepted by HumanizeRequest.section.
SECTION_GUIDANCE = {
    "abstract":
        "Write concisely and densely. Maintain objective academic tone.",
    "introduction":
        "Provide context and motivation. Sound like a researcher framing a problem.",
    "methodology":
        "Be procedural, precise, and restrained. No persuasive language.",
    "results":
        "Be cautious, observational, and factual. Avoid strong claims.",
    "discussion":
        "Be interpretive and reflective. Explain implications carefully."
}
# ==================== OUTPUT CLEANER ====================
def clean_output(text: str) -> str:
    """Normalize raw model output.

    Removes chat-template special tokens (``<|...|>``), collapses all runs
    of whitespace (including newlines) to single spaces, and trims the ends.
    """
    without_tokens = re.sub(r'<\|.*?\|>', '', text)
    single_spaced = re.sub(r'\s+', ' ', without_tokens)
    return single_spaced.strip()
# ==================== FALLBACK ====================
def fallback_humanize(text: str) -> str:
    """Rule-based simplification used when model inference fails or echoes.

    Replaces overly formal words/phrases with plainer equivalents,
    case-insensitively. Each phrase is escaped and anchored on word
    boundaries so it is treated as a literal whole word/phrase — the
    original unanchored ``re.sub(formal, ...)`` corrupted longer words
    (e.g. "commencement" became "startment").
    """
    replacements = [
        ("utilize", "use"),
        ("commence", "start"),
        ("approximately", "about"),
        ("therefore", "so"),
        ("however", "but"),
        ("in order to", "to"),
        ("due to the fact that", "because"),
        ("prior to", "before"),
        ("subsequent to", "after"),
    ]
    result = text
    for formal, simple in replacements:
        # re.escape: neutralize any regex metacharacters in the phrase;
        # \b ... \b: match whole words only.
        pattern = r'\b' + re.escape(formal) + r'\b'
        result = re.sub(pattern, simple, result, flags=re.IGNORECASE)
    return result
# ==================== PROMPT BUILDER ====================
def build_prompt(text: str, section: str, author_notes: Optional[str]) -> str:
    """Assemble the full instruction prompt for the model.

    Combines the per-section tone guidance, the optional author context
    block, and the text to rewrite into a single stripped prompt string.
    Unknown sections fall back to a generic academic-tone instruction.
    """
    guidance = SECTION_GUIDANCE.get(section, "Use formal academic tone.")
    if author_notes:
        notes_block = f"\nAuthor context (do NOT invent new reasoning):\n{author_notes}\n"
    else:
        notes_block = ""
    prompt = f"""
You are an academic writing assistant.
GOAL:
Restore natural human authorship signals while preserving formal academic language.
NON-NEGOTIABLE RULES:
- Preserve all technical meaning, claims, numbers, and citations
- Do NOT add new information
- Do NOT remove uncertainty
- Do NOT invent justifications
- Do NOT change terminology
- Do NOT introduce grammar or punctuation errors
SECTION GUIDANCE:
{guidance}
HUMANIZATION RULES:
- Vary sentence rhythm naturally (short / medium / long)
- Reorder clauses where appropriate
- Reduce overused academic fillers (e.g., "Moreover", "Furthermore")
- Prefer implicit transitions
- Preserve author intent and constraints
{notes_block}
TEXT:
{text}
OUTPUT:
Return ONLY the revised text.
""".strip()
    return prompt
# ==================== ENDPOINT ====================
@app.post("/api/humanize")
async def humanize(req: HumanizeRequest):
    """Rewrite req.text with a more natural authorial voice.

    Runs the LLM on a section-aware prompt. If inference raises, or the
    model returns nothing new (empty output or a case-insensitive echo of
    the input), falls back to the rule-based rewriter. Always returns a
    dict; "success" is False only when inference itself failed.
    """
    source = req.text.strip()
    prompt = build_prompt(source, req.section, req.author_notes)
    try:
        completion = llm(
            prompt,
            max_tokens=400,
            temperature=0.4,  # controlled, academic-safe
            top_p=0.9,
            top_k=40,
            stop=["<|user|>", "<|end|>"],
            echo=False,
        )
        candidate = clean_output(completion["choices"][0]["text"])
        # Empty output or a plain echo of the input → use the rule-based path.
        if not candidate or candidate.lower() == source.lower():
            candidate = fallback_humanize(source)
        return {
            "original": source,
            "section": req.section,
            "humanized": candidate,
            "success": True,
        }
    except Exception as err:
        print("❌ Inference error:", err)
        return {
            "original": source,
            "section": req.section,
            "humanized": fallback_humanize(source),
            "success": False,
        }
# ==================== HEALTH ====================
@app.get("/")
def health():
    """Liveness probe: report service status, loaded model file, and the main endpoint."""
    payload = {
        "status": "ok",
        "model": MODEL_FILE,
        "endpoint": "/api/humanize",
    }
    return payload
|