from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import Literal, Optional

from llama_cpp import Llama

import re


MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
MODEL_FILE = "Phi-3.1-mini-4k-instruct-IQ2_M.gguf"

print("🚀 Loading Phi-3.1 Mini (Human Authorship Restorer)...")
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_threads=4,
    n_ctx=1024,
    n_batch=128,
    n_gpu_layers=0,
    verbose=False,
)
print("✅ Model loaded")


app = FastAPI(title="AI Humanizer – Author Voice Restorer")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


class HumanizeRequest(BaseModel):
    text: str = Field(..., min_length=1, max_length=3000)
    section: Literal[
        "abstract",
        "introduction",
        "methodology",
        "results",
        "discussion",
    ]
    author_notes: Optional[str] = None
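

# Illustrative request body for /api/humanize (hypothetical values, shown only
# to document the schema above):
# {
#     "text": "Moreover, we utilize a transformer in order to classify documents.",
#     "section": "methodology",
#     "author_notes": "Keep the passive voice used elsewhere in the paper."
# }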


SECTION_GUIDANCE = {
    "abstract":
        "Write concisely and densely. Maintain objective academic tone.",
    "introduction":
        "Provide context and motivation. Sound like a researcher framing a problem.",
    "methodology":
        "Be procedural, precise, and restrained. No persuasive language.",
    "results":
        "Be cautious, observational, and factual. Avoid strong claims.",
    "discussion":
        "Be interpretive and reflective. Explain implications carefully.",
}


def clean_output(text: str) -> str:
    # Strip any leftover chat special tokens and collapse whitespace.
    text = re.sub(r'<\|.*?\|>', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()


def fallback_humanize(text: str) -> str:
    # Simple rule-based rewrites used when the model produces no usable output.
    replacements = [
        ("utilize", "use"),
        ("commence", "start"),
        ("approximately", "about"),
        ("therefore", "so"),
        ("however", "but"),
        ("in order to", "to"),
        ("due to the fact that", "because"),
        ("prior to", "before"),
        ("subsequent to", "after"),
    ]

    result = text
    for formal, simple in replacements:
        # Match whole words/phrases only, so e.g. "utilization" is left alone.
        result = re.sub(rf"\b{re.escape(formal)}\b", simple, result, flags=re.IGNORECASE)

    return result
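

# Quick illustration of the fallback (hypothetical input and expected output):
#   fallback_humanize("We utilize X in order to commence Y.")
#   -> "We use X to start Y."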


def build_prompt(text: str, section: str, author_notes: Optional[str]) -> str:
    guidance = SECTION_GUIDANCE.get(section, "Use formal academic tone.")

    notes_block = (
        f"\nAuthor context (do NOT invent new reasoning):\n{author_notes}\n"
        if author_notes else ""
    )

    return f"""
You are an academic writing assistant.

GOAL:
Restore natural human authorship signals while preserving formal academic language.

NON-NEGOTIABLE RULES:
- Preserve all technical meaning, claims, numbers, and citations
- Do NOT add new information
- Do NOT remove uncertainty
- Do NOT invent justifications
- Do NOT change terminology
- Do NOT introduce grammar or punctuation errors

SECTION GUIDANCE:
{guidance}

HUMANIZATION RULES:
- Vary sentence rhythm naturally (short / medium / long)
- Reorder clauses where appropriate
- Reduce overused academic fillers (e.g., "Moreover", "Furthermore")
- Prefer implicit transitions
- Preserve author intent and constraints

{notes_block}

TEXT:
{text}

OUTPUT:
Return ONLY the revised text.
""".strip()


@app.post("/api/humanize")
async def humanize(req: HumanizeRequest):
    text = req.text.strip()

    prompt = build_prompt(text, req.section, req.author_notes)

    try:
        output = llm(
            prompt,
            max_tokens=400,
            temperature=0.4,
            top_p=0.9,
            top_k=40,
            stop=["<|user|>", "<|end|>"],  # Phi-3 chat special tokens
            echo=False,
        )

        raw = output["choices"][0]["text"]
        cleaned = clean_output(raw)

        # If the model returns nothing or merely echoes the input, fall back to
        # the rule-based rewriter.
        if not cleaned or cleaned.lower() == text.lower():
            cleaned = fallback_humanize(text)

        return {
            "original": text,
            "section": req.section,
            "humanized": cleaned,
            "success": True
        }

    except Exception as e:
        print("❌ Inference error:", e)
        return {
            "original": text,
            "section": req.section,
            "humanized": fallback_humanize(text),
            "success": False
        }


@app.get("/")
def health():
    return {
        "status": "ok",
        "model": MODEL_FILE,
        "endpoint": "/api/humanize"
    }
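

# ---------------------------------------------------------------------------
# Local run sketch. Assumptions: this module is saved as main.py and uvicorn is
# installed alongside FastAPI; adjust the module name and port as needed.
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Example request once the server is up:
#
#   curl -X POST http://localhost:8000/api/humanize \
#        -H "Content-Type: application/json" \
#        -d '{"text": "We utilize a novel method.", "section": "methodology"}'
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)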