File size: 4,911 Bytes
25b6780 77438f0 901db22 77438f0 8bccb6d 77438f0 f60b4ac 77513d0 77438f0 442962e 77438f0 442962e 77438f0 6fbb945 77438f0 f60b4ac 77438f0 f60b4ac 73f597e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 77438f0 b1d2d1e 442962e 77438f0 442962e 77438f0 442962e 77438f0 442962e 9b56add 77438f0 442962e 77438f0 77513d0 77438f0 6fbb945 77438f0 442962e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import Literal, Optional
from llama_cpp import Llama
import re
# ==================== MODEL CONFIG ====================
# HuggingFace repo and quantized GGUF file for the CPU-friendly Phi-3.1 Mini.
MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
MODEL_FILE = "Phi-3.1-mini-4k-instruct-IQ2_M.gguf"
print("🚀 Loading Phi-3.1 Mini (Human Authorship Restorer)...")
# Downloads (or reuses the cached copy of) the model and loads it at import
# time, so the first request does not pay the load cost.
llm = Llama.from_pretrained(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    n_threads=4,
    n_ctx=1024, # safer for HF Spaces
    n_batch=128,
    n_gpu_layers=0,  # CPU-only inference
    verbose=False,
)
print("✅ Model loaded")
# ==================== FASTAPI ====================
app = FastAPI(title="AI Humanizer – Author Voice Restorer")
# NOTE(review): fully permissive CORS (any origin/method/header) — acceptable
# for a public demo, but confirm before exposing authenticated endpoints.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ==================== REQUEST ====================
class HumanizeRequest(BaseModel):
    """Request body for POST /api/humanize."""
    # Text to rewrite; length bounds (1–3000 chars) enforced by pydantic.
    text: str = Field(..., min_length=1, max_length=3000)
    # Paper section; selects the tone guidance used when building the prompt.
    section: Literal[
        "abstract",
        "introduction",
        "methodology",
        "results",
        "discussion"
    ]
    # Optional free-form author context, injected verbatim into the prompt.
    author_notes: Optional[str] = None
# ==================== SECTION-AWARE STYLE ====================
# Per-section tone instructions spliced into the prompt. Keys mirror the
# Literal values accepted by HumanizeRequest.section.
SECTION_GUIDANCE = {
    "abstract":
        "Write concisely and densely. Maintain objective academic tone.",
    "introduction":
        "Provide context and motivation. Sound like a researcher framing a problem.",
    "methodology":
        "Be procedural, precise, and restrained. No persuasive language.",
    "results":
        "Be cautious, observational, and factual. Avoid strong claims.",
    "discussion":
        "Be interpretive and reflective. Explain implications carefully."
}
# ==================== OUTPUT CLEANER ====================
def clean_output(text: str) -> str:
    """Normalize raw model output.

    Removes chat-template special tokens (``<|...|>``), collapses all runs
    of whitespace (including newlines) to single spaces, and trims the ends.
    """
    without_tokens = re.sub(r'<\|.*?\|>', '', text)
    single_spaced = re.sub(r'\s+', ' ', without_tokens)
    return single_spaced.strip()
# ==================== FALLBACK ====================
def fallback_humanize(text: str) -> str:
    """Rule-based simplification used when model inference fails or echoes.

    Replaces overly formal words/phrases with plainer equivalents,
    case-insensitively. Each phrase is escaped and anchored on word
    boundaries so it is treated as a literal whole word/phrase — the
    original unanchored ``re.sub(formal, ...)`` corrupted longer words
    (e.g. "commencement" became "startment").
    """
    replacements = [
        ("utilize", "use"),
        ("commence", "start"),
        ("approximately", "about"),
        ("therefore", "so"),
        ("however", "but"),
        ("in order to", "to"),
        ("due to the fact that", "because"),
        ("prior to", "before"),
        ("subsequent to", "after"),
    ]
    result = text
    for formal, simple in replacements:
        # re.escape: neutralize any regex metacharacters in the phrase;
        # \b ... \b: match whole words only.
        pattern = r'\b' + re.escape(formal) + r'\b'
        result = re.sub(pattern, simple, result, flags=re.IGNORECASE)
    return result
# ==================== PROMPT BUILDER ====================
def build_prompt(text: str, section: str, author_notes: Optional[str]) -> str:
    """Assemble the full instruction prompt for the model.

    Combines the per-section tone guidance, the optional author context
    block, and the text to rewrite into a single stripped prompt string.
    Unknown sections fall back to a generic academic-tone instruction.
    """
    guidance = SECTION_GUIDANCE.get(section, "Use formal academic tone.")
    if author_notes:
        notes_block = f"\nAuthor context (do NOT invent new reasoning):\n{author_notes}\n"
    else:
        notes_block = ""
    prompt = f"""
You are an academic writing assistant.
GOAL:
Restore natural human authorship signals while preserving formal academic language.
NON-NEGOTIABLE RULES:
- Preserve all technical meaning, claims, numbers, and citations
- Do NOT add new information
- Do NOT remove uncertainty
- Do NOT invent justifications
- Do NOT change terminology
- Do NOT introduce grammar or punctuation errors
SECTION GUIDANCE:
{guidance}
HUMANIZATION RULES:
- Vary sentence rhythm naturally (short / medium / long)
- Reorder clauses where appropriate
- Reduce overused academic fillers (e.g., "Moreover", "Furthermore")
- Prefer implicit transitions
- Preserve author intent and constraints
{notes_block}
TEXT:
{text}
OUTPUT:
Return ONLY the revised text.
""".strip()
    return prompt
# ==================== ENDPOINT ====================
@app.post("/api/humanize")
async def humanize(req: HumanizeRequest):
    """Rewrite req.text with a more natural authorial voice.

    Runs the LLM on a section-aware prompt. If inference raises, or the
    model returns nothing new (empty output or a case-insensitive echo of
    the input), falls back to the rule-based rewriter. Always returns a
    dict; "success" is False only when inference itself failed.
    """
    source = req.text.strip()
    prompt = build_prompt(source, req.section, req.author_notes)
    try:
        completion = llm(
            prompt,
            max_tokens=400,
            temperature=0.4,  # controlled, academic-safe
            top_p=0.9,
            top_k=40,
            stop=["<|user|>", "<|end|>"],
            echo=False,
        )
        candidate = clean_output(completion["choices"][0]["text"])
        # Empty output or a plain echo of the input → use the rule-based path.
        if not candidate or candidate.lower() == source.lower():
            candidate = fallback_humanize(source)
        return {
            "original": source,
            "section": req.section,
            "humanized": candidate,
            "success": True,
        }
    except Exception as err:
        print("❌ Inference error:", err)
        return {
            "original": source,
            "section": req.section,
            "humanized": fallback_humanize(source),
            "success": False,
        }
# ==================== HEALTH ====================
@app.get("/")
def health():
    """Liveness probe: report service status, loaded model file, and the main endpoint."""
    payload = {
        "status": "ok",
        "model": MODEL_FILE,
        "endpoint": "/api/humanize",
    }
    return payload
|