# app.py — Humanizer Pro 2025 (Hugging Face Space by viskav)
# Exported from the HF Spaces file viewer (revision 67a0f44, 6.51 kB).
import asyncio
import os
import re
import time
from contextlib import asynccontextmanager
from typing import Literal

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from pydantic import BaseModel, Field
# ==================== OPTIMIZED CONFIGURATION ====================
# Every knob is overridable via environment variables so the Space can be
# tuned without a code change; defaults favor CPU-only inference latency.
MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
MODEL_FILE = "Phi-3.1-mini-4k-instruct-Q4_K_M.gguf"  # Q4_K_M: faster than IQ3_M, better quality
N_THREADS = int(os.environ.get("N_THREADS", "8"))        # CPU threads for inference
N_CTX = int(os.environ.get("N_CTX", "512"))              # small context window = major speedup
N_BATCH = int(os.environ.get("N_BATCH", "256"))          # prompt-eval batch size, CPU-tuned
N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", "0"))  # 0 = CPU only (HF Spaces free tier)
# Was hard-coded; now env-configurable like the other knobs (same default).
MAX_INPUT_LENGTH = int(os.environ.get("MAX_INPUT_LENGTH", "500"))
# ==================== GLOBAL MODEL ====================
# Set by the lifespan handler: llm stays None if loading fails, and
# model_loading_error then carries the message shown by the "/" endpoint.
llm = None
model_loading_error = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the GGUF model once at startup; log shutdown on exit.

    On success the module-level ``llm`` handle is populated and
    ``model_loading_error`` cleared; on failure ``llm`` is left ``None``
    and the error message is recorded for the status endpoints.
    """
    global llm, model_loading_error
    print("Starting Humanizer Pro 2025...")
    load_kwargs = dict(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        n_ctx=N_CTX,
        n_batch=N_BATCH,
        n_threads=N_THREADS,
        n_gpu_layers=N_GPU_LAYERS,
        use_mmap=True,   # map the weights file instead of copying into RAM
        use_mlock=False, # don't pin pages; Spaces memory is limited
        verbose=False,
        # 0.0 defers both RoPE values to whatever the model file specifies.
        rope_freq_base=0.0,
        rope_freq_scale=0.0,
    )
    try:
        print(f"Loading {MODEL_FILE}...")
        llm = Llama.from_pretrained(**load_kwargs)
        # Tiny throwaway generation so the first real request doesn't pay
        # the one-time warm-up cost.
        llm("Test", max_tokens=10, temperature=0.7)
        print("✅ Model loaded & warmed up!")
        model_loading_error = None
    except Exception as e:
        print(f"❌ Model failed: {e}")
        model_loading_error = str(e)
        llm = None
    yield
    print("Shutting down...")
app = FastAPI(
    title="Humanizer Pro 2025",
    description="Undetectable AI Humanizer (Turnitin-Proof)",
    version="3.1-OPTIMIZED",
    lifespan=lifespan  # model is loaded/unloaded by the lifespan handler above
)
# Wide-open CORS so any front-end origin can reach the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm whether credentials
# are actually needed; if not, set allow_credentials=False.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ==================== REQUEST MODELS ====================
class TransformRequest(BaseModel):
    """Body for POST /api/transform: text to rewrite plus the target style."""
    # Length is also re-capped server-side before inference.
    text: str = Field(..., min_length=1, max_length=MAX_INPUT_LENGTH)
    style: Literal["professional", "casual", "academic", "marketing", "humanizer"] = "humanizer"
class HumanizeRequest(BaseModel):
    """Body for POST /api/humanize: free text to rewrite as human-sounding."""
    text: str = Field(..., min_length=1, max_length=MAX_INPUT_LENGTH)
# ==================== ULTRA-SHORT PROMPTS (FASTER) ====================
# Deliberately terse one-line instructions: fewer prompt tokens means less
# prompt-eval work on CPU. Each template takes a single {text} slot, and the
# trailing "Output:"/"Rewrite:" cue marks where generation should begin.
STYLE_PROMPTS = {
    "professional": """Rewrite professionally: {text}
Output:""",
    "casual": """Rewrite casually: {text}
Output:""",
    "academic": """Rewrite academically: {text}
Output:""",
    "marketing": """Rewrite as marketing copy: {text}
Output:""",
    "humanizer": """Humanize this text naturally: {text}
Rewrite:""",
}
# ==================== CLEAN OUTPUT ====================
# Compiled once at import time; clean_output runs on every request.
_PREFIX_RE = re.compile(r'^(output:|rewrite:|humanized:|here is|here\'s)\s*:?\s*', re.IGNORECASE)
_PUNCT_RE = re.compile(r'^["\'\-\*\>\#]+\s*')
# Lines containing any of these are model meta-chatter, not content.
_META_MARKERS = ('here is', 'rewritten', 'output:', 'version', 'assistant')


def clean_output(text: str) -> str:
    """Strip prompt echoes and meta-chatter from raw model output.

    Returns the first substantial (>10 chars) non-meta line, falling back
    to the first non-empty line, then to the stripped input. Empty input
    yields "".
    """
    if not text:
        return ""
    # Drop common lead-ins ("Output:", "Here is ...") and stray punctuation.
    text = _PREFIX_RE.sub('', text)
    text = _PUNCT_RE.sub('', text)
    lines = [l.strip() for l in text.split('\n') if l.strip()]
    if not lines:
        return text.strip()
    for line in lines:
        lower = line.lower()
        # Skip meta-text the model sometimes emits before the real answer.
        if any(marker in lower for marker in _META_MARKERS):
            continue
        if len(line) > 10:  # must be substantial, not a fragment
            return line.strip(' "\'')
    # Every line was meta or too short: best effort, return the first one.
    # (lines is guaranteed non-empty here — the original's trailing
    # `if lines else ...` guard was dead code.)
    return lines[0].strip(' "\'')
# ==================== OPTIMIZED INFERENCE ====================
async def transform_with_model(text: str, style: str) -> str:
    """Run one generation for *text* using the template for *style*.

    Raises HTTPException 503 when the model is not loaded and 500 when
    generation fails. Returns the original text unchanged if the model
    produces nothing usable (empty or <= 5 chars after cleaning).
    """
    global llm
    if not llm:
        raise HTTPException(status_code=503, detail="Model not ready")
    # Re-cap the input even if a caller bypasses the pydantic validators.
    if len(text) > MAX_INPUT_LENGTH:
        text = text[:MAX_INPUT_LENGTH]
    prompt = STYLE_PROMPTS[style].format(text=text)

    def _generate():
        # Sampling tuned for CPU latency: short completions, moderate
        # temperature, narrow top-k, penalties disabled.
        return llm(
            prompt,
            max_tokens=150,
            temperature=0.75,
            top_p=0.92,
            top_k=40,
            repeat_penalty=1.1,
            frequency_penalty=0.0,
            presence_penalty=0.0,
            stop=["<|end|>", "<|user|>", "\n\n"],
            echo=False,
        )

    try:
        start = time.time()
        # BUGFIX: llama.cpp inference is CPU-bound and blocking; running it
        # directly inside this coroutine froze the whole event loop for the
        # duration of generation. Offload it to the default thread pool so
        # other requests (e.g. /health) stay responsive.
        output = await asyncio.get_running_loop().run_in_executor(None, _generate)
        raw = output["choices"][0]["text"] if output["choices"] else ""
        result = clean_output(raw)
        elapsed = time.time() - start
        print(f"⚡ Processed in {elapsed:.2f}s → {result[:50]}...")
        # Degenerate output guard: fall back to the caller's original text.
        return result if result and len(result) > 5 else text
    except Exception as e:
        print(f"❌ Inference error: {e}")
        raise HTTPException(status_code=500, detail=f"Inference failed: {str(e)}")
# ==================== ENDPOINTS ====================
@app.get("/")
async def root():
    """Status page: model readiness, model name, and any startup error."""
    status = "ready" if llm else "loading"
    return {
        "status": status,
        "model": MODEL_FILE,
        "message": "Humanizer Pro 2025 — Optimized for Speed ⚡",
        "error": model_loading_error,
    }
@app.get("/health")
async def health():
    """Lightweight liveness probe for the Space."""
    if llm:
        return {"status": "ok"}
    return {"status": "loading"}
@app.post("/api/transform")
async def transform(request: TransformRequest):
    """Rewrite the submitted text in the requested style."""
    # Reject whitespace-only payloads before touching the model.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")
    transformed = await transform_with_model(request.text, request.style)
    return {
        "original": request.text,
        "transformed": transformed,
        "style": request.style,
        "success": True,
    }
@app.post("/api/humanize")
async def humanize(request: HumanizeRequest):
    """Rewrite the submitted text so it reads as human-written."""
    # Reject whitespace-only payloads before touching the model.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")
    humanized = await transform_with_model(request.text, "humanizer")
    response = {
        "original": request.text,
        "humanized": humanized,
        "score": "~99% Human (Turnitin-Proof)",
    }
    return response
if __name__ == "__main__":
    # Manual/local launch; port 7860 is the standard HF Spaces app port.
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)