# app.py — Humanizer Pro 2025 (Hugging Face Space by viskav)
# Exported from the HF Spaces file viewer (revision 67a0f44, 6.51 kB).
import asyncio
import os
import re
import time
from contextlib import asynccontextmanager
from typing import Literal

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from pydantic import BaseModel, Field
# ==================== OPTIMIZED CONFIGURATION ====================
# Every knob is overridable via environment variables so the Space can be
# tuned without a code change; defaults favor CPU-only inference latency.
MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
MODEL_FILE = "Phi-3.1-mini-4k-instruct-Q4_K_M.gguf"  # Q4_K_M: faster than IQ3_M, better quality
N_THREADS = int(os.environ.get("N_THREADS", "8"))        # CPU threads for inference
N_CTX = int(os.environ.get("N_CTX", "512"))              # small context window = major speedup
N_BATCH = int(os.environ.get("N_BATCH", "256"))          # prompt-eval batch size, CPU-tuned
N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", "0"))  # 0 = CPU only (HF Spaces free tier)
# Was hard-coded; now env-configurable like the other knobs (same default).
MAX_INPUT_LENGTH = int(os.environ.get("MAX_INPUT_LENGTH", "500"))
# ==================== GLOBAL MODEL ====================
# Set by the lifespan handler: llm stays None if loading fails, and
# model_loading_error then carries the message shown by the "/" endpoint.
llm = None
model_loading_error = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the GGUF model once at startup; log shutdown on exit.

    On success the module-level ``llm`` handle is populated and
    ``model_loading_error`` cleared; on failure ``llm`` is left ``None``
    and the error message is recorded for the status endpoints.
    """
    global llm, model_loading_error
    print("Starting Humanizer Pro 2025...")
    load_kwargs = dict(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        n_ctx=N_CTX,
        n_batch=N_BATCH,
        n_threads=N_THREADS,
        n_gpu_layers=N_GPU_LAYERS,
        use_mmap=True,   # map the weights file instead of copying into RAM
        use_mlock=False, # don't pin pages; Spaces memory is limited
        verbose=False,
        # 0.0 defers both RoPE values to whatever the model file specifies.
        rope_freq_base=0.0,
        rope_freq_scale=0.0,
    )
    try:
        print(f"Loading {MODEL_FILE}...")
        llm = Llama.from_pretrained(**load_kwargs)
        # Tiny throwaway generation so the first real request doesn't pay
        # the one-time warm-up cost.
        llm("Test", max_tokens=10, temperature=0.7)
        print("✅ Model loaded & warmed up!")
        model_loading_error = None
    except Exception as e:
        print(f"❌ Model failed: {e}")
        model_loading_error = str(e)
        llm = None
    yield
    print("Shutting down...")
app = FastAPI(
    title="Humanizer Pro 2025",
    description="Undetectable AI Humanizer (Turnitin-Proof)",
    version="3.1-OPTIMIZED",
    lifespan=lifespan  # model is loaded/unloaded by the lifespan handler above
)
# Wide-open CORS so any front-end origin can reach the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm whether credentials
# are actually needed; if not, set allow_credentials=False.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ==================== REQUEST MODELS ====================
class TransformRequest(BaseModel):
    """Body for POST /api/transform: text to rewrite plus the target style."""
    # Length is also re-capped server-side before inference.
    text: str = Field(..., min_length=1, max_length=MAX_INPUT_LENGTH)
    style: Literal["professional", "casual", "academic", "marketing", "humanizer"] = "humanizer"
class HumanizeRequest(BaseModel):
    """Body for POST /api/humanize: free text to rewrite as human-sounding."""
    text: str = Field(..., min_length=1, max_length=MAX_INPUT_LENGTH)
# ==================== ULTRA-SHORT PROMPTS (FASTER) ====================
# Deliberately terse one-line instructions: fewer prompt tokens means less
# prompt-eval work on CPU. Each template takes a single {text} slot, and the
# trailing "Output:"/"Rewrite:" cue marks where generation should begin.
STYLE_PROMPTS = {
    "professional": """Rewrite professionally: {text}
Output:""",
    "casual": """Rewrite casually: {text}
Output:""",
    "academic": """Rewrite academically: {text}
Output:""",
    "marketing": """Rewrite as marketing copy: {text}
Output:""",
    "humanizer": """Humanize this text naturally: {text}
Rewrite:""",
}
# ==================== CLEAN OUTPUT ====================
# Compiled once at import time; clean_output runs on every request.
_PREFIX_RE = re.compile(r'^(output:|rewrite:|humanized:|here is|here\'s)\s*:?\s*', re.IGNORECASE)
_PUNCT_RE = re.compile(r'^["\'\-\*\>\#]+\s*')
# Lines containing any of these are model meta-chatter, not content.
_META_MARKERS = ('here is', 'rewritten', 'output:', 'version', 'assistant')


def clean_output(text: str) -> str:
    """Strip prompt echoes and meta-chatter from raw model output.

    Returns the first substantial (>10 chars) non-meta line, falling back
    to the first non-empty line, then to the stripped input. Empty input
    yields "".
    """
    if not text:
        return ""
    # Drop common lead-ins ("Output:", "Here is ...") and stray punctuation.
    text = _PREFIX_RE.sub('', text)
    text = _PUNCT_RE.sub('', text)
    lines = [l.strip() for l in text.split('\n') if l.strip()]
    if not lines:
        return text.strip()
    for line in lines:
        lower = line.lower()
        # Skip meta-text the model sometimes emits before the real answer.
        if any(marker in lower for marker in _META_MARKERS):
            continue
        if len(line) > 10:  # must be substantial, not a fragment
            return line.strip(' "\'')
    # Every line was meta or too short: best effort, return the first one.
    # (lines is guaranteed non-empty here — the original's trailing
    # `if lines else ...` guard was dead code.)
    return lines[0].strip(' "\'')
# ==================== OPTIMIZED INFERENCE ====================
async def transform_with_model(text: str, style: str) -> str:
    """Run one generation for *text* using the template for *style*.

    Raises HTTPException 503 when the model is not loaded and 500 when
    generation fails. Returns the original text unchanged if the model
    produces nothing usable (empty or <= 5 chars after cleaning).
    """
    global llm
    if not llm:
        raise HTTPException(status_code=503, detail="Model not ready")
    # Re-cap the input even if a caller bypasses the pydantic validators.
    if len(text) > MAX_INPUT_LENGTH:
        text = text[:MAX_INPUT_LENGTH]
    prompt = STYLE_PROMPTS[style].format(text=text)

    def _generate():
        # Sampling tuned for CPU latency: short completions, moderate
        # temperature, narrow top-k, penalties disabled.
        return llm(
            prompt,
            max_tokens=150,
            temperature=0.75,
            top_p=0.92,
            top_k=40,
            repeat_penalty=1.1,
            frequency_penalty=0.0,
            presence_penalty=0.0,
            stop=["<|end|>", "<|user|>", "\n\n"],
            echo=False,
        )

    try:
        start = time.time()
        # BUGFIX: llama.cpp inference is CPU-bound and blocking; running it
        # directly inside this coroutine froze the whole event loop for the
        # duration of generation. Offload it to the default thread pool so
        # other requests (e.g. /health) stay responsive.
        output = await asyncio.get_running_loop().run_in_executor(None, _generate)
        raw = output["choices"][0]["text"] if output["choices"] else ""
        result = clean_output(raw)
        elapsed = time.time() - start
        print(f"⚡ Processed in {elapsed:.2f}s → {result[:50]}...")
        # Degenerate output guard: fall back to the caller's original text.
        return result if result and len(result) > 5 else text
    except Exception as e:
        print(f"❌ Inference error: {e}")
        raise HTTPException(status_code=500, detail=f"Inference failed: {str(e)}")
# ==================== ENDPOINTS ====================
@app.get("/")
async def root():
    """Status page: model readiness, model name, and any startup error."""
    status = "ready" if llm else "loading"
    return {
        "status": status,
        "model": MODEL_FILE,
        "message": "Humanizer Pro 2025 — Optimized for Speed ⚡",
        "error": model_loading_error,
    }
@app.get("/health")
async def health():
    """Lightweight liveness probe for the Space."""
    if llm:
        return {"status": "ok"}
    return {"status": "loading"}
@app.post("/api/transform")
async def transform(request: TransformRequest):
    """Rewrite the submitted text in the requested style."""
    # Reject whitespace-only payloads before touching the model.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")
    transformed = await transform_with_model(request.text, request.style)
    return {
        "original": request.text,
        "transformed": transformed,
        "style": request.style,
        "success": True,
    }
@app.post("/api/humanize")
async def humanize(request: HumanizeRequest):
    """Rewrite the submitted text so it reads as human-written."""
    # Reject whitespace-only payloads before touching the model.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Empty text")
    humanized = await transform_with_model(request.text, "humanizer")
    response = {
        "original": request.text,
        "humanized": humanized,
        "score": "~99% Human (Turnitin-Proof)",
    }
    return response
if __name__ == "__main__":
    # Manual/local launch; port 7860 is the standard HF Spaces app port.
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)