Spaces:

bughead
/

humanzise-api

Running

File size: 5,058 Bytes

325e5a1

import re
from typing import Dict, Optional

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field

from utils.ai_detection_utils import classify_text_hf
from utils.pdf_utils import extract_text_from_pdf
from utils.humanizer_core import (
    count_sentences,
    count_words,
    extract_citations,
    minimal_rewriting,
    preserve_linebreaks_rewrite,
    restore_citations,
)


# Long-form API description; passed to FastAPI(description=...) below.
DESCRIPTION = """
AI Text Humanizer & Detector API

Provides server-side access to the project's text humanization and AI-detection
pipelines. The API is consumed by the Next.js frontend in /web.
"""

# Tag metadata passed to FastAPI(openapi_tags=...). The names match the
# `tags=[...]` values used on the route decorators in this module.
tags_metadata = [
    {"name": "humanize", "description": "Transform AI-generated text into human-like prose."},
    {"name": "detect", "description": "Classify text as AI-generated or human-written."},
]

# Application instance; the routes below are registered on it via decorators.
app = FastAPI(
    title="AI Text Humanizer API",
    version="0.3",
    description=DESCRIPTION,
    openapi_tags=tags_metadata,
)

# CORS: accept cross-origin requests from any origin, with any method and any
# request header.
# NOTE(review): FastAPI's CORS documentation advises against combining wildcard
# origins/methods/headers with allow_credentials=True. If the frontend never
# sends cookies or auth headers, allow_credentials=False is likely enough;
# otherwise list the frontend origin(s) explicitly. Confirm with the frontend
# before changing, since tightening this can break browser clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Request body for POST /humanize.
# NOTE(review): the three Optional[...] fields accept an explicit JSON null in
# addition to their declared type, so handlers can receive None for them even
# though a default is declared. Consider plain `float` / `bool` if null should
# be rejected at validation time.
class HumanizeRequest(BaseModel):
    # Text to rewrite. Required; the model itself does not enforce non-emptiness
    # (the /humanize handler rejects blank text).
    text: str = Field(..., description="The input text to humanize. Must be non-empty.")
    # Forwarded to the rewriter as `p_syn`; must lie in [0.0, 1.0], default 0.2.
    # Presumably a synonym-substitution probability; confirm in utils.humanizer_core.
    p_syn: Optional[float] = Field(0.2, ge=0.0, le=1.0)
    # Forwarded to the rewriter as `p_trans`; must lie in [0.0, 1.0], default 0.2.
    # Presumably a transition-insertion probability; confirm in utils.humanizer_core.
    p_trans: Optional[float] = Field(0.2, ge=0.0, le=1.0)
    # When truthy the handler uses preserve_linebreaks_rewrite, otherwise
    # minimal_rewriting. Defaults to True.
    preserve_linebreaks: Optional[bool] = Field(True)


# Response body for POST /humanize.
class HumanizeResponse(BaseModel):
    # Rewritten text after citations are restored and spacing is tidied.
    humanized_text: str
    # Word / sentence counts of the submitted text.
    orig_word_count: int
    orig_sentence_count: int
    # Word / sentence counts of `humanized_text`.
    new_word_count: int
    new_sentence_count: int
    # new - orig differences; negative when the rewrite is shorter.
    words_added: int
    sentences_added: int


# Request body for POST /detect.
class DetectRequest(BaseModel):
    # Text to classify. Required; the /detect handler rejects blank text.
    text: str = Field(..., description="The input text to analyze.")


# Response body for POST /detect.
class DetectResponse(BaseModel):
    # Second value returned by classify_text_hf, passed through unchanged.
    # Presumably per-label percentages; confirm in utils.ai_detection_utils.
    percentages: Dict[str, float]
    # First value returned by classify_text_hf, passed through unchanged.
    # Presumably a text-segment -> label mapping; confirm in utils.ai_detection_utils.
    classification: Dict[str, str]
    # Mean AI probability from the classifier, scaled by 100 and rounded to 2 places.
    ai_score: float
    # 100 - ai_score, rounded to 2 places.
    human_score: float


@app.get("/health", tags=["humanize"], summary="Health check")
def health():
    # Liveness probe: no dependencies are touched, the payload is constant.
    # (Kept as a comment, not a docstring, so the OpenAPI description is unchanged.)
    payload = dict(status="ok")
    return payload


# Clean-up passes applied, in this order, to the rewritten text.
_HUMANIZE_CLEANUP = (
    (re.compile(r"[ \t]+([.,;:!?])"), r"\1"),  # drop spaces/tabs before punctuation
    (re.compile(r"(\()[ \t]+"), r"\1"),  # drop spaces/tabs after "("
    (re.compile(r"[ \t]+(\))"), r"\1"),  # drop spaces/tabs before ")"
    (re.compile(r"[ \t]{2,}"), " "),  # collapse runs of spaces/tabs (newlines kept)
    (re.compile(r"``\s*(.+?)\s*''"), r'"\1"'),  # `` quoted '' -> "quoted"
)

# Fallbacks for an explicit JSON null. They mirror the defaults declared on
# HumanizeRequest; keep the two in sync.
_DEFAULT_P_SYN = 0.2
_DEFAULT_P_TRANS = 0.2


def _tidy_rewritten_text(text: str) -> str:
    """Apply the spacing/quote clean-up passes to *text* and return the result."""
    for pattern, replacement in _HUMANIZE_CLEANUP:
        text = pattern.sub(replacement, text)
    return text


# POST /humanize: rewrite the submitted text and report before/after counts.
#
# Steps: validate -> count words/sentences -> pull citations out -> rewrite ->
# put citations back -> tidy spacing -> count again.
#
# Raises HTTPException(400) when `text` is empty or whitespace-only.
#
# Documented with comments rather than a docstring so the OpenAPI operation
# description stays exactly as it was.
@app.post("/humanize", response_model=HumanizeResponse, tags=["humanize"])
def humanize(req: HumanizeRequest):
    text = req.text or ""
    if not text.strip():
        raise HTTPException(status_code=400, detail="`text` must be a non-empty string")

    # The request fields are Optional, so a client may send an explicit null.
    # Use the declared defaults in that case instead of handing None to the
    # rewriters.
    p_syn = _DEFAULT_P_SYN if req.p_syn is None else req.p_syn
    p_trans = _DEFAULT_P_TRANS if req.p_trans is None else req.p_trans

    orig_wc = count_words(text)
    orig_sc = count_sentences(text)

    # Citations are swapped for placeholders before rewriting and restored after.
    no_refs_text, placeholders = extract_citations(text)

    # A null `preserve_linebreaks` is falsy and therefore selects
    # minimal_rewriting, exactly as before.
    if req.preserve_linebreaks:
        rewritten = preserve_linebreaks_rewrite(no_refs_text, p_syn=p_syn, p_trans=p_trans)
    else:
        rewritten = minimal_rewriting(no_refs_text, p_syn=p_syn, p_trans=p_trans)

    final_text = _tidy_rewritten_text(restore_citations(rewritten, placeholders))

    new_wc = count_words(final_text)
    new_sc = count_sentences(final_text)

    return {
        "humanized_text": final_text,
        "orig_word_count": orig_wc,
        "orig_sentence_count": orig_sc,
        "new_word_count": new_wc,
        "new_sentence_count": new_sc,
        "words_added": new_wc - orig_wc,
        "sentences_added": new_sc - orig_sc,
    }


@app.post("/extract-file", tags=["humanize"], summary="Extract text from uploaded file")
async def extract_file(file: UploadFile = File(...)):
    """Accept a PDF, TXT or MD file and return its plain-text contents."""
    # Returns {"text": <extracted text>, "filename": <name as uploaded>}.
    # Raises HTTPException(400) for a missing filename or unsupported extension,
    # and HTTPException(500) when extraction/decoding fails.
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    # Check the extension (case-insensitively) BEFORE reading the upload, so an
    # unsupported file is rejected without pulling its whole body into memory.
    name = file.filename.lower()
    is_pdf = name.endswith(".pdf")
    if not is_pdf and not name.endswith((".txt", ".md")):
        raise HTTPException(
            status_code=400,
            detail="Unsupported file type. Use .pdf, .txt, or .md",
        )

    content = await file.read()

    try:
        if is_pdf:
            text = extract_text_from_pdf(content)
        else:
            # utf-8-sig strips a leading BOM (common in Windows-saved files)
            # that plain utf-8 would leave in the text as U+FEFF; undecodable
            # bytes are still dropped, as before.
            text = content.decode("utf-8-sig", errors="ignore")
    except HTTPException:
        # Pass through unchanged in case a helper raises an HTTP error itself.
        raise
    except Exception as exc:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=f"Failed to extract: {exc}") from exc

    return {"text": text, "filename": file.filename}


@app.post("/detect", response_model=DetectResponse, tags=["detect"])
def detect(req: DetectRequest):
    # POST /detect: run classify_text_hf on the submitted text and return its
    # outputs together with headline AI/human scores.
    # Raises HTTPException(400) when the text is empty or whitespace-only.
    body = req.text or ""
    if body.strip() == "":
        raise HTTPException(status_code=400, detail="`text` must be a non-empty string")

    labels, bucket_percentages, mean_probability = classify_text_hf(body)

    # The headline score comes straight from the mean probability rather than
    # from bucket counts, which collapse to 0 for borderline text.
    ai_pct = round(mean_probability * 100, 2)

    return {
        "percentages": bucket_percentages,
        "classification": labels,
        "ai_score": ai_pct,
        "human_score": round(100 - ai_pct, 2),
    }