File size: 5,058 Bytes
325e5a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import re
from typing import Dict, Optional

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field

from utils.ai_detection_utils import classify_text_hf
from utils.pdf_utils import extract_text_from_pdf
from utils.humanizer_core import (
    count_sentences,
    count_words,
    extract_citations,
    minimal_rewriting,
    preserve_linebreaks_rewrite,
    restore_citations,
)


# Long-form API description; handed to the FastAPI constructor as `description`.
DESCRIPTION = """
AI Text Humanizer & Detector API

Provides server-side access to the project's text humanization and AI-detection
pipelines. The API is consumed by the Next.js frontend in /web.
"""

# Tag descriptions handed to the FastAPI constructor as `openapi_tags`; the
# names match the `tags=[...]` values used on the route decorators.
tags_metadata = [
    {"name": tag_name, "description": tag_blurb}
    for tag_name, tag_blurb in (
        ("humanize", "Transform AI-generated text into human-like prose."),
        ("detect", "Classify text as AI-generated or human-written."),
    )
]

# Application object that every route decorator in this module attaches to.
app = FastAPI(
    title="AI Text Humanizer API",
    description=DESCRIPTION,
    version="0.3",
    openapi_tags=tags_metadata,
)

# CORS policy: every origin, method and header is accepted.
# NOTE(review): the FastAPI CORS documentation states that origins, methods and
# headers must be listed explicitly (no "*") when `allow_credentials` is True.
# The correct origin list is not visible from this file, so the settings are
# kept as they were — confirm against the frontend deployment and tighten.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)


class HumanizeRequest(BaseModel):
    """Request body for ``POST /humanize``."""

    # Required input text. The /humanize handler additionally rejects values
    # that are empty or whitespace-only with a 400 response.
    text: str = Field(..., description="The input text to humanize. Must be non-empty.")
    # Forwarded to the rewriting helper as its `p_syn` keyword; constrained to
    # [0.0, 1.0] with a default of 0.2.
    # NOTE(review): presumably a synonym-substitution probability — confirm in
    # utils.humanizer_core. `Optional` also admits an explicit null.
    p_syn: Optional[float] = Field(0.2, ge=0.0, le=1.0)
    # Forwarded to the rewriting helper as its `p_trans` keyword; same range
    # and default as `p_syn`.
    # NOTE(review): presumably a transition-insertion probability — confirm.
    p_trans: Optional[float] = Field(0.2, ge=0.0, le=1.0)
    # When truthy the handler calls `preserve_linebreaks_rewrite`, otherwise
    # `minimal_rewriting`. Defaults to True.
    preserve_linebreaks: Optional[bool] = Field(True)


class HumanizeResponse(BaseModel):
    """Response body for ``POST /humanize``."""

    # Rewritten text after citations are restored and spacing is cleaned up.
    humanized_text: str
    # `count_words` / `count_sentences` applied to the text as submitted.
    orig_word_count: int
    orig_sentence_count: int
    # The same two counters applied to `humanized_text`.
    new_word_count: int
    new_sentence_count: int
    # New count minus original count; negative when the rewrite is shorter.
    words_added: int
    sentences_added: int


class DetectRequest(BaseModel):
    """Request body for ``POST /detect``."""

    # Required input text. The /detect handler rejects values that are empty
    # or whitespace-only with a 400 response.
    text: str = Field(..., description="The input text to analyze.")


class DetectResponse(BaseModel):
    """Response body for ``POST /detect``."""

    # Second value returned by `classify_text_hf`, passed through unchanged.
    # NOTE(review): key names are defined by the classifier — presumably one
    # entry per label bucket; confirm in utils.ai_detection_utils.
    percentages: Dict[str, float]
    # First value returned by `classify_text_hf`, passed through unchanged.
    # NOTE(review): presumably maps a text segment to its label — confirm.
    classification: Dict[str, str]
    # Classifier's mean AI probability multiplied by 100, rounded to 2 decimals.
    ai_score: float
    # 100 minus `ai_score`, rounded to 2 decimals.
    human_score: float


@app.get("/health", tags=["humanize"], summary="Health check")
def health():
    """Report that the service is up and able to answer requests."""
    # Constant payload: no dependency (model, filesystem) is probed here.
    payload = {"status": "ok"}
    return payload


def _tidy_punctuation(text: str) -> str:
    """Clean up spacing around punctuation/parentheses and ``...'' style quotes."""
    # Drop spaces/tabs before closing punctuation: "word ," -> "word,".
    text = re.sub(r"[ \t]+([.,;:!?])", r"\1", text)
    # Drop spaces/tabs just inside parentheses: "( x )" -> "(x)".
    text = re.sub(r"(\()[ \t]+", r"\1", text)
    text = re.sub(r"[ \t]+(\))", r"\1", text)
    # Collapse runs of spaces/tabs. Newlines are not matched, so line breaks survive.
    text = re.sub(r"[ \t]{2,}", " ", text)
    # Rewrite ``quoted'' (two backticks ... two apostrophes) as "quoted".
    return re.sub(r"``\s*(.+?)\s*''", r'"\1"', text)


@app.post("/humanize", response_model=HumanizeResponse, tags=["humanize"])
def humanize(req: HumanizeRequest):
    """Humanize the submitted text and report how its word and sentence counts changed.

    Responds with 400 when `text` is empty or contains only whitespace.
    """
    text = req.text or ""
    if not text.strip():
        raise HTTPException(status_code=400, detail="`text` must be a non-empty string")

    # Both probabilities are declared Optional on the request model, so an
    # explicit JSON null passes validation. Fall back to the model's declared
    # default instead of handing None to the rewriting helpers.
    default_probability = 0.2
    p_syn = default_probability if req.p_syn is None else req.p_syn
    p_trans = default_probability if req.p_trans is None else req.p_trans

    # Counts for the "before" side are taken on the text exactly as submitted.
    orig_wc = count_words(text)
    orig_sc = count_sentences(text)

    # `extract_citations` returns the text to rewrite plus the placeholders
    # that `restore_citations` needs afterwards.
    no_refs_text, placeholders = extract_citations(text)

    # A null/false `preserve_linebreaks` selects the plain rewriting path.
    if req.preserve_linebreaks:
        rewritten = preserve_linebreaks_rewrite(no_refs_text, p_syn=p_syn, p_trans=p_trans)
    else:
        rewritten = minimal_rewriting(no_refs_text, p_syn=p_syn, p_trans=p_trans)

    final_text = _tidy_punctuation(restore_citations(rewritten, placeholders))

    new_wc = count_words(final_text)
    new_sc = count_sentences(final_text)

    return {
        "humanized_text": final_text,
        "orig_word_count": orig_wc,
        "orig_sentence_count": orig_sc,
        "new_word_count": new_wc,
        "new_sentence_count": new_sc,
        "words_added": new_wc - orig_wc,
        "sentences_added": new_sc - orig_sc,
    }


@app.post("/extract-file", tags=["humanize"], summary="Extract text from uploaded file")
async def extract_file(file: UploadFile = File(...)):
    """Accept a PDF, TXT or MD file and return its plain-text contents.

    The file type is decided from the filename extension (case-insensitive).
    Responds with 400 when no filename is supplied or the extension is not
    supported, and with 500 when extraction fails.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    name = file.filename.lower()
    is_pdf = name.endswith(".pdf")
    if not is_pdf and not name.endswith((".txt", ".md")):
        # Reject before reading so this handler never loads an upload it
        # cannot process.
        raise HTTPException(
            status_code=400,
            detail="Unsupported file type. Use .pdf, .txt, or .md",
        )

    content = await file.read()

    try:
        if is_pdf:
            text = extract_text_from_pdf(content)
        else:
            # Bytes that are not valid UTF-8 are dropped rather than failing
            # the request.
            text = content.decode("utf-8", errors="ignore")
    except HTTPException:
        # Let an HTTP error raised by the extractor through unchanged.
        raise
    except Exception as exc:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=f"Failed to extract: {exc}") from exc

    return {"text": text, "filename": file.filename}


@app.post("/detect", response_model=DetectResponse, tags=["detect"])
def detect(req: DetectRequest):
    """Score the submitted text as AI-generated versus human-written.

    Responds with 400 when `text` is empty or contains only whitespace.
    """
    candidate = req.text or ""
    if candidate.strip() == "":
        raise HTTPException(status_code=400, detail="`text` must be a non-empty string")

    labels, bucket_percentages, mean_prob = classify_text_hf(candidate)

    # The headline score is the raw mean probability, not a bucket count:
    # bucket-counting collapses to 0 for borderline text, so the mean is the
    # more honest signal.
    ai_pct = round(mean_prob * 100, 2)

    return {
        "percentages": bucket_percentages,
        "classification": labels,
        "ai_score": ai_pct,
        "human_score": round(100 - ai_pct, 2),
    }