Tany Nguyen committed
Commit · 1182571
Parent(s): Init repo
Browse files
- .gitignore +53 -0
- Dockerfile +49 -0
- Dockerfile.local +38 -0
- README.md +16 -0
- RUN.md +145 -0
- app.py +58 -0
- docker-compose.yml +24 -0
- download_model.py +82 -0
- modal_deploy.py +47 -0
- requirements.txt +12 -0
- src/auth.py +24 -0
- src/config.py +29 -0
- src/interface.py +30 -0
- src/middleware/request_logging.py +34 -0
- src/models/parrot_model.py +203 -0
- src/modes/generation/__init__.py +2 -0
- src/modes/generation/factory.py +11 -0
- src/modes/generation/generation_interface.py +14 -0
- src/modes/generation/hybrid.py +51 -0
- src/modes/processing/burstiness.py +58 -0
- src/schemas.py +7 -0
- src/services/language_detector.py +62 -0
- src/services/paraphraser.py +46 -0
.gitignore
ADDED
@@ -0,0 +1,53 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual Environments
+venv/
+env/
+ENV/
+.env
+.venv/
+demo/
+
+# Environment Variables
+.env
+
+# IDEs
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Project Specific
+saved_models/
+*.pt
+*.bin
+*.onnx
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Sub git
+server/
+*.ps1
Dockerfile
ADDED
@@ -0,0 +1,49 @@
+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+FROM python:3.9-slim
+
+# Install system dependencies (git is required for some pip packages)
+USER root
+RUN apt-get update && apt-get install -y \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+
+# Switch to the "user" user
+USER user
+
+# Set home to the user's home directory
+ENV HOME=/home/user \
+    PATH="/home/user/.local/bin:$PATH" \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    MODELS_DIR=/home/user/app/saved_models \
+    PORT=7860
+
+# Set the working directory to the user's home directory
+WORKDIR $HOME/app
+
+# Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+COPY --chown=user . $HOME/app
+
+# Install requirements
+# 1. Install CPU-only torch first to save space (the free tier has no GPU)
+RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
+
+# 2. Install other dependencies
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+# Download NLTK data
+RUN python -m nltk.downloader punkt punkt_tab
+
+# Download/cache models during the build
+# Ensure the models directory exists
+RUN mkdir -p $MODELS_DIR
+RUN python download_model.py
+
+# Expose port 7860
+EXPOSE 7860
+
+# Start the application using uvicorn
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
Dockerfile.local
ADDED
@@ -0,0 +1,38 @@
+# Use a PyTorch base image with CUDA support for GPU acceleration
+FROM pytorch/pytorch:2.1.2-cuda11.8-cudnn8-runtime
+
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    MODELS_DIR=/app/saved_models \
+    PORT=8000
+
+# Install system dependencies
+# git is required for installing parrot-paraphraser from GitHub
+RUN apt-get update && apt-get install -y \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Copy requirements
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download NLTK data
+RUN python -m nltk.downloader punkt punkt_tab
+
+# Copy only the downloader script
+# src/ is NOT copied (it will be bind-mounted by docker-compose)
+COPY download_model.py .
+
+# Run the downloader script to cache the FP16 and INT8 models
+# This script is standalone and does not require src/
+RUN python download_model.py
+
+# Expose the port
+EXPOSE 8000
+
+# No CMD here - it is defined in docker-compose.yml
README.md
ADDED
@@ -0,0 +1,16 @@
+---
+title: Parrot Paraphraser
+emoji: 🦜
+colorFrom: green
+colorTo: blue
+sdk: docker
+app_port: 7860
+---
+
+# Parrot Paraphraser API
+
+This is a paraphrasing API deployed on Hugging Face Spaces.
+
+## Features
+- **Paraphrasing**: Rewrites input text while preserving meaning.
+- **Language Detection**: Automatically detects and strictly enforces **English text only**. Non-English inputs are rejected to save resources.
RUN.md
ADDED
@@ -0,0 +1,145 @@
+# Paraphraser API Runner's Guide
+
+This guide explains how to build, run, and interact with the Paraphraser API server using Docker.
+
+## Prerequisites
+
+- **Docker** and **Docker Compose** installed
+- **NVIDIA GPU** (optional, but recommended for performance) with the NVIDIA Container Toolkit installed
+- **Git** (to clone the repository)
+
+## Quick Start
+
+1. **Clone the repository**:
+
+   ```bash
+   git clone <repository-url>
+   cd paraphraser/server
+   ```
+2. **Configuration**:
+   The server comes with a default `.env` file. You can customize it if needed:
+
+   ```ini
+   # .env
+   API_KEY=sk-sample-key
+   BATCH_SIZE=8
+   PORT=8080  # Port on your host machine
+   MODELS_DIR=saved_models
+   COMPOSE_PROJECT_NAME=paraphraser
+   ```
+3. **Start the Server**:
+   Run the following command to build and start the container in the background:
+
+   ```bash
+   docker-compose up -d --build
+   ```
+
+   *Note: The first run will take a few minutes to download the ML models and build the image.*
+4. **Verify Status**:
+   Check that the server is running and healthy:
+
+   ```bash
+   curl http://localhost:8080/health
+   ```
+
+   **Expected Output:**
+
+   ```json
+   {"status":"healthy","model_loaded":true,"device":"cuda"}
+   ```
+
+   *(If `device` says "cpu", GPU acceleration is not active.)*
+
+## Usage
+
+### Endpoint: `/paraphrase`
+
+Use this endpoint to paraphrase text. **Note: Only English text is supported.**
+
+- **URL**: `http://localhost:8080/paraphrase`
+- **Method**: `POST`
+- **Headers**:
+  - `Content-Type`: `application/json`
+  - `X-API-Key`: `sk-sample-key` (or whatever is set in your `.env`)
+
+### Sample Request (cURL)
+
+```bash
+curl -X POST http://localhost:8080/paraphrase \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: sk-sample-key" \
+  -d '{
+    "text": "The quick brown fox jumps over the lazy dog."
+  }'
+```
+
+### Sample Response
+
+```json
+{
+  "paraphrased_text": "The quick brown fox leaps over the lazy dog."
+}
+```
+
+### Error Response (Non-English Input)
+
+If the input text is not detected as English, the server returns a `422 Unprocessable Entity` error.
+
+```json
+{
+  "detail": "Only English text is supported"
+}
+```
+
+## Troubleshooting
+
+- **Server not responding immediately?**
+  The model takes 10-20 seconds to load into memory on startup. Check the logs with:
+
+  ```bash
+  docker logs -f paraphraser_api
+  ```
+- **GPU not detected?**
+  Ensure you have the NVIDIA Container Toolkit installed and your Docker daemon is configured to use it.
+
+## Stopping the Server
+
+To stop and remove the container:
+
+```bash
+docker-compose down
+```
+
+## Deployment to Cloud (Modal)
+
+You can deploy this API to [Modal](https://modal.com) for serverless GPU inference.
+
+### Prerequisites
+1. Create a [Modal account](https://modal.com/signup).
+2. Install the Modal client:
+   ```bash
+   pip install modal
+   ```
+3. Authenticate:
+   ```bash
+   modal setup
+   ```
+
+### Deploying
+Run the deployment script:
+```bash
+modal deploy modal_deploy.py
+```
+
+**Note for Windows Users:**
+If you encounter encoding errors (like `charmap codec can't encode...`), set your Python encoding to UTF-8:
+- **PowerShell**: `[System.Environment]::SetEnvironmentVariable('PYTHONIOENCODING', 'utf-8', 'User')` (restart the terminal afterwards)
+- **Cmd**: `set PYTHONIOENCODING=utf-8`
+
+The command will output your live URL (e.g., `https://your-username--parrot-paraphraser-fastapi-app.modal.run`).
+
+### Testing the Cloud Endpoint
+```bash
+curl -X POST "https://your-url.modal.run/paraphrase" \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: sk-sample-key" \
+  -d '{"text": "The quick brown fox jumps over the lazy dog."}'
+```
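For programmatic access, here is a minimal Python client sketch equivalent to the cURL examples in RUN.md above. It is not part of the commit: the URL and key are the sample values from the default `.env`, and the `paraphrase` helper name is illustrative.

```python
import requests

API_URL = "http://localhost:8080/paraphrase"  # host port from .env (PORT=8080)
API_KEY = "sk-sample-key"                     # must match API_KEY in .env

def paraphrase(text: str) -> str:
    # Send the same JSON body and X-API-Key header the cURL example uses
    resp = requests.post(
        API_URL,
        headers={"X-API-Key": API_KEY},
        json={"text": text},
        timeout=60,  # generation can be slow, especially on CPU
    )
    resp.raise_for_status()  # surfaces 403 (bad key) and 422 (non-English)
    return resp.json()["paraphrased_text"]

if __name__ == "__main__":
    print(paraphrase("The quick brown fox jumps over the lazy dog."))
```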
app.py
ADDED
@@ -0,0 +1,58 @@
+from fastapi import FastAPI, HTTPException, Depends
+import os
+import logging
+import uvicorn
+from dotenv import load_dotenv
+
+from src.auth import get_api_key
+from src.schemas import ParaphraseRequest, ParaphraseResponse
+from src.services.paraphraser import paraphraser_service
+from src.services.language_detector import language_detector_service
+from src.middleware.request_logging import RequestLoggingMiddleware
+
+# Load environment variables
+load_dotenv()
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S"
+)
+
+app = FastAPI(title="Parrot Paraphraser API")
+app.add_middleware(RequestLoggingMiddleware)
+
+@app.on_event("startup")
+async def startup_event():
+    language_detector_service.initialize()
+    paraphraser_service.initialize()
+
+@app.get("/")
+def root():
+    return {"message": "Parrot Paraphraser API is running. Visit /docs for documentation."}
+
+@app.get("/health")
+def health_check():
+    return paraphraser_service.get_status()
+
+@app.post("/paraphrase", response_model=ParaphraseResponse)
+async def paraphrase(request: ParaphraseRequest, api_key: str = Depends(get_api_key)):
+    try:
+        # Detect language
+        lang = language_detector_service.detect_language(request.text)
+        if lang != "en":
+            raise HTTPException(status_code=422, detail="Only English text is supported")
+
+        result = paraphraser_service.paraphrase(request.text)
+        return {"paraphrased_text": result}
+    except HTTPException as he:
+        raise he
+    except RuntimeError:
+        raise HTTPException(status_code=503, detail="Model not initialized")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 8080))
+    uvicorn.run(app, host="0.0.0.0", port=port)
docker-compose.yml
ADDED
@@ -0,0 +1,24 @@
+services:
+  paraphraser-api:
+    build: {context: ., dockerfile: Dockerfile.local}  # GPU image; matches the /app paths and port 8000 below
+    container_name: paraphraser_api
+    ports:
+      - "${PORT:-8080}:8000"
+    env_file: .env
+    environment:
+      # Required for GPU access inside the container
+      - NVIDIA_VISIBLE_DEVICES=all
+    volumes:
+      - ./.env:/app/.env
+      - ./src:/app/src
+      - ./app.py:/app/app.py
+    # Explicit command ensures we run on port 8000
+    command: uvicorn app:app --host 0.0.0.0 --port 8000
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    restart: unless-stopped
download_model.py
ADDED
@@ -0,0 +1,82 @@
+import os
+import torch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+# Standalone configuration to avoid a dependency on src/ during the Docker build
+MODEL_TAG = "prithivida/parrot_paraphraser_on_T5"
+MODELS_DIR = os.getenv("MODELS_DIR", "saved_models")
+QUANTIZED_CPU_FILENAME = "parrot_t5_int8.pt"
+QUANTIZED_GPU_FILENAME = "parrot_t5_fp16.pt"
+FASTTEXT_MODEL_URL = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz"
+FASTTEXT_FILENAME = "lid.176.ftz"
+
+def ensure_models_dir():
+    if not os.path.exists(MODELS_DIR):
+        os.makedirs(MODELS_DIR)
+
+def get_model_path(filename):
+    return os.path.join(MODELS_DIR, filename)
+
+def download_and_prepare_dual_mode():
+    print("Starting model download for Dual Mode (CPU + GPU)...")
+
+    # Create directory
+    ensure_models_dir()
+
+    # Paths
+    cpu_path = get_model_path(QUANTIZED_CPU_FILENAME)
+    gpu_path = get_model_path(QUANTIZED_GPU_FILENAME)
+
+    print(f"Loading base model: {MODEL_TAG}")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_TAG)
+    base_model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_TAG)
+
+    # --- 1. Prepare GPU Version (FP16) ---
+    if not os.path.exists(gpu_path):
+        print("Creating GPU version (FP16)...")
+        # Note: .half() casts base_model in place, so the CPU step below
+        # reloads a clean FP32 copy from the hub before quantizing.
+        # Simplest flow: load, cast, save.
+        gpu_model = base_model.half()
+        print(f"Saving FP16 model to {gpu_path}...")
+        torch.save(gpu_model, gpu_path)
+    else:
+        print(f"GPU model already exists at {gpu_path}")
+
+    # --- 2. Prepare CPU Version (INT8) ---
+    if not os.path.exists(cpu_path):
+        print("Creating CPU version (INT8 Quantized)...")
+        # Reload the base model to ensure a clean FP32 state
+        base_model_cpu = AutoModelForSeq2SeqLM.from_pretrained(MODEL_TAG)
+
+        print("Quantizing for CPU...")
+        quantized_model = torch.quantization.quantize_dynamic(
+            base_model_cpu, {torch.nn.Linear}, dtype=torch.qint8
+        )
+        print(f"Saving INT8 model to {cpu_path}...")
+        torch.save(quantized_model, cpu_path)
+    else:
+        print(f"CPU model already exists at {cpu_path}")
+
+    import requests
+    print("Downloading FastText language identification model...")
+    fasttext_path = get_model_path(FASTTEXT_FILENAME)
+    if not os.path.exists(fasttext_path):
+        try:
+            response = requests.get(FASTTEXT_MODEL_URL, stream=True)
+            response.raise_for_status()
+            with open(fasttext_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+            print(f"FastText model saved to {fasttext_path}")
+        except Exception as e:
+            print(f"Failed to download FastText model: {e}")
+            # Critical for functionality: the language detector cannot start without it
+            raise e
+    else:
+        print(f"FastText model already exists at {fasttext_path}")
+
+    print("Dual mode models cached successfully.")
+
+if __name__ == "__main__":
+    download_and_prepare_dual_mode()
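Since `torch.save` above pickles the entire module rather than a state dict, loading the cached artifact requires the same class definitions to be importable (i.e. `transformers` installed). A minimal consumption sketch, assuming the default `MODELS_DIR` of `saved_models`; on torch >= 2.6 the explicit `weights_only=False` is required for full-module pickles:

```python
import torch
from transformers import AutoTokenizer

# Load the tokenizer from the hub and the quantized module from the local cache
tokenizer = AutoTokenizer.from_pretrained("prithivida/parrot_paraphraser_on_T5")
model = torch.load("saved_models/parrot_t5_int8.pt", weights_only=False)
model.eval()

# Same raw-text input convention as src/models/parrot_model.py
ids = tokenizer("Hello there, how are you?", return_tensors="pt").input_ids
out = model.generate(ids, max_length=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```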
modal_deploy.py
ADDED
@@ -0,0 +1,47 @@
+import modal
+import os
+import sys
+
+# Define the image with all dependencies and baked-in models
+image = (
+    modal.Image.debian_slim(python_version="3.10")
+    .apt_install("git")
+    .pip_install(
+        "torch",
+        "transformers==4.38.2",
+        "sentencepiece==0.1.99",
+        "protobuf==3.20.3",
+        "git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git",
+        "python-dotenv==1.0.1",
+        "fastapi==0.110.0",
+        "uvicorn==0.27.1",
+        "nltk"
+    )
+    .run_commands("python -m nltk.downloader punkt punkt_tab")
+    .workdir("/root")
+    # Copy the model download script
+    .add_local_file("download_model.py", "/root/download_model.py", copy=True)
+    # Download and cache models during the build phase
+    .run_commands("python download_model.py")
+    # Copy the source code
+    .add_local_dir("src", "/root/src")
+    .add_local_file("app.py", "/root/app.py")
+)
+
+app = modal.App("parrot-paraphraser")
+
+@app.function(
+    image=image,
+    gpu="any",  # Request any available GPU
+    scaledown_window=300,  # Keep the container warm for 5 minutes
+    timeout=600,
+    secrets=[modal.Secret.from_dotenv()],
+)
+@modal.asgi_app()
+def fastapi_app():
+    # Ensure /root is on the Python path so imports resolve correctly
+    sys.path.append("/root")
+
+    # Import the FastAPI app from app.py
+    from app import app as web_app
+    return web_app
requirements.txt
ADDED
@@ -0,0 +1,12 @@
+# torch  # Recommended: install PyTorch with GPU support manually for better performance (see https://pytorch.org/get-started/locally/)
+transformers==4.38.2
+sentencepiece==0.1.99
+protobuf==3.20.3
+git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git
+python-dotenv==1.0.1
+fastapi==0.104.1
+uvicorn==0.24.0
+nltk==3.9.1
+fasttext-wheel==0.9.2
+requests==2.31.0
+numpy==1.26.4
src/auth.py
ADDED
@@ -0,0 +1,24 @@
+from fastapi import Security, HTTPException, status
+from fastapi.security import APIKeyHeader
+from .config import Config
+
+API_KEY_NAME = "X-API-Key"
+api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
+
+def get_api_key(api_key_header: str = Security(api_key_header)):
+    expected_api_key = Config.API_KEY
+
+    # If no API key is set in the environment, fail safe
+    if not expected_api_key:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Server API Key not configured"
+        )
+
+    if api_key_header == expected_api_key:
+        return api_key_header
+
+    raise HTTPException(
+        status_code=status.HTTP_403_FORBIDDEN,
+        detail="Could not validate credentials"
+    )
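A quick way to exercise `get_api_key` in isolation is FastAPI's `TestClient`. This is a test sketch, not part of the commit; the `/ping` route is made up for the demo, and `Config.API_KEY` is set directly instead of via `.env`:

```python
from fastapi import FastAPI, Depends
from fastapi.testclient import TestClient

from src.auth import get_api_key
from src.config import Config

Config.API_KEY = "sk-sample-key"  # normally loaded from .env

app = FastAPI()

@app.get("/ping")
def ping(api_key: str = Depends(get_api_key)):
    return {"ok": True}

client = TestClient(app)
assert client.get("/ping").status_code == 403                                  # header missing
assert client.get("/ping", headers={"X-API-Key": "wrong"}).status_code == 403  # bad key
assert client.get("/ping", headers={"X-API-Key": "sk-sample-key"}).status_code == 200
```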
src/config.py
ADDED
@@ -0,0 +1,29 @@
+import os
+from dotenv import load_dotenv
+
+# Load environment variables from the .env file
+load_dotenv()
+
+class Config:
+    # Directory where models are stored/cached
+    MODELS_DIR = os.getenv("MODELS_DIR", "saved_models")
+
+    # Inference batch size
+    BATCH_SIZE = int(os.getenv("BATCH_SIZE", "8"))
+
+    # API Key
+    API_KEY = os.getenv("API_KEY")
+
+    # FastText model
+    FASTTEXT_FILENAME = "lid.176.ftz"
+
+    @staticmethod
+    def get_model_path(filename: str) -> str:
+        """Helper to get the full path for a model file."""
+        return os.path.join(Config.MODELS_DIR, filename)
+
+    @staticmethod
+    def ensure_models_dir():
+        """Ensures the models directory exists."""
+        if not os.path.exists(Config.MODELS_DIR):
+            os.makedirs(Config.MODELS_DIR)
src/interface.py
ADDED
@@ -0,0 +1,30 @@
+from abc import ABC, abstractmethod
+from typing import List
+
+class ParaphrasingModel(ABC):
+    """
+    Abstract base class for paraphrasing models (e.g., T5, GPT).
+    """
+    @abstractmethod
+    def paraphrase(self, text: str) -> str:
+        """Paraphrase the given text."""
+        pass
+
+    def paraphrase_batch(self, texts: List[str]) -> List[str]:
+        """
+        Paraphrase a list of texts.
+        The default implementation loops, but subclasses should override it for GPU batching.
+        """
+        return [self.paraphrase(text) for text in texts]
+
+class TextProcessingMode(ABC):
+    """
+    Abstract base class for text processing modes (Strategy Pattern).
+    Defines HOW the text is broken down and processed (e.g., by paragraph, random sentences).
+    """
+    @abstractmethod
+    def process(self, text: str, paraphraser: ParaphrasingModel) -> str:
+        """
+        Process a block of text using the provided paraphraser.
+        """
+        pass
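To illustrate how the two ABCs compose (the model decides WHAT to do with a chunk, the mode decides HOW the text is split and recombined), here is a toy sketch; both classes are hypothetical examples written for this note, not part of the commit:

```python
from src.interface import ParaphrasingModel, TextProcessingMode

class UppercaseModel(ParaphrasingModel):
    """Trivial stand-in 'paraphraser' so the pipeline is observable."""
    def paraphrase(self, text: str) -> str:
        return text.upper()

class WholeBlockMode(TextProcessingMode):
    """Feeds the entire block to the model in a single call."""
    def process(self, text: str, paraphraser: ParaphrasingModel) -> str:
        return paraphraser.paraphrase(text)

print(WholeBlockMode().process("hello world.", UppercaseModel()))
# -> HELLO WORLD.
```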
src/middleware/request_logging.py
ADDED
@@ -0,0 +1,34 @@
+"""Request logging middleware for timing."""
+import time
+import logging
+from typing import Callable
+
+from fastapi import Request, Response
+from starlette.middleware.base import BaseHTTPMiddleware
+
+logger = logging.getLogger(__name__)
+
+
+class RequestLoggingMiddleware(BaseHTTPMiddleware):
+    """Middleware to log request processing time."""
+
+    async def dispatch(self, request: Request, call_next: Callable) -> Response:
+        """Process a request with timing."""
+        start_time = time.time()
+
+        # Log the incoming request
+        logger.info(f"Incoming request: {request.method} {request.url}")
+
+        # Process the request
+        response = await call_next(request)
+
+        # Calculate the processing time
+        process_time = time.time() - start_time
+
+        # Log the processing time
+        logger.info(f"Request processed in {process_time:.4f}s - {request.method} {request.url} - Status: {response.status_code}")
+
+        # Add the processing time to the response headers
+        response.headers["X-Process-Time"] = str(process_time)
+
+        return response
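Because the middleware stamps every response, the timing is visible to any client, not just the logs. A minimal check (endpoint and host port per the setup above):

```python
import requests

resp = requests.get("http://localhost:8080/health")
# Header added by RequestLoggingMiddleware, value in seconds
print(resp.headers.get("X-Process-Time"))  # e.g. "0.0012"
```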
src/models/parrot_model.py
ADDED
@@ -0,0 +1,203 @@
+import torch
+import warnings
+import os
+from typing import Optional, List, Tuple
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from ..interface import ParaphrasingModel
+from ..config import Config
+from ..modes.generation import GenerationModeFactory
+from parrot import Parrot
+from parrot.filters import Adequacy, Fluency, Diversity
+
+class ParrotModel(ParaphrasingModel):
+    """
+    Concrete implementation of ParaphrasingModel using the Parrot library.
+    Adapts the prithivida/parrot_paraphraser_on_T5 model.
+    Supports INT8 quantization on CPU for faster loading.
+    """
+
+    MODEL_TAG = "prithivida/parrot_paraphraser_on_T5"
+    QUANTIZED_CPU_FILENAME = "parrot_t5_int8.pt"
+    QUANTIZED_GPU_FILENAME = "parrot_t5_fp16.pt"
+
+    def __init__(self, use_gpu: Optional[bool] = None):
+        if use_gpu is not None:
+            self.use_gpu = use_gpu
+        else:
+            self.use_gpu = torch.cuda.is_available()
+
+        # Filter warnings as in the original script
+        warnings.filterwarnings("ignore")
+        self.parrot_model = self._load_model()
+
+    def _load_model(self) -> Parrot:
+        device = "cuda" if self.use_gpu else "cpu"
+        print(f"Loading Parrot model ({self.MODEL_TAG}) on {device}...")
+
+        # Create the models directory if it doesn't exist
+        Config.ensure_models_dir()
+
+        quantized_path = Config.get_model_path(self.QUANTIZED_CPU_FILENAME)
+        gpu_path = Config.get_model_path(self.QUANTIZED_GPU_FILENAME)
+
+        tokenizer = None
+        model = None
+
+        # Try to load the GPU model if on CUDA
+        if device == "cuda" and os.path.exists(gpu_path):
+            print(f"Loading GPU Parrot model from {gpu_path}...")
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(self.MODEL_TAG)
+                model = torch.load(gpu_path)
+                model.to(device)
+                model.eval()
+                print("GPU Parrot model loaded successfully.")
+            except Exception as e:
+                print(f"Failed to load GPU model: {e}. Falling back to standard load.")
+
+        # Try to load the quantized model if on CPU
+        if device == "cpu" and os.path.exists(quantized_path) and model is None:
+            print(f"Loading quantized Parrot model from {quantized_path}...")
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(self.MODEL_TAG)
+                model = torch.load(quantized_path)
+                model.eval()
+                print("Quantized Parrot model loaded successfully.")
+            except Exception as e:
+                print(f"Failed to load quantized model: {e}. Falling back to standard load.")
+
+        if model is None:
+            tokenizer = AutoTokenizer.from_pretrained(self.MODEL_TAG)
+            model = AutoModelForSeq2SeqLM.from_pretrained(self.MODEL_TAG).to(device)
+
+            # Quantize and save if on CPU
+            if device == "cpu":
+                print("Quantizing Parrot model to INT8...")
+                model = torch.quantization.quantize_dynamic(
+                    model, {torch.nn.Linear}, dtype=torch.qint8
+                )
+                print(f"Saving quantized Parrot model to {quantized_path}...")
+                torch.save(model, quantized_path)
+
+            # Save the FP16 model if on GPU
+            elif device == "cuda":
+                print("Converting Parrot model to FP16 for GPU...")
+                model = model.half()
+                print(f"Saving GPU Parrot model to {gpu_path}...")
+                torch.save(model, gpu_path)
+
+        # Manually construct a Parrot instance to bypass its __init__ (which would load the model again)
+        parrot_instance = Parrot.__new__(Parrot)
+        parrot_instance.tokenizer = tokenizer
+        parrot_instance.model = model
+        parrot_instance.device = device
+
+        # Manually initialize the scoring attributes that would have been set in __init__
+        parrot_instance.adequacy_score = Adequacy()
+        parrot_instance.fluency_score = Fluency()
+        parrot_instance.diversity_score = Diversity()
+
+        return parrot_instance
+
+    def paraphrase(self, text: str) -> str:
+        """
+        Paraphrases text using the Parrot model.
+        Returns the highest-scored paraphrase, or the original text if none is found.
+        """
+        text = text.strip()
+        if not text:
+            return ""
+
+        # Avoid processing code blocks or very short noise
+        if text.startswith("```") or len(text) < 3:
+            return text
+
+        try:
+            # We bypass parrot.augment to use uniform GenerationModes.
+            # This ensures Perplexity, Anti-Tell, etc. work consistently.
+            input_ids = self.parrot_model.tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512
+            ).input_ids.to(self.parrot_model.device)
+
+            # Use "hybrid" as a placeholder; the factory returns HybridMode regardless
+            mode = GenerationModeFactory.get_mode("hybrid")
+            generate_kwargs = mode.get_config(self.parrot_model.tokenizer)
+
+            outputs = self.parrot_model.model.generate(
+                input_ids,
+                **generate_kwargs
+            )
+
+            res = self.parrot_model.tokenizer.batch_decode(outputs, skip_special_tokens=True)
+            return res[0] if res else text
+
+        except Exception as e:
+            print(f"Error during Parrot inference: {e}")
+            return text
+
+    def paraphrase_batch(self, texts: List[str]) -> List[str]:
+        """
+        True batch implementation bypassing the slow parrot.augment loop.
+        Processes in chunks of Config.BATCH_SIZE.
+        """
+        results = [""] * len(texts)
+
+        # 1. Prepare valid inputs
+        valid_items = []
+        for i, text in enumerate(texts):
+            text = text.strip()
+            if text and not text.startswith("```") and len(text) >= 3:
+                valid_items.append((i, text))
+            else:
+                results[i] = text
+
+        if not valid_items:
+            return results
+
+        # 2. Process in chunks
+        batch_size = Config.BATCH_SIZE
+
+        for i in range(0, len(valid_items), batch_size):
+            chunk = valid_items[i : i + batch_size]
+            chunk_indices = [item[0] for item in chunk]
+            chunk_texts = [item[1] for item in chunk]
+
+            # Tokenize the chunk
+            inputs = self.parrot_model.tokenizer(
+                chunk_texts,
+                padding=True,
+                truncation=True,
+                return_tensors="pt"
+            ).to(self.parrot_model.device)
+
+            # Generate the chunk on the GPU
+            try:
+                # Use "hybrid" as a placeholder
+                mode = GenerationModeFactory.get_mode("hybrid")
+                generate_kwargs = mode.get_config(self.parrot_model.tokenizer)
+
+                outputs = self.parrot_model.model.generate(
+                    **inputs,
+                    **generate_kwargs
+                )
+
+                # Decode the chunk
+                generated_texts = self.parrot_model.tokenizer.batch_decode(
+                    outputs,
+                    skip_special_tokens=True
+                )
+
+                # Map back to results
+                for idx, gen_text in zip(chunk_indices, generated_texts):
+                    results[idx] = gen_text
+
+            except Exception as e:
+                print(f"Batch inference failed for chunk {i}: {e}")
+                # Fall back to the original text
+                for idx, text in zip(chunk_indices, chunk_texts):
+                    results[idx] = text
+
+        return results
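The `Parrot.__new__(Parrot)` construction above is worth calling out: it allocates a Parrot instance without running `Parrot.__init__` (which would load the model a second time), then injects the prebuilt attributes. A generic sketch of the same pattern, with made-up names:

```python
class Expensive:
    def __init__(self):
        # Imagine this downloads a multi-GB model
        self.payload = download_huge_model()  # hypothetical, never called below

# __new__ allocates the instance but skips __init__ entirely...
obj = Expensive.__new__(Expensive)
# ...so prebuilt state can be injected instead of rebuilt
obj.payload = "injected from cache"
print(obj.payload)  # -> injected from cache
```

The trade-off is fragility: if a future Parrot version reads additional attributes during inference, they would have to be set manually here too, which is why the scoring attributes are initialized explicitly above.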
src/modes/generation/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .factory import GenerationModeFactory
+from .generation_interface import GenerationMode
src/modes/generation/factory.py
ADDED
@@ -0,0 +1,11 @@
+from typing import Optional
+from .generation_interface import GenerationMode
+from .hybrid import HybridMode
+
+class GenerationModeFactory:
+    @staticmethod
+    def get_mode(mode_name: str) -> GenerationMode:
+        # For this optimized server, we always return HybridMode
+        # regardless of the requested string (we could also strict-check it).
+        # Given the requirements, defaulting to Hybrid is the safest/cleanest option.
+        return HybridMode()
src/modes/generation/generation_interface.py
ADDED
@@ -0,0 +1,14 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Any, Optional
+from transformers import PreTrainedTokenizer
+
+class GenerationMode(ABC):
+    """
+    Abstract base class for T5 generation modes.
+    """
+    @abstractmethod
+    def get_config(self, tokenizer: Optional[PreTrainedTokenizer] = None) -> Dict[str, Any]:
+        """
+        Returns the generate() kwargs for this mode.
+        """
+        pass
src/modes/generation/hybrid.py
ADDED
@@ -0,0 +1,51 @@
+from typing import Dict, Any, Optional
+from transformers import PreTrainedTokenizer
+from .generation_interface import GenerationMode
+
+class HybridMode(GenerationMode):
+    """
+    Hybrid Mode: balances readability and evasion.
+
+    1. Vocabulary: bans ONLY specific 'AI fingerprint' nouns/verbs, keeping functional glue words.
+    2. Sampling: uses nucleus sampling (top_p) for natural variance.
+    3. Length: neutral length penalty; natural rambling and sentence merging are allowed.
+    """
+
+    # A much stricter, curated list than Anti-Tell.
+    # We allow functional words ("ensure", "enable") but ban obvious AI-isms.
+    AI_FINGERPRINTS = [
+        # The absolute worst offenders (immediate AI tells)
+        "delve", "embark", "game-changer", "landscape", "realm", "tapestry",
+        "testament", "underscore", "harness", "leverage", "demystify",
+        "plethora", "myriad", "beacon", "symphony", "nuance",
+        "paramount", "pivotal", "intricate", "meticulous"
+    ]
+
+    def get_config(self, tokenizer: Optional[PreTrainedTokenizer] = None) -> Dict[str, Any]:
+        config = {
+            "max_length": 1024,
+            "early_stopping": True,
+            "do_sample": True,  # Enable sampling for evasion
+            "top_p": 0.95,  # Relaxed: allow a broader vocabulary (typos, slang, "messy" human text)
+            "top_k": 60,  # Keep decent variety
+            "temperature": 0.96,  # High variance: encourages unexpected phrasing
+            "repetition_penalty": 1.1,  # Lower penalty: allow natural conversational repetition
+            "length_penalty": 1.0,  # Neutral: don't force brevity; allow sentence merging/rambling
+            "num_return_sequences": 1
+        }
+
+        if tokenizer:
+            bad_ids = []
+            for word in self.AI_FINGERPRINTS:
+                # Encode the word alone
+                ids_1 = tokenizer.encode(word, add_special_tokens=False)
+                # Encode the word with a leading space (common in T5)
+                ids_2 = tokenizer.encode(" " + word, add_special_tokens=False)
+
+                if ids_1: bad_ids.append(ids_1)
+                if ids_2: bad_ids.append(ids_2)
+
+            if bad_ids:
+                config["bad_words_ids"] = bad_ids
+
+        return config
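To see what `get_config` actually hands to `generate()`, a small inspection sketch (downloads the tokenizer from the hub on first run):

```python
from transformers import AutoTokenizer
from src.modes.generation.hybrid import HybridMode

tok = AutoTokenizer.from_pretrained("prithivida/parrot_paraphraser_on_T5")
cfg = HybridMode().get_config(tok)

# Each entry in bad_words_ids is a token-id sequence generate() refuses to emit;
# there are up to two entries per banned word (bare and leading-space encodings).
print(len(cfg["bad_words_ids"]))
print(cfg["top_p"], cfg["temperature"])  # 0.95 0.96
```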
src/modes/processing/burstiness.py
ADDED
@@ -0,0 +1,58 @@
+import random
+import nltk
+from src.interface import TextProcessingMode, ParaphrasingModel
+
+# Ensure nltk resources are available
+try:
+    nltk.data.find('tokenizers/punkt')
+except LookupError:
+    nltk.download('punkt')
+
+class BurstinessMode(TextProcessingMode):
+    """
+    Groups sentences into variable-sized chunks (1-3 sentences) to create
+    rhythmic variation (burstiness) in the output.
+    """
+    def __init__(self, min_chunk: int = 1, max_chunk: int = 3):
+        self.min_chunk = min_chunk
+        self.max_chunk = max_chunk
+
+    def process(self, text: str, paraphraser: ParaphrasingModel) -> str:
+        # Respect paragraph structure to maintain markdown formatting
+        paragraphs = text.split('\n')
+        processed_paragraphs = []
+
+        for p in paragraphs:
+            if not p.strip():
+                processed_paragraphs.append(p)
+                continue
+
+            processed_paragraphs.append(self._process_paragraph_chunks(p, paraphraser))
+
+        return '\n'.join(processed_paragraphs)
+
+    def _process_paragraph_chunks(self, paragraph: str, paraphraser: ParaphrasingModel) -> str:
+        sentences = nltk.sent_tokenize(paragraph)
+        if not sentences:
+            return paragraph
+
+        chunks = []
+        i = 0
+        while i < len(sentences):
+            # Determine the chunk size randomly
+            chunk_size = random.randint(self.min_chunk, self.max_chunk)
+
+            # Slice the sentences for this chunk
+            chunk_sentences = sentences[i : i + chunk_size]
+
+            # Join them to form a single text block
+            chunk_text = " ".join(chunk_sentences)
+            chunks.append(chunk_text)
+
+            i += chunk_size
+
+        # Process all chunks in a batch
+        processed_chunks = paraphraser.paraphrase_batch(chunks)
+
+        # Join the chunks back together
+        return " ".join(processed_chunks)
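A toy run makes the chunking visible. `EchoModel` is a made-up stand-in that brackets each chunk instead of paraphrasing it (the NLTK punkt data the module downloads on import is required); it also exercises the default `paraphrase_batch` loop from the ABC:

```python
import random

from src.interface import ParaphrasingModel
from src.modes.processing.burstiness import BurstinessMode

class EchoModel(ParaphrasingModel):
    def paraphrase(self, text: str) -> str:
        return f"[{text}]"  # bracket each chunk so the grouping is visible

random.seed(0)  # make the 1-3 sentence grouping reproducible
mode = BurstinessMode()
print(mode.process("One. Two. Three. Four. Five.", EchoModel()))
# e.g. -> [One. Two.] [Three.] [Four. Five.]
```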
src/schemas.py
ADDED
@@ -0,0 +1,7 @@
+from pydantic import BaseModel
+
+class ParaphraseRequest(BaseModel):
+    text: str
+
+class ParaphraseResponse(BaseModel):
+    paraphrased_text: str
src/services/language_detector.py
ADDED
@@ -0,0 +1,62 @@
+import fasttext
+import os
+import logging
+from src.config import Config
+
+logger = logging.getLogger(__name__)
+
+class LanguageDetectorService:
+    def __init__(self):
+        self.model = None
+        self._initialized = False
+
+    def initialize(self):
+        """Initialize the FastText model."""
+        if self._initialized:
+            return
+
+        model_path = Config.get_model_path(Config.FASTTEXT_FILENAME)
+        if not os.path.exists(model_path):
+            logger.error(f"FastText model not found at {model_path}")
+            # If the model is missing, we can't function properly.
+            # In production this should prevent startup or fail gracefully.
+            raise RuntimeError(f"FastText model not found at {model_path}")
+
+        try:
+            logger.info(f"Loading FastText model from {model_path}...")
+            # Suppress the fasttext warning on load if possible, though it usually prints to C++ stdout
+            self.model = fasttext.load_model(model_path)
+            self._initialized = True
+            logger.info("LanguageDetectorService initialized successfully.")
+        except Exception as e:
+            logger.error(f"Failed to load FastText model: {e}")
+            raise RuntimeError(f"Failed to load FastText model: {e}")
+
+    def detect_language(self, text: str) -> str:
+        """
+        Detects the language of the provided text.
+        Returns the ISO 639-1 language code (e.g., 'en', 'fr').
+        """
+        if not self._initialized or self.model is None:
+            raise RuntimeError("LanguageDetectorService not initialized")
+
+        if not text or not text.strip():
+            return "unknown"
+
+        # fasttext expects a single line for prediction
+        clean_text = text.replace("\n", " ")
+
+        try:
+            # predict returns a tuple: (['__label__en'], array([0.98...]))
+            labels, _ = self.model.predict(clean_text)
+            if labels:
+                # Extract the language code from '__label__en'
+                lang_code = labels[0].replace("__label__", "")
+                return lang_code
+            return "unknown"
+        except Exception as e:
+            logger.error(f"Error detecting language: {e}")
+            return "unknown"
+
+# Global instance
+language_detector_service = LanguageDetectorService()
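For reference, this is the raw fasttext prediction format that `detect_language` parses, shown standalone; the path assumes the default `saved_models` cache populated by `download_model.py`:

```python
import fasttext

model = fasttext.load_model("saved_models/lid.176.ftz")

# k=2 returns the two most likely languages with their confidences
labels, probs = model.predict("Bonjour tout le monde", k=2)
print(labels, probs)
# e.g. ('__label__fr', '__label__en') [0.99... 0.00...]
```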
src/services/paraphraser.py
ADDED
@@ -0,0 +1,46 @@
+from src.models.parrot_model import ParrotModel
+from src.modes.processing.burstiness import BurstinessMode
+from src.config import Config
+
+class ParaphraserService:
+    def __init__(self):
+        self.model = None
+        self.burstiness_mode = None
+        self._initialized = False
+
+    def initialize(self):
+        """Initialize the model and processing modes."""
+        if self._initialized:
+            return
+
+        # Configure for Hybrid mode
+        Config.GENERATION_MODE = "hybrid"
+
+        print("Initializing Parrot Model...")
+        # use_gpu=None triggers auto-detection in the ParrotModel class
+        self.model = ParrotModel(use_gpu=None)
+
+        print("Initializing Burstiness Mode...")
+        self.burstiness_mode = BurstinessMode(min_chunk=1, max_chunk=3)
+        self._initialized = True
+        print("Service initialization complete.")
+
+    def get_status(self):
+        """Return the health status of the service."""
+        if self.model is not None:
+            return {
+                "status": "healthy",
+                "model_loaded": True,
+                "device": "cuda" if self.model.use_gpu else "cpu"
+            }
+        return {"status": "starting", "model_loaded": False}
+
+    def paraphrase(self, text: str) -> str:
+        """Process the text using the configured mode and model."""
+        if not self._initialized or self.model is None or self.burstiness_mode is None:
+            raise RuntimeError("Service not initialized")
+
+        return self.burstiness_mode.process(text, self.model)
+
+# Global instance
+paraphraser_service = ParaphraserService()