Spaces:
Sleeping
Sleeping
Commit ·
a317be6
0
Parent(s):
Initial deploy
Browse files- .env.example +11 -0
- .gitattributes +35 -0
- .gitignore +44 -0
- Dockerfile +35 -0
- Makefile +23 -0
- README.md +12 -0
- app/__init__.py +1 -0
- app/config.py +32 -0
- app/database.py +36 -0
- app/main.py +36 -0
- app/models/__init__.py +1 -0
- app/models/file.py +115 -0
- app/models/user.py +45 -0
- app/routers/__init__.py +1 -0
- app/routers/auth.py +27 -0
- app/routers/pdf.py +78 -0
- app/schemas/__init__.py +1 -0
- app/schemas/file.py +35 -0
- app/schemas/user.py +32 -0
- app/services/__init__.py +1 -0
- app/services/auth.py +81 -0
- app/services/pdf_merger.py +138 -0
- app/utils/__init__.py +1 -0
- app/utils/security.py +32 -0
- requirements.txt +12 -0
.env.example
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MongoDB Configuration
MONGODB_URL=mongodb://localhost:27017
DATABASE_NAME=pdf_merger

# JWT Configuration
# SECRET_KEY is a placeholder: every deployment MUST override it with a
# strong random value (e.g. `openssl rand -hex 32`).
SECRET_KEY=your-super-secret-key-change-in-production
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=30

# Application Settings
# DEBUG should be False in production.
DEBUG=True
|
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git LFS rules (Hugging Face Spaces defaults): large binary artifacts are
# stored via LFS instead of in the git history.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual Environments
venv/
env/
ENV/

# Environment Variables
# (.env holds secrets -- SECRET_KEY, DB credentials -- and must never be
# committed; it was previously listed twice, once here and once under the
# virtualenv section, which is now deduplicated.)
.env

# PyCharm/VSCode
.idea/
.vscode/

# Logs
*.log

# App Data
uploads/
merged/
*.pdf
|
Dockerfile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

# Set environment variables
# PYTHONDONTWRITEBYTECODE: no .pyc files in the image layer
# PYTHONUNBUFFERED: stream logs immediately (important for container logs)
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Create a non-root user
RUN adduser --disabled-password --gecos "" appuser

WORKDIR /app

# Install system dependencies (none needed for this simple app, but good practice)
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Install python dependencies
# requirements.txt is copied first so this layer is cached unless the
# dependency list changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
# NOTE(review): with no .dockerignore, this copies everything in the build
# context -- including a local .env or venv/ if present. Confirm a
# .dockerignore exists before shipping.
COPY . .

# Create directories for uploads
RUN mkdir -p uploads merged && \
    chown -R appuser:appuser /app

# Switch to non-root user
USER appuser

# Expose port
# 7860 is the conventional Hugging Face Spaces port.
EXPOSE 7860

# Command to run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
Makefile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.PHONY: setup run docker-build docker-run clean

# NOTE(review): ./venv/Scripts/ is the Windows venv layout; on Linux/macOS
# the binaries live under ./venv/bin/ -- confirm the intended dev platform.
setup:
	python -m venv venv
	./venv/Scripts/pip install -r requirements.txt

run:
	./venv/Scripts/uvicorn app.main:app --reload

docker-build:
	docker build -t pdf-merger-backend .

# The image serves uvicorn on container port 7860 (see Dockerfile EXPOSE/CMD),
# so the host port must be mapped to 7860 -- the previous 8000:8000 mapping
# pointed at a port nothing listens on.
docker-run:
	docker run -p 8000:7860 --env-file .env -v $(PWD)/uploads:/app/uploads -v $(PWD)/merged:/app/merged pdf-merger-backend

clean:
	rm -rf __pycache__
	rm -rf app/__pycache__
	rm -rf app/models/__pycache__
	rm -rf app/schemas/__pycache__
	rm -rf app/services/__pycache__
	rm -rf app/routers/__pycache__
	rm -rf app/utils/__pycache__
|
README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Pdf Merger
|
| 3 |
+
emoji: 👁
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
short_description: merges multiple PDFs into one
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# App package
|
app/config.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache

from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class Settings(BaseSettings):
    """Application settings loaded from environment variables / .env.

    Field names are matched case-insensitively against environment
    variables (MONGODB_URL -> mongodb_url, etc.).
    """

    # MongoDB
    mongodb_url: str = "mongodb://localhost:27017"
    database_name: str = "pdf_merger"

    # JWT
    # NOTE(review): the default secret is a placeholder; production
    # deployments must override SECRET_KEY via the environment.
    secret_key: str = "your-super-secret-key-change-in-production"
    algorithm: str = "HS256"
    access_token_expire_minutes: int = 30

    # Application
    debug: bool = True

    # File paths (relative to the working directory)
    upload_dir: str = "uploads"
    merged_dir: str = "merged"

    # pydantic v2 configuration style; replaces the deprecated inner
    # `class Config` that pydantic-settings only supports for backward
    # compatibility.
    model_config = SettingsConfigDict(env_file=".env", case_sensitive=False)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@lru_cache()
def get_settings() -> Settings:
    """Return the process-wide Settings instance.

    lru_cache makes this a lazy singleton: the .env file is read and
    validated only once per process.
    """
    return Settings()
|
app/database.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from motor.motor_asyncio import AsyncIOMotorClient
|
| 2 |
+
from .config import get_settings
|
| 3 |
+
|
| 4 |
+
settings = get_settings()
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class Database:
    """MongoDB database connection manager.

    Holds the single shared Motor client: populated by connect_to_mongo()
    at application startup and released by close_mongo_connection() at
    shutdown.
    """

    # None until connect_to_mongo() has run.
    client: "AsyncIOMotorClient | None" = None


# Module-level singleton used by the helper functions below.
db = Database()
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
async def connect_to_mongo():
    """Connect to MongoDB on application startup.

    NOTE(review): mongodb_url may embed credentials; printing it can leak
    secrets into container logs -- confirm before deploying.
    """
    db.client = AsyncIOMotorClient(settings.mongodb_url)
    print(f"Connected to MongoDB at {settings.mongodb_url}")
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
async def close_mongo_connection():
    """Close the MongoDB connection on application shutdown.

    A no-op when connect_to_mongo() was never called.
    """
    client = db.client
    if client is None:
        return
    client.close()
    print("Closed MongoDB connection")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def get_database():
    """Return the application's database handle.

    Raises:
        RuntimeError: if called before connect_to_mongo() -- previously
            this surfaced as an opaque ``'NoneType' object is not
            subscriptable`` TypeError.
    """
    if db.client is None:
        raise RuntimeError(
            "MongoDB client is not connected; call connect_to_mongo() first"
        )
    return db.client[settings.database_name]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_collection(collection_name: str):
    """Get a specific collection from the application database.

    Thin convenience wrapper around get_database(); returns a Motor
    collection handle.
    """
    return get_database()[collection_name]
|
app/main.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from .config import get_settings
|
| 4 |
+
from .database import connect_to_mongo, close_mongo_connection
|
| 5 |
+
from .routers import auth, pdf
|
| 6 |
+
|
| 7 |
+
# Loaded for its side effects (reads .env); currently unreferenced in this
# module but kept for parity with the other modules.
settings = get_settings()

app = FastAPI(
    title="PDF Merger API",
    description="Backend API for merging PDF files with authentication",
    version="1.0.0"
)

# CORS Middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers per the CORS spec -- in production list the exact
# frontend origins instead of the wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify exact origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Event Handlers
# (startup/shutdown events are the legacy FastAPI mechanism; newer code
# would use a lifespan context manager, but these still work.)
app.add_event_handler("startup", connect_to_mongo)
app.add_event_handler("shutdown", close_mongo_connection)

# Include Routers
app.include_router(auth.router)
app.include_router(pdf.router)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@app.get("/")
|
| 34 |
+
async def root():
|
| 35 |
+
"""Health check endpoint."""
|
| 36 |
+
return {"message": "PDF Merger API is running"}
|
app/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Models package
|
app/models/file.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
from bson import ObjectId
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class PDFFile:
    """PDF file model for MongoDB (collection ``files``).

    Plain mapping-backed model: to_dict()/from_dict() convert between
    instances and MongoDB documents; no ODM layer is used.
    """

    collection_name = "files"

    def __init__(
        self,
        user_id: str,
        filename: str,
        original_filename: str,
        file_path: str,
        file_size: int,
        file_type: str = "application/pdf",
        created_at: Optional[datetime] = None,
        _id: Optional[ObjectId] = None
    ):
        # Mint a fresh ObjectId when the document has not been persisted
        # yet (i.e. _id was not supplied).
        self._id = _id or ObjectId()
        self.user_id = user_id
        self.filename = filename  # unique on-disk name (uuid-based)
        self.original_filename = original_filename  # name as uploaded
        self.file_path = file_path
        self.file_size = file_size  # bytes
        self.file_type = file_type
        # NOTE(review): datetime.utcnow() yields a *naive* datetime and is
        # deprecated since Python 3.12; datetime.now(timezone.utc) would be
        # the modern form, but changing it alters stored values -- confirm.
        self.created_at = created_at or datetime.utcnow()

    def to_dict(self) -> dict:
        """Convert file to dictionary for MongoDB insertion."""
        return {
            "_id": self._id,
            "user_id": self.user_id,
            "filename": self.filename,
            "original_filename": self.original_filename,
            "file_path": self.file_path,
            "file_size": self.file_size,
            "file_type": self.file_type,
            "created_at": self.created_at
        }

    @classmethod
    def from_dict(cls, data: dict) -> "PDFFile":
        """Create PDFFile instance from MongoDB document."""
        return cls(
            _id=data.get("_id"),
            user_id=data.get("user_id"),
            filename=data.get("filename"),
            original_filename=data.get("original_filename"),
            file_path=data.get("file_path"),
            file_size=data.get("file_size"),
            file_type=data.get("file_type"),
            created_at=data.get("created_at")
        )

    @property
    def id(self) -> str:
        """String form of the ObjectId, as exposed to API clients."""
        return str(self._id)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class MergedPDF:
    """Merged PDF file model for MongoDB (collection ``merged_files``).

    Mirrors PDFFile but records the ordered list of source file ids the
    merge was produced from.
    """

    collection_name = "merged_files"

    def __init__(
        self,
        user_id: str,
        filename: str,
        file_path: str,
        file_size: int,
        source_file_ids: List[str],
        created_at: Optional[datetime] = None,
        _id: Optional[ObjectId] = None
    ):
        # Mint a fresh ObjectId for not-yet-persisted documents.
        self._id = _id or ObjectId()
        self.user_id = user_id
        self.filename = filename
        self.file_path = file_path
        self.file_size = file_size  # bytes
        # String ObjectIds of the PDFFile documents that were merged,
        # in merge order.
        self.source_file_ids = source_file_ids
        # NOTE(review): naive UTC timestamp; see PDFFile for the same caveat.
        self.created_at = created_at or datetime.utcnow()

    def to_dict(self) -> dict:
        """Convert merged file to dictionary for MongoDB insertion."""
        return {
            "_id": self._id,
            "user_id": self.user_id,
            "filename": self.filename,
            "file_path": self.file_path,
            "file_size": self.file_size,
            "source_file_ids": self.source_file_ids,
            "created_at": self.created_at
        }

    @classmethod
    def from_dict(cls, data: dict) -> "MergedPDF":
        """Create MergedPDF instance from MongoDB document."""
        return cls(
            _id=data.get("_id"),
            user_id=data.get("user_id"),
            filename=data.get("filename"),
            file_path=data.get("file_path"),
            file_size=data.get("file_size"),
            source_file_ids=data.get("source_file_ids"),
            created_at=data.get("created_at")
        )

    @property
    def id(self) -> str:
        """String form of the ObjectId, as exposed to API clients."""
        return str(self._id)
|
app/models/user.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from bson import ObjectId
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class User:
    """User model for MongoDB (collection ``users``).

    Stores only the bcrypt/argon-style *hashed* password produced by the
    security utils -- never the plaintext.
    """

    collection_name = "users"

    def __init__(
        self,
        email: str,
        hashed_password: str,
        created_at: Optional[datetime] = None,
        _id: Optional[ObjectId] = None
    ):
        # Mint a fresh ObjectId for not-yet-persisted documents.
        self._id = _id or ObjectId()
        self.email = email
        self.hashed_password = hashed_password
        # NOTE(review): naive UTC timestamp (datetime.utcnow() is deprecated
        # since 3.12); changing it alters stored values -- confirm first.
        self.created_at = created_at or datetime.utcnow()

    def to_dict(self) -> dict:
        """Convert user to dictionary for MongoDB insertion."""
        return {
            "_id": self._id,
            "email": self.email,
            "hashed_password": self.hashed_password,
            "created_at": self.created_at
        }

    @classmethod
    def from_dict(cls, data: dict) -> "User":
        """Create User instance from MongoDB document."""
        return cls(
            _id=data.get("_id"),
            email=data.get("email"),
            hashed_password=data.get("hashed_password"),
            created_at=data.get("created_at")
        )

    @property
    def id(self) -> str:
        """String form of the ObjectId, as exposed to API clients."""
        return str(self._id)
|
app/routers/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Routers package
|
app/routers/auth.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends
|
| 2 |
+
from ..schemas.user import UserCreate, UserResponse, UserLogin, Token
|
| 3 |
+
from ..services.auth import create_user, authenticate_user, get_current_user
|
| 4 |
+
from ..models.user import User
|
| 5 |
+
|
| 6 |
+
router = APIRouter(
    prefix="/auth",
    tags=["Authentication"]
)


@router.post("/signup", response_model=UserResponse)
async def signup(user: UserCreate):
    """Register a new user.

    Delegates to the auth service, which responds 400 when the email is
    already registered.
    """
    return await create_user(user)


@router.post("/login", response_model=Token)
async def login(user: UserLogin):
    """Authenticate user and return JWT token.

    Responds 401 with a generic message on bad email or password.
    """
    return await authenticate_user(user)


@router.get("/me", response_model=UserResponse)
async def read_users_me(current_user: User = Depends(get_current_user)):
    """Get current user information.

    The User model is serialized through UserResponse (from_attributes),
    which exposes only id, email and created_at.
    """
    return current_user
|
app/routers/pdf.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from fastapi import APIRouter, Depends, File, UploadFile, HTTPException
|
| 3 |
+
from fastapi.responses import FileResponse as FastAPIFileResponse
|
| 4 |
+
from ..schemas.file import FileResponse, MergeRequest, MergeResponse
|
| 5 |
+
from ..services.pdf_merger import save_upload_file, get_user_files, merge_pdfs
|
| 6 |
+
from ..services.auth import get_current_user
|
| 7 |
+
from ..models.user import User
|
| 8 |
+
|
| 9 |
+
router = APIRouter(
    prefix="/pdf",
    tags=["PDF Operations"]
)


@router.post("/upload", response_model=FileResponse)
async def upload_pdf(
    file: UploadFile = File(...),
    current_user: User = Depends(get_current_user)
):
    """Upload a PDF file.

    Only the client-supplied filename extension is checked here; the file
    *content* is not validated at this layer.
    """
    if not file.filename.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="File must be a PDF")

    return await save_upload_file(file, current_user)


@router.get("/files", response_model=List[FileResponse])
async def list_files(current_user: User = Depends(get_current_user)):
    """List all files uploaded by the current user."""
    return await get_user_files(current_user)


@router.get("/merged", response_model=List[MergeResponse])
async def list_merged_files(current_user: User = Depends(get_current_user)):
    """List all merged files created by the user."""
    # Function-local import; the sibling names from this module are already
    # imported at the top of the file, so this could be hoisted there too.
    from ..services.pdf_merger import get_user_merged_files
    return await get_user_merged_files(current_user)


@router.post("/merge", response_model=MergeResponse)
async def merge_files(
    merge_req: MergeRequest,
    current_user: User = Depends(get_current_user)
):
    """Merge multiple PDF files into one.

    The service validates that every requested file id exists and belongs
    to the current user before merging.
    """
    return await merge_pdfs(merge_req, current_user)
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@router.get("/download/{file_path:path}")
|
| 50 |
+
async def download_file(
|
| 51 |
+
file_path: str,
|
| 52 |
+
current_user: User = Depends(get_current_user)
|
| 53 |
+
# Note: In a real app, you'd want to verify the user owns the file or the merged file
|
| 54 |
+
# For this demo, we'll assume if they have the path (ID based in real world), they can access
|
| 55 |
+
# But since we store full paths in DB, we should be careful.
|
| 56 |
+
# Better approach: download by ID and look up path.
|
| 57 |
+
# For now, let's just serve it if it exists in our logical directories.
|
| 58 |
+
):
|
| 59 |
+
"""Download a processed PDF file."""
|
| 60 |
+
import os
|
| 61 |
+
from ..config import get_settings
|
| 62 |
+
settings = get_settings()
|
| 63 |
+
|
| 64 |
+
# Simple security check to prevent directory traversal
|
| 65 |
+
real_path = os.path.realpath(file_path)
|
| 66 |
+
if not (real_path.startswith(os.path.realpath(settings.upload_dir)) or
|
| 67 |
+
real_path.startswith(os.path.realpath(settings.merged_dir))):
|
| 68 |
+
raise HTTPException(status_code=403, detail="Access denied")
|
| 69 |
+
|
| 70 |
+
if not os.path.exists(real_path):
|
| 71 |
+
raise HTTPException(status_code=404, detail="File not found")
|
| 72 |
+
|
| 73 |
+
return FastAPIFileResponse(
|
| 74 |
+
real_path,
|
| 75 |
+
media_type="application/pdf",
|
| 76 |
+
filename=os.path.basename(real_path),
|
| 77 |
+
content_disposition_type="attachment"
|
| 78 |
+
)
|
app/schemas/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Schemas package
|
app/schemas/file.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class FileBase(BaseModel):
    """Shared base: the on-disk filename of a stored PDF."""
    filename: str


class FileResponse(FileBase):
    """API representation of an uploaded PDF (PDFFile model)."""
    id: str
    user_id: str
    original_filename: str
    file_size: int  # bytes
    file_type: str
    created_at: datetime

    class Config:
        # Allow serializing directly from PDFFile attribute objects.
        from_attributes = True


class MergeRequest(BaseModel):
    """Request body for /pdf/merge: file ids in the desired merge order."""
    file_ids: List[str]
    output_filename: Optional[str] = "merged_document.pdf"


class MergeResponse(BaseModel):
    """API representation of a merged PDF (MergedPDF model)."""
    id: str
    filename: str
    file_path: str
    file_size: int  # bytes
    created_at: datetime

    class Config:
        # Allow serializing directly from MergedPDF attribute objects.
        from_attributes = True
|
app/schemas/user.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, EmailStr
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class UserBase(BaseModel):
    """Shared base: a validated email address."""
    email: EmailStr


class UserCreate(UserBase):
    """Signup payload (plaintext password; hashed by the auth service)."""
    password: str


class UserLogin(UserBase):
    """Login payload."""
    password: str


class UserResponse(UserBase):
    """Public user representation -- never includes the password hash."""
    id: str
    created_at: datetime

    class Config:
        # Allow serializing directly from User attribute objects.
        from_attributes = True


class Token(BaseModel):
    """JWT bearer token returned by /auth/login."""
    access_token: str
    token_type: str


class TokenData(BaseModel):
    """Decoded token claims (subject email)."""
    email: Optional[str] = None
|
app/services/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Services package
|
app/services/auth.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import Depends, HTTPException, status
|
| 2 |
+
from fastapi.security import OAuth2PasswordBearer
|
| 3 |
+
from jose import JWTError, jwt
|
| 4 |
+
from ..models.user import User
|
| 5 |
+
from ..schemas.user import UserCreate, UserLogin, Token
|
| 6 |
+
from ..database import get_collection
|
| 7 |
+
from ..utils.security import verify_password, get_password_hash, create_access_token
|
| 8 |
+
from ..config import get_settings
|
| 9 |
+
|
| 10 |
+
settings = get_settings()
# tokenUrl must match the login route as mounted (router prefix "/auth").
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="auth/login")
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
async def get_user_by_email(email: str) -> User | None:
    """Look up a user document by email address.

    Returns:
        The hydrated User, or None when no account matches.
    """
    users = get_collection(User.collection_name)
    document = await users.find_one({"email": email})
    return User.from_dict(document) if document else None
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
async def create_user(user_in: UserCreate) -> User:
    """Register a new user.

    Raises:
        HTTPException: 400 when the email is already registered.

    NOTE(review): the existence check and the insert are not atomic --
    two concurrent signups for the same email can both pass the check.
    A unique index on ``email`` would make the database enforce this.
    """
    existing_user = await get_user_by_email(user_in.email)
    if existing_user:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Email already registered"
        )

    # Only the hash is ever stored; the plaintext password is discarded.
    hashed_password = get_password_hash(user_in.password)
    user = User(email=user_in.email, hashed_password=hashed_password)

    users_collection = get_collection(User.collection_name)
    await users_collection.insert_one(user.to_dict())

    return user
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
async def authenticate_user(user_in: UserLogin) -> Token:
    """Authenticate a user and return a JWT access token.

    Raises:
        HTTPException: 401 with a deliberately generic message for both
            unknown email and wrong password, so responses do not reveal
            which part failed.
    """
    user = await get_user_by_email(user_in.email)
    # Single combined check: removes the duplicated exception block and
    # narrows the timing difference between "no such user" and "bad
    # password" (the user-enumeration side channel).
    if user is None or not verify_password(user_in.password, user.hashed_password):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect email or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    access_token = create_access_token(data={"sub": user.email})
    return Token(access_token=access_token, token_type="bearer")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
async def get_current_user(token: str = Depends(oauth2_scheme)) -> User:
    """Validate the bearer JWT and return the corresponding user.

    Used as a FastAPI dependency on protected routes.

    Raises:
        HTTPException: 401 when the token is malformed or expired, carries
            no ``sub`` claim, or names an email with no matching account.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )

    try:
        # jwt.decode verifies the signature and the exp claim.
        payload = jwt.decode(token, settings.secret_key, algorithms=[settings.algorithm])
        email: str = payload.get("sub")
        if email is None:
            raise credentials_exception
    except JWTError:
        raise credentials_exception

    user = await get_user_by_email(email)
    if user is None:
        raise credentials_exception
    return user
|
app/services/pdf_merger.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
import os
import uuid
from typing import List

from bson import ObjectId
from fastapi import UploadFile, HTTPException, status
from PyPDF2 import PdfMerger, PdfReader

from ..config import get_settings
from ..database import get_collection
from ..models.file import PDFFile, MergedPDF
from ..models.user import User
from ..schemas.file import MergeRequest
| 12 |
+
|
| 13 |
+
settings = get_settings()
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
async def save_upload_file(file: UploadFile, user: User) -> PDFFile:
    """Persist an uploaded PDF to disk and record it in MongoDB.

    The whole upload is buffered in memory (acceptable for typical PDF
    sizes); its byte length becomes the recorded file_size.

    Raises:
        HTTPException: 400 when the payload is not parseable PDF data.
    """
    # Ensure upload directory exists
    os.makedirs(settings.upload_dir, exist_ok=True)

    # Generate unique filename to prevent collisions
    file_ext = os.path.splitext(file.filename)[1]
    unique_filename = f"{uuid.uuid4()}{file_ext}"
    file_path = os.path.join(settings.upload_dir, unique_filename)

    content = await file.read()

    # Validate the *content*, not just the .pdf extension checked by the
    # router: PdfReader (already imported for this purpose) raises on
    # anything that is not structurally a PDF.
    try:
        PdfReader(io.BytesIO(content))
    except Exception:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Uploaded file is not a valid PDF"
        )

    # Save file content
    with open(file_path, "wb") as f:
        f.write(content)

    # Create DB entry
    file_size = len(content)
    pdf_file = PDFFile(
        user_id=user.id,
        filename=unique_filename,
        original_filename=file.filename,
        file_path=file_path,
        file_size=file_size
    )

    files_collection = get_collection(PDFFile.collection_name)
    await files_collection.insert_one(pdf_file.to_dict())

    return pdf_file
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
async def get_user_files(user: User) -> List[PDFFile]:
    """Return every PDF the given user has uploaded."""
    collection = get_collection(PDFFile.collection_name)
    cursor = collection.find({"user_id": user.id})
    # Materialize the async cursor into model instances.
    return [PDFFile.from_dict(doc) async for doc in cursor]
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
async def get_user_merged_files(user: User) -> List[MergedPDF]:
    """Return every merged PDF the given user has created."""
    collection = get_collection(MergedPDF.collection_name)
    cursor = collection.find({"user_id": user.id})
    # Materialize the async cursor into model instances.
    return [MergedPDF.from_dict(doc) async for doc in cursor]
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
async def merge_pdfs(merge_req: MergeRequest, user: User) -> MergedPDF:
    """Merge the user's selected PDFs into a single new file.

    Args:
        merge_req: IDs of the source files (in merge order) and an
            optional output filename.
        user: The authenticated requester; only files they own are used.

    Returns:
        The persisted MergedPDF record for the new file.

    Raises:
        HTTPException 400: if any file ID is not a valid ObjectId.
        HTTPException 404: if any file is missing or owned by another user.
        HTTPException 500: if the merge itself fails.
    """
    files_collection = get_collection(PDFFile.collection_name)

    # Convert string IDs to ObjectIds for the query. A malformed ID would
    # otherwise escape as an unhandled bson InvalidId (HTTP 500); surface
    # it as a client error instead.
    try:
        file_ids = [ObjectId(fid) for fid in merge_req.file_ids]
    except Exception:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid file ID format"
        )

    cursor = files_collection.find({
        "_id": {"$in": file_ids},
        "user_id": user.id  # ownership filter: only the user's own files match
    })

    found_files = {}
    async for file_data in cursor:
        found_files[str(file_data["_id"])] = file_data

    # All *distinct* requested IDs must be found (security check for
    # ownership). Comparing against the distinct set means a legitimately
    # duplicated ID in the request no longer triggers a false 404.
    if len(found_files) != len(set(merge_req.file_ids)):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="One or more files not found or access denied"
        )

    # Preserve the merge order requested by the client.
    input_paths = [found_files[fid]["file_path"] for fid in merge_req.file_ids]

    # Perform merge
    merger = PdfMerger()
    output_path = None
    try:
        for path in input_paths:
            merger.append(path)

        # Ensure merged directory exists
        os.makedirs(settings.merged_dir, exist_ok=True)

        output_filename = merge_req.output_filename or f"merged_{uuid.uuid4()}.pdf"
        if not output_filename.endswith('.pdf'):
            output_filename += '.pdf'

        # UUID prefix so concurrent merges with the same name cannot clash.
        unique_output_name = f"{uuid.uuid4()}_{output_filename}"
        output_path = os.path.join(settings.merged_dir, unique_output_name)

        merger.write(output_path)
    except Exception as e:
        # Remove a partially written output so failed merges leave no debris.
        if output_path and os.path.exists(output_path):
            os.remove(output_path)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error merging PDFs: {str(e)}"
        )
    finally:
        merger.close()

    # Get file size
    file_size = os.path.getsize(output_path)

    # Create DB entry for merged file
    merged_pdf = MergedPDF(
        user_id=user.id,
        filename=output_filename,
        file_path=output_path,
        file_size=file_size,
        source_file_ids=merge_req.file_ids
    )

    merged_collection = get_collection(MergedPDF.collection_name)
    await merged_collection.insert_one(merged_pdf.to_dict())

    return merged_pdf
|
app/utils/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Utils package
|
app/utils/security.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime, timedelta, timezone
from typing import Optional, Union, Any

from jose import JWTError, jwt
from passlib.context import CryptContext

from ..config import get_settings
|
| 6 |
+
|
| 7 |
+
settings = get_settings()
|
| 8 |
+
|
| 9 |
+
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check whether *plain_password* matches the stored hash."""
    is_valid = pwd_context.verify(plain_password, hashed_password)
    return is_valid
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_password_hash(password: str) -> str:
    """Hash *password* with the configured passlib context."""
    hashed = pwd_context.hash(password)
    return hashed
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """Create a signed JWT access token.

    Args:
        data: Claims to embed in the token (copied, not mutated).
        expires_delta: Optional custom lifetime; defaults to the configured
            ``access_token_expire_minutes``.

    Returns:
        The encoded JWT string.
    """
    to_encode = data.copy()
    # Use timezone-aware UTC: datetime.utcnow() is deprecated (Python 3.12)
    # and naive datetimes are error-prone when later compared against
    # aware ones during token validation.
    if expires_delta is None:
        expires_delta = timedelta(minutes=settings.access_token_expire_minutes)
    expire = datetime.now(timezone.utc) + expires_delta

    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, settings.secret_key, algorithm=settings.algorithm)
    return encoded_jwt
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.109.0
|
| 2 |
+
uvicorn[standard]==0.27.0
|
| 3 |
+
motor==3.3.2
|
| 4 |
+
pymongo==4.6.1
|
| 5 |
+
python-jose[cryptography]==3.3.0
|
| 6 |
+
passlib[bcrypt]==1.7.4
|
| 7 |
+
python-multipart==0.0.6
|
| 8 |
+
PyPDF2==3.0.1
|
| 9 |
+
pydantic-settings==2.1.0
|
| 10 |
+
python-dotenv==1.0.0
|
| 11 |
+
bcrypt==4.0.1
|
| 12 |
+
email-validator==2.1.0.post1
|