Merge branch 'Testing'
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .env-example +34 -0
- .gitignore +63 -166
- Dockerfile +26 -0
- Procfile +1 -0
- README.md +22 -1
- READMEs.md +152 -0
- __init__.py +0 -0
- app.py +62 -0
- config.py +2 -0
- docs/api_endpoints.md +92 -0
- docs/deployment.md +108 -0
- docs/detector/ELA.md +65 -0
- docs/detector/ai_human_image_checker.md +132 -0
- docs/detector/fft.md +136 -0
- docs/detector/meta.md +20 -0
- docs/detector/note-for-backend.md +94 -0
- docs/features/image_classifier.md +31 -0
- docs/features/nepali_text_classifier.md +30 -0
- docs/features/text_classifier.md +30 -0
- docs/functions.md +62 -0
- docs/nestjs_integration.md +83 -0
- docs/security.md +10 -0
- docs/setup.md +24 -0
- docs/status_code.md +68 -0
- docs/structure.md +74 -0
- features/ai_human_image_classifier/controller.py +35 -0
- features/ai_human_image_classifier/inferencer.py +48 -0
- features/ai_human_image_classifier/main.py +27 -0
- features/ai_human_image_classifier/model_loader.py +80 -0
- features/ai_human_image_classifier/preprocessor.py +34 -0
- features/ai_human_image_classifier/routes.py +44 -0
- features/image_classifier/__init__.py +0 -0
- features/image_classifier/controller.py +16 -0
- features/image_classifier/inferencer.py +42 -0
- features/image_classifier/model_loader.py +58 -0
- features/image_classifier/preprocess.py +26 -0
- features/image_classifier/routes.py +26 -0
- features/image_edit_detector/controller.py +49 -0
- features/image_edit_detector/detectors/ela.py +32 -0
- features/image_edit_detector/detectors/fft.py +40 -0
- features/image_edit_detector/detectors/metadata.py +82 -0
- features/image_edit_detector/preprocess.py +9 -0
- features/image_edit_detector/routes.py +53 -0
- features/nepali_text_classifier/__init__.py +0 -0
- features/nepali_text_classifier/controller.py +130 -0
- features/nepali_text_classifier/inferencer.py +23 -0
- features/nepali_text_classifier/model_loader.py +54 -0
- features/nepali_text_classifier/preprocess.py +35 -0
- features/nepali_text_classifier/routes.py +45 -0
- features/rag_chatbot/__init__.py +0 -0
.env-example
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MY_SECRET_TOKEN="SECRET_CODE_TOKEN"
|
| 2 |
+
|
| 3 |
+
# CHROMA_HOST = "localhost" (Host gareko address rakhney)
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# EXAMPLE CONFIGURATIONS FOR DIFFERENT PROVIDERS(Use only one at once)
|
| 7 |
+
# ===========================================
|
| 8 |
+
|
| 9 |
+
# FOR OPENAI:(PAID)
|
| 10 |
+
# LLM_PROVIDER=openai
|
| 11 |
+
# LLM_API_KEY=sk-your-openai-api-key
|
| 12 |
+
# LLM_MODEL=gpt-3.5-turbo
|
| 13 |
+
# # Other options: gpt-4, gpt-4-turbo-preview, etc.
|
| 14 |
+
|
| 15 |
+
# FOR GROQ:(FREE: BABAL XA-> prefer this)
|
| 16 |
+
# LLM_PROVIDER=groq
|
| 17 |
+
# LLM_API_KEY=gsk_your-groq-api-key
|
| 18 |
+
# LLM_MODEL=llama-3.3-70b-versatile
|
| 19 |
+
# # Other options: llama-3.1-70b-versatile, mixtral-8x7b-32768, etc.
|
| 20 |
+
|
| 21 |
+
# FOR OPENROUTER:(FREE: LASTAI RATE LIMIT LAGAUXA)
|
| 22 |
+
# LLM_PROVIDER=openrouter
|
| 23 |
+
# LLM_API_KEY=sk-or-your-openrouter-api-key
|
| 24 |
+
# LLM_MODEL=meta-llama/llama-3.1-8b-instruct:free
|
| 25 |
+
# # Other options: anthropic/claude-3-haiku, google/gemma-7b-it, etc.
|
| 26 |
+
|
| 27 |
+
# ===========================================
|
| 28 |
+
# ADVANCED CONFIGURATION
|
| 29 |
+
# ===========================================
|
| 30 |
+
# Temperature (0.0 to 1.0) - controls randomness
|
| 31 |
+
# LLM_TEMPERATURE=0.1
|
| 32 |
+
|
| 33 |
+
# Maximum tokens for response
|
| 34 |
+
# LLM_MAX_TOKENS=4096
|
.gitignore
CHANGED
|
@@ -1,174 +1,71 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
__pycache__/
|
| 3 |
-
*
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
.Python
|
| 11 |
build/
|
| 12 |
-
develop-eggs/
|
| 13 |
dist/
|
| 14 |
-
downloads/
|
| 15 |
-
eggs/
|
| 16 |
-
.eggs/
|
| 17 |
-
lib/
|
| 18 |
-
lib64/
|
| 19 |
-
parts/
|
| 20 |
-
sdist/
|
| 21 |
-
var/
|
| 22 |
-
wheels/
|
| 23 |
-
share/python-wheels/
|
| 24 |
*.egg-info/
|
| 25 |
-
.installed.cfg
|
| 26 |
-
*.egg
|
| 27 |
-
MANIFEST
|
| 28 |
-
|
| 29 |
-
# PyInstaller
|
| 30 |
-
# Usually these files are written by a python script from a template
|
| 31 |
-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
-
*.manifest
|
| 33 |
-
*.spec
|
| 34 |
-
|
| 35 |
-
# Installer logs
|
| 36 |
-
pip-log.txt
|
| 37 |
-
pip-delete-this-directory.txt
|
| 38 |
-
|
| 39 |
-
# Unit test / coverage reports
|
| 40 |
-
htmlcov/
|
| 41 |
-
.tox/
|
| 42 |
-
.nox/
|
| 43 |
-
.coverage
|
| 44 |
-
.coverage.*
|
| 45 |
-
.cache
|
| 46 |
-
nosetests.xml
|
| 47 |
-
coverage.xml
|
| 48 |
-
*.cover
|
| 49 |
-
*.py,cover
|
| 50 |
-
.hypothesis/
|
| 51 |
-
.pytest_cache/
|
| 52 |
-
cover/
|
| 53 |
-
|
| 54 |
-
# Translations
|
| 55 |
-
*.mo
|
| 56 |
-
*.pot
|
| 57 |
-
|
| 58 |
-
# Django stuff:
|
| 59 |
-
*.log
|
| 60 |
-
local_settings.py
|
| 61 |
-
db.sqlite3
|
| 62 |
-
db.sqlite3-journal
|
| 63 |
-
|
| 64 |
-
# Flask stuff:
|
| 65 |
-
instance/
|
| 66 |
-
.webassets-cache
|
| 67 |
-
|
| 68 |
-
# Scrapy stuff:
|
| 69 |
-
.scrapy
|
| 70 |
-
|
| 71 |
-
# Sphinx documentation
|
| 72 |
-
docs/_build/
|
| 73 |
-
|
| 74 |
-
# PyBuilder
|
| 75 |
-
.pybuilder/
|
| 76 |
-
target/
|
| 77 |
-
|
| 78 |
-
# Jupyter Notebook
|
| 79 |
-
.ipynb_checkpoints
|
| 80 |
-
|
| 81 |
-
# IPython
|
| 82 |
-
profile_default/
|
| 83 |
-
ipython_config.py
|
| 84 |
-
|
| 85 |
-
# pyenv
|
| 86 |
-
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
-
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
-
# .python-version
|
| 89 |
-
|
| 90 |
-
# pipenv
|
| 91 |
-
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
-
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
-
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
-
# install all needed dependencies.
|
| 95 |
-
#Pipfile.lock
|
| 96 |
|
| 97 |
-
#
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
# commonly ignored for libraries.
|
| 101 |
-
#uv.lock
|
| 102 |
|
| 103 |
-
#
|
| 104 |
-
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
-
# commonly ignored for libraries.
|
| 107 |
-
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
-
#poetry.lock
|
| 109 |
-
|
| 110 |
-
# pdm
|
| 111 |
-
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 112 |
-
#pdm.lock
|
| 113 |
-
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 114 |
-
# in version control.
|
| 115 |
-
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 116 |
-
.pdm.toml
|
| 117 |
-
.pdm-python
|
| 118 |
-
.pdm-build/
|
| 119 |
-
|
| 120 |
-
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 121 |
-
__pypackages__/
|
| 122 |
-
|
| 123 |
-
# Celery stuff
|
| 124 |
-
celerybeat-schedule
|
| 125 |
-
celerybeat.pid
|
| 126 |
-
|
| 127 |
-
# SageMath parsed files
|
| 128 |
-
*.sage.py
|
| 129 |
-
|
| 130 |
-
# Environments
|
| 131 |
.env
|
| 132 |
-
.
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
.
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
# Pyre type checker
|
| 155 |
-
.pyre/
|
| 156 |
-
|
| 157 |
-
# pytype static type analyzer
|
| 158 |
-
.pytype/
|
| 159 |
-
|
| 160 |
-
# Cython debug symbols
|
| 161 |
-
cython_debug/
|
| 162 |
-
|
| 163 |
-
# PyCharm
|
| 164 |
-
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 165 |
-
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 166 |
-
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 167 |
-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 168 |
-
#.idea/
|
| 169 |
-
|
| 170 |
-
# Ruff stuff:
|
| 171 |
-
.ruff_cache/
|
| 172 |
-
|
| 173 |
-
# PyPI configuration file
|
| 174 |
-
.pypirc
|
|
|
|
| 1 |
+
# ---- Python Environment ----
|
| 2 |
+
venv/
|
| 3 |
+
.venv/
|
| 4 |
+
env/
|
| 5 |
+
ENV/
|
| 6 |
+
*.pyc
|
| 7 |
+
*.pyo
|
| 8 |
+
*.pyd
|
| 9 |
__pycache__/
|
| 10 |
+
**/__pycache__/
|
| 11 |
+
|
| 12 |
+
# ---- VS Code / IDEs ----
|
| 13 |
+
.vscode/
|
| 14 |
+
.idea/
|
| 15 |
+
*.swp
|
| 16 |
+
|
| 17 |
+
# ---- Jupyter / IPython ----
|
| 18 |
+
.ipynb_checkpoints/
|
| 19 |
+
*.ipynb
|
| 20 |
+
|
| 21 |
+
# ---- Model & Data Artifacts ----
|
| 22 |
+
*.pth
|
| 23 |
+
*.pt
|
| 24 |
+
*.h5
|
| 25 |
+
*.ckpt
|
| 26 |
+
*.onnx
|
| 27 |
+
*.joblib
|
| 28 |
+
*.pkl
|
| 29 |
+
|
| 30 |
+
# ---- Hugging Face Cache ----
|
| 31 |
+
~/.cache/huggingface/
|
| 32 |
+
huggingface_cache/
|
| 33 |
+
|
| 34 |
+
# ---- Logs and Dumps ----
|
| 35 |
+
*.log
|
| 36 |
+
*.out
|
| 37 |
+
*.err
|
| 38 |
|
| 39 |
+
# ---- Build Artifacts ----
|
|
|
|
| 40 |
build/
|
|
|
|
| 41 |
dist/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
*.egg-info/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
+
# ---- System Files ----
|
| 45 |
+
.DS_Store
|
| 46 |
+
Thumbs.db
|
|
|
|
|
|
|
| 47 |
|
| 48 |
+
# ---- Environment Configs ----
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
.env
|
| 50 |
+
.env.*
|
| 51 |
+
|
| 52 |
+
# ---- Project-specific ----
|
| 53 |
+
Ai-Text-Detector/
|
| 54 |
+
HuggingFace/model/
|
| 55 |
+
|
| 56 |
+
# ---- Node Projects (if applicable) ----
|
| 57 |
+
node_modules/
|
| 58 |
+
model/
|
| 59 |
+
models/.gitattributes #<-- This line can stay if you only want to ignore that file, not the whole folder
|
| 60 |
+
|
| 61 |
+
todo.md
|
| 62 |
+
np_text_model
|
| 63 |
+
IMG_Models
|
| 64 |
+
notebooks
|
| 65 |
+
# Ignore model and tokenizer files
|
| 66 |
+
np_text_model/classifier/sentencepiece.bpe.model
|
| 67 |
+
np_text_model/classifier/tokenizer.json
|
| 68 |
+
|
| 69 |
+
# vector database
|
| 70 |
+
chroma_data
|
| 71 |
+
chroma_database
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
|
| 2 |
+
# you will also find guides on how best to write your Dockerfile
|
| 3 |
+
|
| 4 |
+
FROM python:3.10
|
| 5 |
+
|
| 6 |
+
# Create user first
|
| 7 |
+
RUN useradd -m -u 1000 user
|
| 8 |
+
|
| 9 |
+
# Install system dependencies (requires root)
|
| 10 |
+
RUN apt-get update && apt-get install -y libgl1
|
| 11 |
+
|
| 12 |
+
# Switch to non-root user
|
| 13 |
+
USER user
|
| 14 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
| 15 |
+
|
| 16 |
+
# Add TensorFlow environment variables to reduce logging noise
|
| 17 |
+
WORKDIR /app
|
| 18 |
+
|
| 19 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
| 20 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 21 |
+
RUN python -m spacy download en_core_web_sm || echo "Failed to download model"
|
| 22 |
+
|
| 23 |
+
COPY --chown=user . /app
|
| 24 |
+
|
| 25 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 26 |
+
|
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: uvicorn app:app --host 0.0.0.0 --port ${PORT:-8000}
|
README.md
CHANGED
|
@@ -1 +1,22 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Testing AI Contain
|
| 3 |
+
emoji: 🤖
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
sdk_version: "latest"
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Testing AI Contain
|
| 13 |
+
|
| 14 |
+
This Hugging Face Space uses **Docker** to run a custom environment for AI content detection.
|
| 15 |
+
|
| 16 |
+
## How to run locally
|
| 17 |
+
|
| 18 |
+
```bash
|
| 19 |
+
docker build -t testing-ai-contain .
|
| 20 |
+
docker run -p 7860:7860 testing-ai-contain
|
| 21 |
+
|
| 22 |
+
```
|
READMEs.md
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AI-Contain-Checker
|
| 2 |
+
|
| 3 |
+
A modular AI content detection system with support for **image classification**, **image edit detection**, **Nepali text classification**, and **general text classification**. Built for performance and extensibility, it is ideal for detecting AI-generated content in both visual and textual forms.
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
## 🌟 Features
|
| 7 |
+
|
| 8 |
+
### 🖼️ Image Classifier
|
| 9 |
+
|
| 10 |
+
* **Purpose**: Classifies whether an image is AI-generated or a real-life photo.
|
| 11 |
+
* **Model**: Fine-tuned **InceptionV3** CNN.
|
| 12 |
+
* **Dataset**: Custom curated dataset with **\~79,950 images** for binary classification.
|
| 13 |
+
* **Location**: [`features/image_classifier`](features/image_classifier)
|
| 14 |
+
* **Docs**: [`docs/features/image_classifier.md`](docs/features/image_classifier.md)
|
| 15 |
+
|
| 16 |
+
### 🖌️ Image Edit Detector
|
| 17 |
+
|
| 18 |
+
* **Purpose**: Detects image tampering or post-processing.
|
| 19 |
+
* **Techniques Used**:
|
| 20 |
+
|
| 21 |
+
* **Error Level Analysis (ELA)**: Visualizes compression artifacts.
|
| 22 |
+
* **Fast Fourier Transform (FFT)**: Detects unnatural frequency patterns.
|
| 23 |
+
* **Location**: [`features/image_edit_detector`](features/image_edit_detector)
|
| 24 |
+
* **Docs**:
|
| 25 |
+
|
| 26 |
+
* [ELA](docs/detector/ELA.md)
|
| 27 |
+
* [FFT](docs/detector/fft.md )
|
| 28 |
+
* [Metadata Analysis](docs/detector/meta.md)
|
| 29 |
+
* [Backend Notes](docs/detector/note-for-backend.md)
|
| 30 |
+
|
| 31 |
+
### 📝 Nepali Text Classifier
|
| 32 |
+
|
| 33 |
+
* **Purpose**: Determines if Nepali text content is AI-generated or written by a human.
|
| 34 |
+
* **Model**: Based on `XLMRClassifier` fine-tuned on Nepali language data.
|
| 35 |
+
* **Dataset**: Scraped dataset of **\~18,000** Nepali texts.
|
| 36 |
+
* **Location**: [`features/nepali_text_classifier`](features/nepali_text_classifier)
|
| 37 |
+
* **Docs**: [`docs/features/nepali_text_classifier.md`](docs/features/nepali_text_classifier.md)
|
| 38 |
+
|
| 39 |
+
### 🌐 English Text Classifier
|
| 40 |
+
|
| 41 |
+
* **Purpose**: Detects if English text is AI-generated or human-written.
|
| 42 |
+
* **Pipeline**:
|
| 43 |
+
|
| 44 |
+
* Uses **GPT2 tokenizer** for input preprocessing.
|
| 45 |
+
* Custom binary classifier to differentiate between AI and human-written content.
|
| 46 |
+
* **Location**: [`features/text_classifier`](features/text_classifier)
|
| 47 |
+
* **Docs**: [`docs/features/text_classifier.md`](docs/features/text_classifier.md)
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
## 🗂️ Project Structure
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
AI-Checker/
|
| 55 |
+
│
|
| 56 |
+
├── app.py # Main FastAPI entry point
|
| 57 |
+
├── config.py # Configuration settings
|
| 58 |
+
├── Dockerfile # Docker build script
|
| 59 |
+
├── Procfile # Deployment file for Heroku or similar
|
| 60 |
+
├── requirements.txt # Python dependencies
|
| 61 |
+
├── README.md # You are here 📘
|
| 62 |
+
│
|
| 63 |
+
├── features/ # Core detection modules
|
| 64 |
+
│ ├── image_classifier/
|
| 65 |
+
│ ├── image_edit_detector/
|
| 66 |
+
│ ├── nepali_text_classifier/
|
| 67 |
+
│ └── text_classifier/
|
| 68 |
+
│
|
| 69 |
+
├── docs/ # Internal and API documentation
|
| 70 |
+
│ ├── api_endpoints.md
|
| 71 |
+
│ ├── deployment.md
|
| 72 |
+
│ ├── detector/
|
| 73 |
+
│ │ ├── ELA.md
|
| 74 |
+
│ │ ├── fft.md
|
| 75 |
+
│ │ ├── meta.md
|
| 76 |
+
│ │ └── note-for-backend.md
|
| 77 |
+
│ ├── functions.md
|
| 78 |
+
│ ├── nestjs_integration.md
|
| 79 |
+
│ ├── security.md
|
| 80 |
+
│ ├── setup.md
|
| 81 |
+
│ └── structure.md
|
| 82 |
+
│
|
| 83 |
+
├── IMG_Models/ # Saved image classifier model(s)
|
| 84 |
+
│ └── latest-my_cnn_model.h5
|
| 85 |
+
│
|
| 86 |
+
├── notebooks/ # Experimental and debug notebooks
|
| 87 |
+
├── static/ # Static assets if needed
|
| 88 |
+
└── test.md # Test notes
|
| 89 |
+
````
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
## 📚 Documentation Links
|
| 94 |
+
|
| 95 |
+
* [API Endpoints](docs/api_endpoints.md)
|
| 96 |
+
* [Deployment Guide](docs/deployment.md)
|
| 97 |
+
* [Detector Documentation](docs/detector/)
|
| 98 |
+
|
| 99 |
+
* [Error Level Analysis (ELA)](docs/detector/ELA.md)
|
| 100 |
+
* [Fast Fourier Transform (FFT)](docs/detector/fft.md)
|
| 101 |
+
* [Metadata Analysis](docs/detector/meta.md)
|
| 102 |
+
* [Backend Notes](docs/detector/note-for-backend.md)
|
| 103 |
+
* [Functions Overview](docs/functions.md)
|
| 104 |
+
* [NestJS Integration Guide](docs/nestjs_integration.md)
|
| 105 |
+
* [Security Details](docs/security.md)
|
| 106 |
+
* [Setup Instructions](docs/setup.md)
|
| 107 |
+
* [Project Structure](docs/structure.md)
|
| 108 |
+
|
| 109 |
+
---
|
| 110 |
+
|
| 111 |
+
## 🚀 Usage
|
| 112 |
+
|
| 113 |
+
1. **Install dependencies**
|
| 114 |
+
|
| 115 |
+
```bash
|
| 116 |
+
pip install -r requirements.txt
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
2. **Run the API**
|
| 120 |
+
|
| 121 |
+
```bash
|
| 122 |
+
uvicorn app:app --reload
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
+
3. **Build Docker (optional)**
|
| 126 |
+
|
| 127 |
+
```bash
|
| 128 |
+
docker build -t ai-contain-checker .
|
| 129 |
+
docker run -p 8000:8000 ai-contain-checker
|
| 130 |
+
```
|
| 131 |
+
|
| 132 |
+
---
|
| 133 |
+
|
| 134 |
+
## 🔐 Security & Integration
|
| 135 |
+
|
| 136 |
+
* **Token Authentication** and **IP Whitelisting** supported.
|
| 137 |
+
* NestJS integration guide: [`docs/nestjs_integration.md`](docs/nestjs_integration.md)
|
| 138 |
+
* Rate limiting handled using `slowapi`.
|
| 139 |
+
|
| 140 |
+
---
|
| 141 |
+
|
| 142 |
+
## 🛡️ Future Plans
|
| 143 |
+
|
| 144 |
+
* Add **video classifier** module.
|
| 145 |
+
* Expand dataset for **multilingual** AI content detection.
|
| 146 |
+
* Add **fine-tuning UI** for models.
|
| 147 |
+
|
| 148 |
+
---
|
| 149 |
+
|
| 150 |
+
## 📄 License
|
| 151 |
+
|
| 152 |
+
See full license terms here: [`LICENSE.md`](license.md)
|
__init__.py
ADDED
|
File without changes
|
app.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, Request
|
| 2 |
+
from slowapi import Limiter, _rate_limit_exceeded_handler
|
| 3 |
+
from fastapi.responses import FileResponse
|
| 4 |
+
from slowapi.middleware import SlowAPIMiddleware
|
| 5 |
+
from slowapi.errors import RateLimitExceeded
|
| 6 |
+
from slowapi.util import get_remote_address
|
| 7 |
+
from fastapi.responses import JSONResponse
|
| 8 |
+
from features.text_classifier.routes import router as text_classifier_router
|
| 9 |
+
from features.nepali_text_classifier.routes import (
|
| 10 |
+
router as nepali_text_classifier_router,
|
| 11 |
+
)
|
| 12 |
+
from features.image_classifier.routes import router as image_classifier_router
|
| 13 |
+
from features.image_edit_detector.routes import router as image_edit_detector_router
|
| 14 |
+
from fastapi.staticfiles import StaticFiles
|
| 15 |
+
|
| 16 |
+
from config import ACCESS_RATE
|
| 17 |
+
|
| 18 |
+
import requests
|
| 19 |
+
|
| 20 |
+
limiter = Limiter(key_func=get_remote_address, default_limits=[ACCESS_RATE])
|
| 21 |
+
|
| 22 |
+
app = FastAPI()
|
| 23 |
+
# added the robots.txt
|
| 24 |
+
# Set up SlowAPI
|
| 25 |
+
app.state.limiter = limiter
|
| 26 |
+
app.add_exception_handler(
|
| 27 |
+
RateLimitExceeded,
|
| 28 |
+
lambda request, exc: JSONResponse(
|
| 29 |
+
status_code=429,
|
| 30 |
+
content={
|
| 31 |
+
"status_code": 429,
|
| 32 |
+
"error": "Rate limit exceeded",
|
| 33 |
+
"message": "Too many requests. Chill for a bit and try again",
|
| 34 |
+
},
|
| 35 |
+
),
|
| 36 |
+
)
|
| 37 |
+
app.add_middleware(SlowAPIMiddleware)
|
| 38 |
+
|
| 39 |
+
# Include your routes
|
| 40 |
+
app.include_router(text_classifier_router, prefix="/text")
|
| 41 |
+
app.include_router(nepali_text_classifier_router, prefix="/NP")
|
| 42 |
+
app.include_router(image_classifier_router, prefix="/AI-image")
|
| 43 |
+
app.include_router(image_edit_detector_router, prefix="/detect")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@app.get("/")
|
| 47 |
+
@limiter.limit(ACCESS_RATE)
|
| 48 |
+
async def root(request: Request):
|
| 49 |
+
return {
|
| 50 |
+
"message": "API is working",
|
| 51 |
+
"endpoints": [
|
| 52 |
+
"/text/analyse",
|
| 53 |
+
"/text/upload",
|
| 54 |
+
"/text/analyse-sentences",
|
| 55 |
+
"/text/analyse-sentance-file",
|
| 56 |
+
"/NP/analyse",
|
| 57 |
+
"/NP/upload",
|
| 58 |
+
"/NP/analyse-sentences",
|
| 59 |
+
"/NP/file-sentences-analyse",
|
| 60 |
+
"/AI-image/analyse",
|
| 61 |
+
],
|
| 62 |
+
}
|
config.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ACCESS_RATE = "20/minute"
|
| 2 |
+
|
docs/api_endpoints.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🧩 API Endpoints
|
| 2 |
+
|
| 3 |
+
### English (GPT-2) - `/text/`
|
| 4 |
+
|
| 5 |
+
| Endpoint | Method | Description |
|
| 6 |
+
| ----------------------------- | ------ | -------------------------------------- |
|
| 7 |
+
| `/text/analyse` | POST | Classify raw English text |
|
| 8 |
+
| `/text/analyse-sentences` | POST | Sentence-by-sentence breakdown |
|
| 9 |
+
| `/text/analyse-sentance-file` | POST | Upload file, per-sentence breakdown |
|
| 10 |
+
| `/text/upload` | POST | Upload file for overall classification |
|
| 11 |
+
| `/text/health` | GET | Health check |
|
| 12 |
+
|
| 13 |
+
#### Example: Classify English text
|
| 14 |
+
|
| 15 |
+
```bash
|
| 16 |
+
curl -X POST http://localhost:8000/text/analyse \
|
| 17 |
+
-H "Authorization: Bearer <SECRET_TOKEN>" \
|
| 18 |
+
-H "Content-Type: application/json" \
|
| 19 |
+
-d '{"text": "This is a sample text for analysis."}'
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
**Response:**
|
| 23 |
+
|
| 24 |
+
```json
|
| 25 |
+
{
|
| 26 |
+
"result": "AI-generated",
|
| 27 |
+
"perplexity": 55.67,
|
| 28 |
+
"ai_likelihood": 66.6
|
| 29 |
+
}
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
#### Example: File upload
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
curl -X POST http://localhost:8000/text/upload \
|
| 36 |
+
-H "Authorization: Bearer <SECRET_TOKEN>" \
|
| 37 |
+
-F 'file=@yourfile.txt;type=text/plain'
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
---
|
| 41 |
+
|
| 42 |
+
### Nepali (SentencePiece) - `/NP/`
|
| 43 |
+
|
| 44 |
+
| Endpoint | Method | Description |
|
| 45 |
+
| ---------------------------- | ------ | ------------------------------------ |
|
| 46 |
+
| `/NP/analyse` | POST | Classify Nepali text |
|
| 47 |
+
| `/NP/analyse-sentences` | POST | Sentence-by-sentence breakdown |
|
| 48 |
+
| `/NP/upload` | POST | Upload Nepali PDF for classification |
|
| 49 |
+
| `/NP/file-sentences-analyse` | POST | PDF upload, per-sentence breakdown |
|
| 50 |
+
| `/NP/health` | GET | Health check |
|
| 51 |
+
|
| 52 |
+
#### Example: Nepali text classification
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
curl -X POST http://localhost:8000/NP/analyse \
|
| 56 |
+
-H "Authorization: Bearer <SECRET_TOKEN>" \
|
| 57 |
+
-H "Content-Type: application/json" \
|
| 58 |
+
-d '{"text": "यो उदाहरण वाक्य हो।"}'
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
**Response:**
|
| 62 |
+
|
| 63 |
+
```json
|
| 64 |
+
{
|
| 65 |
+
"label": "Human",
|
| 66 |
+
"confidence": 98.6
|
| 67 |
+
}
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
#### Example: Nepali PDF upload
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
curl -X POST http://localhost:8000/NP/upload \
|
| 74 |
+
-H "Authorization: Bearer <SECRET_TOKEN>" \
|
| 75 |
+
-F 'file=@NepaliText.pdf;type=application/pdf'
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### Image-Classification -`/verify-image/`
|
| 79 |
+
|
| 80 |
+
| Endpoint | Method | Description |
|
| 81 |
+
| ----------------------- | ------ | ----------------------- |
|
| 82 |
+
| `/verify-image/analyse` | POST | Classify Image using ML |
|
| 83 |
+
|
| 84 |
+
#### Example: Image-Classification
|
| 85 |
+
|
| 86 |
+
```bash
|
| 87 |
+
curl -X POST http://localhost:8000/verify-image/analyse \
|
| 88 |
+
-H "Authorization: Bearer <SECRET_TOKEN>" \
|
| 89 |
+
-F 'file=@test1.png'
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
[🔙 Back to Main README](../README.md)
|
docs/deployment.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Deployment
|
| 3 |
+
|
| 4 |
+
This project is containerized and deployed on **Hugging Face Spaces** using a custom `Dockerfile`. This guide explains the structure of the Dockerfile and key considerations for deploying FastAPI apps on Spaces with Docker SDK.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## 📦 Base Image
|
| 9 |
+
|
| 10 |
+
```dockerfile
|
| 11 |
+
FROM python:3.9
|
| 12 |
+
````
|
| 13 |
+
|
| 14 |
+
We use the official Python 3.9 image for compatibility and stability across most Python libraries and tools.
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
## 👤 Create a Non-Root User
|
| 19 |
+
|
| 20 |
+
```dockerfile
|
| 21 |
+
RUN useradd -m -u 1000 user
|
| 22 |
+
USER user
|
| 23 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
* Hugging Face Spaces **requires** that containers run as a non-root user with UID `1000`.
|
| 27 |
+
* We also prepend the user's local binary path to `PATH` for Python package accessibility.
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
## 🗂️ Set Working Directory
|
| 32 |
+
|
| 33 |
+
```dockerfile
|
| 34 |
+
WORKDIR /app
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
All application files will reside under `/app` for consistency and clarity.
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
## 📋 Install Dependencies
|
| 42 |
+
|
| 43 |
+
```dockerfile
|
| 44 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
| 45 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
* Copies the dependency list with correct file ownership.
|
| 49 |
+
* Uses `--no-cache-dir` to reduce image size.
|
| 50 |
+
* Ensures the latest compatible versions are installed.
|
| 51 |
+
|
| 52 |
+
---
|
| 53 |
+
|
| 54 |
+
## 🔡 Download Language Model (Optional)
|
| 55 |
+
|
| 56 |
+
```dockerfile
|
| 57 |
+
RUN python -m spacy download en_core_web_sm || echo "Failed to download model"
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
* Downloads the small English NLP model required by SpaCy.
|
| 61 |
+
* Uses `|| echo ...` to prevent build failure if the download fails (optional safeguard).
|
| 62 |
+
|
| 63 |
+
---
|
| 64 |
+
|
| 65 |
+
## 📁 Copy Project Files
|
| 66 |
+
|
| 67 |
+
```dockerfile
|
| 68 |
+
COPY --chown=user . /app
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
Copies the entire project source into the container, setting correct ownership for Hugging Face's user-based execution.
|
| 72 |
+
|
| 73 |
+
---
|
| 74 |
+
|
| 75 |
+
## 🌐 Start the FastAPI Server
|
| 76 |
+
|
| 77 |
+
```dockerfile
|
| 78 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
* Launches the FastAPI app using `uvicorn`.
|
| 82 |
+
* **Port 7860 is mandatory** for Docker-based Hugging Face Spaces deployments.
|
| 83 |
+
* `app:app` refers to the `FastAPI()` instance in `app.py`.
|
| 84 |
+
|
| 85 |
+
---
|
| 86 |
+
|
| 87 |
+
## ✅ Deployment Checklist
|
| 88 |
+
|
| 89 |
+
* [x] Ensure your main file is named `app.py` or adjust `CMD` accordingly.
|
| 90 |
+
* [x] All dependencies should be listed in `requirements.txt`.
|
| 91 |
+
* [x] If using models like SpaCy, verify they are downloaded or bundled.
|
| 92 |
+
* [x] Test your Dockerfile locally with `docker build` before pushing to Hugging Face.
|
| 93 |
+
|
| 94 |
+
---
|
| 95 |
+
|
| 96 |
+
## 📚 References
|
| 97 |
+
|
| 98 |
+
* Hugging Face Docs: [Spaces Docker SDK](https://huggingface.co/docs/hub/spaces-sdks-docker)
|
| 99 |
+
* Uvicorn Docs: [https://www.uvicorn.org/](https://www.uvicorn.org/)
|
| 100 |
+
* SpaCy Models: [https://spacy.io/models](https://spacy.io/models)
|
| 101 |
+
|
| 102 |
+
---
|
| 103 |
+
|
| 104 |
+
Happy deploying!
|
| 105 |
+
**P.S.** Try not to break stuff. 😅
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
[🔙 Back to Main README](../README.md)
|
docs/detector/ELA.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Error Level Analysis (ELA) Detector
|
| 2 |
+
|
| 3 |
+
This module provides a function to perform Error Level Analysis (ELA) on images to detect potential manipulations or edits.
|
| 4 |
+
|
| 5 |
+
## Function: `run_ela`
|
| 6 |
+
|
| 7 |
+
```python
|
| 8 |
+
def run_ela(image: Image.Image, quality: int = 90, threshold: int = 15) -> bool:
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
### Description
|
| 12 |
+
|
| 13 |
+
Error Level Analysis (ELA) works by recompressing an image at a specified JPEG quality level and comparing it to the original image. Differences between the two images reveal areas with inconsistent compression artifacts — often indicating image manipulation.
|
| 14 |
+
|
| 15 |
+
The function computes the maximum pixel difference across all color channels and uses a threshold to determine if the image is likely edited.
|
| 16 |
+
|
| 17 |
+
### Parameters
|
| 18 |
+
|
| 19 |
+
| Parameter | Type | Default | Description |
|
| 20 |
+
| ----------- | ----------- | ------- | ------------------------------------------------------------------------------------------- |
|
| 21 |
+
| `image` | `PIL.Image` | N/A | Input image in RGB mode to analyze. |
|
| 22 |
+
| `quality` | `int` | 90 | JPEG compression quality used for recompression during analysis (lower = more compression). |
|
| 23 |
+
| `threshold` | `int` | 15 | Pixel difference threshold to flag the image as edited. |
|
| 24 |
+
|
| 25 |
+
### Returns
|
| 26 |
+
|
| 27 |
+
`bool`
|
| 28 |
+
|
| 29 |
+
- `True` if the image is likely edited (max pixel difference > threshold).
|
| 30 |
+
- `False` if the image appears unedited.
|
| 31 |
+
|
| 32 |
+
### Usage Example
|
| 33 |
+
|
| 34 |
+
```python
|
| 35 |
+
from PIL import Image
|
| 36 |
+
from detectors.ela import run_ela
|
| 37 |
+
|
| 38 |
+
# Open and convert image to RGB
|
| 39 |
+
img = Image.open("example.jpg").convert("RGB")
|
| 40 |
+
|
| 41 |
+
# Run ELA detection
|
| 42 |
+
is_edited = run_ela(img, quality=90, threshold=15)
|
| 43 |
+
|
| 44 |
+
print("Image edited:", is_edited)
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
### Notes
|
| 48 |
+
|
| 49 |
+
- The input image **must** be in RGB mode for accurate analysis.
|
| 50 |
+
- ELA is a heuristic technique; combining it with other detection methods increases reliability.
|
| 51 |
+
- Visualizing the enhanced difference image can help identify edited regions (not returned by this function but possible to add).
|
| 52 |
+
|
| 53 |
+
### Installation
|
| 54 |
+
|
| 55 |
+
Make sure you have Pillow installed:
|
| 56 |
+
|
| 57 |
+
```bash
|
| 58 |
+
pip install pillow
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
### Running Locally
|
| 62 |
+
|
| 63 |
+
Copy the function into a notebook or script and run it on your image. It performs well on typical photographic images.
|
| 64 |
+
|
| 65 |
+
[🔙 Back to Main README](../README.md)
|
docs/detector/ai_human_image_checker.md
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Real vs. Fake Image Classification for Production Pipeline
|
| 2 |
+
==========================================================
|
| 3 |
+
|
| 4 |
+
1\. Business Problem
|
| 5 |
+
--------------------
|
| 6 |
+
|
| 7 |
+
This project addresses the critical business need to automatically identify and flag manipulated or synthetically generated images. By accurately classifying images as **"real"** or **"fake,"** we can enhance the integrity of our platform, prevent the spread of misinformation, and protect our users from fraudulent content. This solution is designed for integration into our production pipeline to process images in real-time.
|
| 8 |
+
|
| 9 |
+
2\. Solution Overview
|
| 10 |
+
---------------------
|
| 11 |
+
|
| 12 |
+
This solution leverages OpenAI's CLIP (Contrastive Language-Image Pre-Training) model to differentiate between real and fake images. The system operates as follows:
|
| 13 |
+
|
| 14 |
+
1. **Feature Extraction:** A pre-trained CLIP model ('ViT-L/14') converts input images into 768-dimensional feature vectors.
|
| 15 |
+
|
| 16 |
+
2. **Classification:** A Support Vector Machine (SVM) model, trained on our internal dataset of real and fake images, classifies the feature vectors.
|
| 17 |
+
|
| 18 |
+
3. **Deployment:** The trained model is deployed as a service that can be integrated into our production image processing pipeline.
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
The model has achieved an accuracy of **98.29%** on our internal test set, demonstrating its effectiveness in distinguishing between real and fake images.
|
| 22 |
+
|
| 23 |
+
3\. Getting Started
|
| 24 |
+
-------------------
|
| 25 |
+
|
| 26 |
+
### 3.1. Dependencies
|
| 27 |
+
|
| 28 |
+
To ensure a reproducible environment, all dependencies are listed in the requirements.txt file. Install them using pip:
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
pip install -r requirements.txt
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
**requirements.txt**:
|
| 35 |
+
- numpy
|
| 36 |
+
- Pillow
|
| 37 |
+
- torch
|
| 38 |
+
- clip-by-openai
|
| 39 |
+
- scikit-learn
|
| 40 |
+
- tqdm
|
| 41 |
+
- seaborn
|
| 42 |
+
- matplotlib
|
| 43 |
+
|
| 44 |
+
### 3.2. Data Preparation
|
| 45 |
+
|
| 46 |
+
The model was trained on a dataset of real and fake images obtained from Kaggle. The dataset is available at https://www.kaggle.com/datasets/tristanzhang32/ai-generated-images-vs-real-images/data.
|
| 47 |
+
|
| 48 |
+
### 3.3. Usage
|
| 49 |
+
|
| 50 |
+
#### 3.3.1. Feature Extraction
|
| 51 |
+
|
| 52 |
+
To extract features from a new dataset, run the following command:
|
| 53 |
+
|
| 54 |
+
```
|
| 55 |
+
python extract_features.py --data_dir /path/to/your/data --output_file features.npz
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
#### 3.3.2. Model Training
|
| 59 |
+
|
| 60 |
+
To retrain the SVM model on a new set of extracted features, run:
|
| 61 |
+
|
| 62 |
+
```
|
| 63 |
+
python train_model.py --features_file features.npz --model_output_path model.joblib
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
#### 3.3.3. Inference
|
| 67 |
+
|
| 68 |
+
To classify a single image using the trained model, use the provided inference script:
|
| 69 |
+
```
|
| 70 |
+
python classify.py --image_path /path/to/your/image.jpg --model_path model.joblib
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
4\. Production Deployment
|
| 74 |
+
-------------------------
|
| 75 |
+
|
| 76 |
+
The image classification model is deployed as a microservice. The service exposes an API endpoint that accepts an image and returns a classification result ("real" or "fake").
|
| 77 |
+
|
| 78 |
+
### 4.1. API Specification
|
| 79 |
+
|
| 80 |
+
* **Endpoint:** /classify
|
| 81 |
+
|
| 82 |
+
* **Method:** POST
|
| 83 |
+
|
| 84 |
+
* **Request Body:** multipart/form-data with a single field image.
|
| 85 |
+
|
| 86 |
+
* **Response:**
|
| 87 |
+
|
| 88 |
+
* On success (JSON): `{ "classification": "real", "confidence": 0.95 }`
|
| 89 |
+
|
| 90 |
+
* On error (JSON): `{ "error": "Error message" }`
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
### 4.2. Scalability and Monitoring
|
| 94 |
+
|
| 95 |
+
The service is deployed in a containerized environment (e.g., Docker) and managed by an orchestrator (e.g., Kubernetes) to ensure scalability and high availability. Monitoring and logging are in place to track model performance, API latency, and error rates.
|
| 96 |
+
|
| 97 |
+
5\. Model Versioning
|
| 98 |
+
--------------------
|
| 99 |
+
|
| 100 |
+
We use a combination of Git for code versioning and a model registry for tracking trained model artifacts. Each model is versioned and associated with the commit hash of the code that produced it. The current production model is **v1.2.0**.
|
| 101 |
+
|
| 102 |
+
6\. Testing
|
| 103 |
+
-----------
|
| 104 |
+
|
| 105 |
+
The project includes a suite of tests to ensure correctness and reliability:
|
| 106 |
+
|
| 107 |
+
* **Unit tests:** To verify individual functions and components.
|
| 108 |
+
|
| 109 |
+
* **Integration tests:** To test the interaction between different parts of the system.
|
| 110 |
+
|
| 111 |
+
* **Model evaluation tests:** To continuously monitor model performance on a golden dataset.
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
To run the tests, execute:
|
| 115 |
+
```
|
| 116 |
+
pytest
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
7\. Future Work
|
| 120 |
+
---------------
|
| 121 |
+
|
| 122 |
+
* **Explore more advanced classifiers:** Investigate the use of neural network-based classifiers on top of CLIP features.
|
| 123 |
+
|
| 124 |
+
* **Fine-tune the CLIP model:** For even better performance, we can fine-tune the CLIP model on our specific domain of images.
|
| 125 |
+
|
| 126 |
+
* **Expand the training dataset:** Continuously augment the training data with new examples of real and fake images to improve the model's robustness.
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
8\. Contact/Support
|
| 130 |
+
-------------------
|
| 131 |
+
|
| 132 |
+
For any questions or issues regarding this project, please contact the Machine Learning team at [your-team-email@yourcompany.com](mailto:your-team-email@yourcompany.com).
|
docs/detector/fft.md
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Fast Fourier Transform (FFT) Detector
|
| 3 |
+
|
| 4 |
+
```python
|
| 5 |
+
def run_fft(image: Image.Image, threshold: float = 0.92) -> bool:
|
| 6 |
+
```
|
| 7 |
+
|
| 8 |
+
## **Overview**
|
| 9 |
+
|
| 10 |
+
The `run_fft` function performs a frequency domain analysis on an image using the **Fast Fourier Transform (FFT)** to detect possible **AI generation or digital manipulation**. It leverages the fact that artificially generated or heavily edited images often exhibit a distinct high-frequency pattern.
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## **Parameters**
|
| 15 |
+
|
| 16 |
+
| Parameter | Type | Description |
|
| 17 |
+
| ----------- | ----------------- | --------------------------------------------------------------------------------------- |
|
| 18 |
+
| `image` | `PIL.Image.Image` | Input image to analyze. It will be converted to grayscale and resized. |
|
| 19 |
+
| `threshold` | `float` | Proportion threshold of high-frequency components to flag the image. Default is `0.92`. |
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## **Returns**
|
| 24 |
+
|
| 25 |
+
| Type | Description |
|
| 26 |
+
| ------ | ---------------------------------------------------------------------- |
|
| 27 |
+
| `bool` | `True` if image is likely AI-generated/manipulated; otherwise `False`. |
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
## **Step-by-Step Explanation**
|
| 32 |
+
|
| 33 |
+
### 1. **Grayscale Conversion**
|
| 34 |
+
|
| 35 |
+
All images are converted to grayscale:
|
| 36 |
+
|
| 37 |
+
```python
|
| 38 |
+
gray_image = image.convert("L")
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### 2. **Resize**
|
| 42 |
+
|
| 43 |
+
The image is resized to a fixed $512 \times 512$ for uniformity:
|
| 44 |
+
|
| 45 |
+
```python
|
| 46 |
+
resized_image = gray_image.resize((512, 512))
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
### 3. **FFT Calculation**
|
| 50 |
+
|
| 51 |
+
Compute the 2D Discrete Fourier Transform:
|
| 52 |
+
|
| 53 |
+
$$
|
| 54 |
+
F(u, v) = \sum_{x=0}^{M-1} \sum_{y=0}^{N-1} f(x, y) \cdot e^{-2\pi i \left( \frac{ux}{M} + \frac{vy}{N} \right)}
|
| 55 |
+
$$
|
| 56 |
+
|
| 57 |
+
```python
|
| 58 |
+
fft_result = fft2(image_array)
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
### 4. **Shift Zero Frequency to Center**
|
| 62 |
+
|
| 63 |
+
Use `fftshift` to center the zero-frequency component:
|
| 64 |
+
|
| 65 |
+
```python
|
| 66 |
+
fft_shifted = fftshift(fft_result)
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
### 5. **Magnitude Spectrum**
|
| 70 |
+
|
| 71 |
+
$$
|
| 72 |
+
|F(u, v)| = \sqrt{\Re^2 + \Im^2}
|
| 73 |
+
$$
|
| 74 |
+
|
| 75 |
+
```python
|
| 76 |
+
magnitude_spectrum = np.abs(fft_shifted)
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
### 6. **Normalization**
|
| 80 |
+
|
| 81 |
+
Normalize the spectrum to avoid scale issues:
|
| 82 |
+
|
| 83 |
+
$$
|
| 84 |
+
\text{Normalized}(u,v) = \frac{|F(u,v)|}{\max(|F(u,v)|)}
|
| 85 |
+
$$
|
| 86 |
+
|
| 87 |
+
```python
|
| 88 |
+
normalized_spectrum = magnitude_spectrum / max_magnitude
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
### 7. **High-Frequency Detection**
|
| 92 |
+
|
| 93 |
+
High-frequency components are defined as:
|
| 94 |
+
|
| 95 |
+
$$
|
| 96 |
+
\text{Mask}(u,v) =
|
| 97 |
+
\begin{cases}
|
| 98 |
+
1 & \text{if } \text{Normalized}(u,v) > 0.5 \\
|
| 99 |
+
0 & \text{otherwise}
|
| 100 |
+
\end{cases}
|
| 101 |
+
$$
|
| 102 |
+
|
| 103 |
+
```python
|
| 104 |
+
high_freq_mask = normalized_spectrum > 0.5
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
### 8. **Proportion Calculation**
|
| 108 |
+
|
| 109 |
+
$$
|
| 110 |
+
\text{Ratio} = \frac{\sum \text{Mask}}{\text{Total pixels}}
|
| 111 |
+
$$
|
| 112 |
+
|
| 113 |
+
```python
|
| 114 |
+
high_freq_ratio = np.sum(high_freq_mask) / normalized_spectrum.size
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
### 9. **Threshold Decision**
|
| 118 |
+
|
| 119 |
+
If the ratio exceeds the threshold:
|
| 120 |
+
|
| 121 |
+
$$
|
| 122 |
+
\text{is\_fake} = (\text{Ratio} > \text{Threshold})
|
| 123 |
+
$$
|
| 124 |
+
|
| 125 |
+
```python
|
| 126 |
+
is_fake = high_freq_ratio > threshold
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
This detection logic is implemented in the API.
|
| 130 |
+
|
| 131 |
+
### Running Locally
|
| 132 |
+
|
| 133 |
+
Copy the function into a notebook or script and run it on your image. It performs well on typical photographic images.
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
[🔙 Back to Main README](../README.md)
|
docs/detector/meta.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Metadata Analysis for Image Edit Detection
|
| 2 |
+
|
| 3 |
+
This module inspects image metadata to detect possible signs of AI-generation or post-processing edits.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
- Many AI-generated images and edited images leave identifiable traces in their metadata.
|
| 8 |
+
- This detector scans image EXIF metadata and raw bytes for known AI generation indicators and common photo editing software signatures.
|
| 9 |
+
- It classifies images as `"ai_generated"`, `"edited"`, or `"undetermined"` based on detected markers.
|
| 10 |
+
- Handles invalid image formats gracefully by reporting errors.
|
| 11 |
+
|
| 12 |
+
## How It Works
|
| 13 |
+
|
| 14 |
+
- Opens the image from raw bytes using the Python Pillow library (`PIL`).
|
| 15 |
+
- Reads EXIF metadata and specifically looks for the "Software" tag that often contains the editing app name.
|
| 16 |
+
- Checks for common image editors such as Photoshop, GIMP, Snapseed, etc.
|
| 17 |
+
- Scans the entire raw byte content of the image for embedded AI generation identifiers like "midjourney", "stable-diffusion", "openai", etc.
|
| 18 |
+
- Returns a status string indicating the metadata classification.
|
| 19 |
+
|
| 20 |
+
[🔙 Back to Main README](../README.md)
|
docs/detector/note-for-backend.md
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# 📦 API Integration Note
|
| 3 |
+
|
| 4 |
+
## Overview
|
| 5 |
+
|
| 6 |
+
This system integrates **three image forensics methods**—**ELA**, **FFT**, and **Metadata analysis**—into a single detection pipeline to determine whether an image is AI-generated, manipulated, or authentic.
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
## 🔍 Detection Modules
|
| 11 |
+
|
| 12 |
+
### 1. **ELA (Error Level Analysis)**
|
| 13 |
+
|
| 14 |
+
* **Purpose:** Detects tampering or editing by analyzing compression error levels.
|
| 15 |
+
* **Accuracy:** ✅ *Most accurate method*
|
| 16 |
+
* **Performance:** ❗ *Slowest method*
|
| 17 |
+
* **Output:** `True` (edited) or `False` (authentic)
|
| 18 |
+
|
| 19 |
+
### 2. **FFT (Fast Fourier Transform)**
|
| 20 |
+
|
| 21 |
+
* **Purpose:** Identifies high-frequency patterns typical of AI-generated images.
|
| 22 |
+
* **Accuracy:** ⚠️ *Moderately accurate*
|
| 23 |
+
* **Performance:** ❗ *Moderate to slow*
|
| 24 |
+
* **Output:** `True` (likely AI-generated) or `False` (authentic)
|
| 25 |
+
|
| 26 |
+
### 3. **Metadata Analysis**
|
| 27 |
+
|
| 28 |
+
* **Purpose:** Detects traces of AI tools or editors in image metadata or binary content.
|
| 29 |
+
* **Accuracy:** ⚠️ *Fast but weaker signal*
|
| 30 |
+
* **Performance:** 🚀 *Fastest method*
|
| 31 |
+
* **Output:** One of:
|
| 32 |
+
|
| 33 |
+
* `"ai_generated"` – AI tool or generator identified
|
| 34 |
+
* `"edited"` – Edited using known software
|
| 35 |
+
* `"undetermined"` – No signature found
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## 🧩 Integration Plan
|
| 40 |
+
|
| 41 |
+
### ➕ Combine all three APIs into one unified endpoint:
|
| 42 |
+
|
| 43 |
+
```bash
|
| 44 |
+
POST /api/detect-image
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
### Input:
|
| 48 |
+
|
| 49 |
+
* `image`: Image file (binary, any format supported by Pillow)
|
| 50 |
+
|
| 51 |
+
### Output:
|
| 52 |
+
|
| 53 |
+
```json
|
| 54 |
+
{
|
| 55 |
+
"ela_result": true,
|
| 56 |
+
"fft_result": false,
|
| 57 |
+
"metadata_result": "ai_generated",
|
| 58 |
+
"final_decision": "ai_generated"
|
| 59 |
+
}
|
| 60 |
+
```
|
| 61 |
+
> NOTE: Optionally, a default decision logic can be adopted (e.g., trust ELA > FFT > Metadata).
|
| 62 |
+
|
| 63 |
+
## Result implementation
|
| 64 |
+
| `ela_result` | `fft_result` | `metadata_result` | Suggested Final Decision | Notes |
|
| 65 |
+
| ------------ | ------------ | ----------------- | ------------------------ | ----------------------------------------------------------------------- |
|
| 66 |
+
| `true` | `true` | `"ai_generated"` | `ai_generated` | Strong evidence from all three modules |
|
| 67 |
+
| `true` | `false` | `"edited"` | `edited` | ELA confirms editing, no AI signals |
|
| 68 |
+
| `true` | `false` | `"undetermined"` | `edited` | ELA indicates manipulation |
|
| 69 |
+
| `false` | `true` | `"ai_generated"` | `ai_generated` | No edits, but strong AI frequency & metadata signature |
|
| 70 |
+
| `false` | `true` | `"undetermined"` | `possibly_ai_generated` | Weak metadata, but FFT indicates possible AI generation |
|
| 71 |
+
| `false` | `false` | `"ai_generated"` | `ai_generated` | Metadata alone shows AI use |
|
| 72 |
+
| `false` | `false` | `"edited"` | `possibly_edited` | Weak signal—metadata shows editing but no structural or frequency signs |
|
| 73 |
+
| `false` | `false` | `"undetermined"` | `authentic` | No detectable manipulation or AI indicators |
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
### Decision Logic:
|
| 77 |
+
|
| 78 |
+
* Use **ELA** as the **primary indicator** for manipulation.
|
| 79 |
+
* Supplement with **FFT** and **Metadata** to improve reliability.
|
| 80 |
+
* Combine using a simple rule-based or voting system.
|
| 81 |
+
|
| 82 |
+
---
|
| 83 |
+
|
| 84 |
+
## ⚙️ Performance Consideration
|
| 85 |
+
|
| 86 |
+
| Method | Speed | Strength |
|
| 87 |
+
| -------- | ----------- | -------------------- |
|
| 88 |
+
| ELA | ❗ Slow | ✅ Highly accurate |
|
| 89 |
+
| FFT | ⚠️ Moderate | ⚠️ Somewhat reliable |
|
| 90 |
+
| Metadata | 🚀 Fast | ⚠️ Low confidence |
|
| 91 |
+
|
| 92 |
+
> For high-throughput systems, consider running Metadata first and conditionally applying ELA/FFT if suspicious.
|
| 93 |
+
|
| 94 |
+
[🔙 Back to Main README](../README.md)
|
docs/features/image_classifier.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image Classifier
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
This module classifies whether an input image is AI-generated or a real-life photograph.
|
| 6 |
+
|
| 7 |
+
## Model
|
| 8 |
+
|
| 9 |
+
- Architecture: InceptionV3
|
| 10 |
+
- Type: Binary Classifier (AI vs Real)
|
| 11 |
+
- Format: H5 model (`latest-my_cnn_model.h5`)
|
| 12 |
+
|
| 13 |
+
## Dataset
|
| 14 |
+
|
| 15 |
+
- Total images: ~79,950
|
| 16 |
+
- Balanced between real and generated images
|
| 17 |
+
- Preprocessing: Resizing, normalization
|
| 18 |
+
|
| 19 |
+
## Code Location
|
| 20 |
+
|
| 21 |
+
- Controller: `features/image_classifier/controller.py`
|
| 22 |
+
- Model Loader: `features/image_classifier/model_loader.py`
|
| 23 |
+
- Preprocessor: `features/image_classifier/preprocess.py`
|
| 24 |
+
|
| 25 |
+
## API
|
| 26 |
+
|
| 27 |
+
- Endpoint: [ENDPOINTS](../api_endpoints.md)
|
| 28 |
+
- Input: Image file (PNG/JPG)
|
| 29 |
+
- Output: JSON response with classification result and confidence
|
| 30 |
+
|
| 31 |
+
[🔙 Back to Main README](../README.md)
|
docs/features/nepali_text_classifier.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Nepali Text Classifier
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
This classifier identifies whether Nepali-language text content is written by a human or AI.
|
| 6 |
+
|
| 7 |
+
## Model
|
| 8 |
+
|
| 9 |
+
- Base Model: XLM-Roberta (XLMRClassifier)
|
| 10 |
+
- Language: Nepali (Multilingual model)
|
| 11 |
+
- Fine-tuned with scraped web content (~18,000 samples)
|
| 12 |
+
|
| 13 |
+
## Dataset
|
| 14 |
+
|
| 15 |
+
- Custom scraped dataset with manual labeling
|
| 16 |
+
- Includes news, blogs, and synthetic content from various LLMs
|
| 17 |
+
|
| 18 |
+
## Code Location
|
| 19 |
+
|
| 20 |
+
- Controller: `features/nepali_text_classifier/controller.py`
|
| 21 |
+
- Inference: `features/nepali_text_classifier/inferencer.py`
|
| 22 |
+
- Model Loader: `features/nepali_text_classifier/model_loader.py`
|
| 23 |
+
|
| 24 |
+
## API
|
| 25 |
+
|
| 26 |
+
- Endpoint: [ENDPOINTS](../api_endpoints.md)
|
| 27 |
+
- Input: Raw text
|
| 28 |
+
- Output: JSON classification with label and confidence score
|
| 29 |
+
|
| 30 |
+
[🔙 Back to Main README](../README.md)
|
docs/features/text_classifier.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# English Text Classifier
|
| 2 |
+
|
| 3 |
+
## Overview
|
| 4 |
+
|
| 5 |
+
Detects whether English-language text is AI-generated or human-written.
|
| 6 |
+
|
| 7 |
+
## Model Pipeline
|
| 8 |
+
|
| 9 |
+
- Tokenizer: GPT-2 Tokenizer
|
| 10 |
+
- Model: Custom trained binary classifier
|
| 11 |
+
|
| 12 |
+
## Dataset
|
| 13 |
+
|
| 14 |
+
- Balanced dataset: Human vs AI-generated (ChatGPT, Claude, etc.)
|
| 15 |
+
- Tokenized and fed into the model using PyTorch/TensorFlow
|
| 16 |
+
|
| 17 |
+
## Code Location
|
| 18 |
+
|
| 19 |
+
- Controller: `features/text_classifier/controller.py`
|
| 20 |
+
- Inference: `features/text_classifier/inferencer.py`
|
| 21 |
+
- Model Loader: `features/text_classifier/model_loader.py`
|
| 22 |
+
- Preprocessor: `features/text_classifier/preprocess.py`
|
| 23 |
+
|
| 24 |
+
## API
|
| 25 |
+
|
| 26 |
+
- Endpoint: [ENDPOINTS](../api_endpoints.md)
|
| 27 |
+
- Input: Raw English text
|
| 28 |
+
- Output: Prediction result with probability/confidence
|
| 29 |
+
|
| 30 |
+
[🔙 Back to Main README](../README.md)
|
docs/functions.md
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Major Functions used
|
| 2 |
+
|
| 3 |
+
## In Text Classifier (`features/text_classifier/`)
|
| 4 |
+
|
| 5 |
+
- **`load_model()`**
|
| 6 |
+
Loads the GPT-2 model and tokenizer from the specified directory paths.
|
| 7 |
+
|
| 8 |
+
- **`lifespan()`**
|
| 9 |
+
Manages the application lifecycle. Initializes the model at startup and handles cleanup on shutdown.
|
| 10 |
+
|
| 11 |
+
- **`classify_text_sync()`**
|
| 12 |
+
Synchronously tokenizes input text and predicts using the GPT-2 model. Returns classification and perplexity.
|
| 13 |
+
|
| 14 |
+
- **`classify_text()`**
|
| 15 |
+
Asynchronously runs `classify_text_sync()` in a thread pool for non-blocking text classification.
|
| 16 |
+
|
| 17 |
+
- **`analyze_text()`**
|
| 18 |
+
**POST** endpoint: Accepts text input, classifies it using `classify_text()`, and returns the result with perplexity.
|
| 19 |
+
|
| 20 |
+
- **`health()`**
|
| 21 |
+
**GET** endpoint: Simple health check for API liveness.
|
| 22 |
+
|
| 23 |
+
- **`parse_docx()`, `parse_pdf()`, `parse_txt()`**
|
| 24 |
+
Utilities to extract and convert `.docx`, `.pdf`, and `.txt` file contents to plain text.
|
| 25 |
+
|
| 26 |
+
- **`warmup()`**
|
| 27 |
+
Downloads the model repository and initializes the model/tokenizer using `load_model()`.
|
| 28 |
+
|
| 29 |
+
- **`download_model_repo()`**
|
| 30 |
+
Downloads the model files from the designated `MODEL` folder.
|
| 31 |
+
|
| 32 |
+
- **`get_model_tokenizer()`**
|
| 33 |
+
Checks if the model already exists; if not, downloads it—otherwise, loads the cached model.
|
| 34 |
+
|
| 35 |
+
- **`handle_file_upload()`**
|
| 36 |
+
Handles file uploads from the `/upload` route. Extracts text, classifies, and returns results.
|
| 37 |
+
|
| 38 |
+
- **`extract_file_contents()`**
|
| 39 |
+
Extracts and returns plain text from uploaded files (PDF, DOCX, TXT).
|
| 40 |
+
|
| 41 |
+
- **`handle_file_sentence()`**
|
| 42 |
+
Processes file uploads by analyzing each sentence (under 10,000 chars) before classification.
|
| 43 |
+
|
| 44 |
+
- **`handle_sentence_level_analysis()`**
|
| 45 |
+
Checks/strips each sentence, then computes AI/human likelihood for each.
|
| 46 |
+
|
| 47 |
+
- **`analyze_sentences()`**
|
| 48 |
+
Splits paragraphs into sentences, classifies each, and returns all results.
|
| 49 |
+
|
| 50 |
+
- **`analyze_sentence_file()`**
|
| 51 |
+
Like `handle_file_sentence()`—analyzes sentences in uploaded files.
|
| 52 |
+
---
|
| 53 |
+
## for image_classifier
|
| 54 |
+
|
| 55 |
+
- **`Classify_Image_router()`** – Handles image classification requests by routing and coordinating preprocessing and inference.
|
| 56 |
+
- **`classify_image()`** – Performs AI vs human image classification using the loaded model.
|
| 57 |
+
- **`load_model()`** – Loads the pretrained model from Hugging Face at server startup.
|
| 58 |
+
- **`preprocess_image()`** – Applies all required preprocessing steps to the input image.
|
| 59 |
+
|
| 60 |
+
> Note: While many functions mirror those in the text classifier, the image classifier primarily uses TensorFlow rather than PyTorch.
|
| 61 |
+
|
| 62 |
+
[🔙 Back to Main README](../README.md)
|
docs/nestjs_integration.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NestJS + FastAPI
|
| 2 |
+
|
| 3 |
+
You can easily call this API from a NestJS microservice.
|
| 4 |
+
|
| 5 |
+
**.env**
|
| 6 |
+
```env
|
| 7 |
+
FASTAPI_BASE_URL=http://localhost:8000
|
| 8 |
+
SECRET_TOKEN=your_secret_token_here
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
**fastapi.service.ts**
|
| 12 |
+
|
| 13 |
+
```typescript
|
| 14 |
+
import { Injectable } from "@nestjs/common";
|
| 15 |
+
import { HttpService } from "@nestjs/axios";
|
| 16 |
+
import { ConfigService } from "@nestjs/config";
|
| 17 |
+
import { firstValueFrom } from "rxjs";
|
| 18 |
+
|
| 19 |
+
@Injectable()
|
| 20 |
+
export class FastAPIService {
|
| 21 |
+
constructor(
|
| 22 |
+
private http: HttpService,
|
| 23 |
+
private config: ConfigService,
|
| 24 |
+
) {}
|
| 25 |
+
|
| 26 |
+
async analyzeText(text: string) {
|
| 27 |
+
const url = `${this.config.get("FASTAPI_BASE_URL")}/text/analyse`;
|
| 28 |
+
const token = this.config.get("SECRET_TOKEN");
|
| 29 |
+
|
| 30 |
+
const response = await firstValueFrom(
|
| 31 |
+
this.http.post(
|
| 32 |
+
url,
|
| 33 |
+
{ text },
|
| 34 |
+
{
|
| 35 |
+
headers: {
|
| 36 |
+
Authorization: `Bearer ${token}`,
|
| 37 |
+
},
|
| 38 |
+
},
|
| 39 |
+
),
|
| 40 |
+
);
|
| 41 |
+
|
| 42 |
+
return response.data;
|
| 43 |
+
}
|
| 44 |
+
}
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
**app.module.ts**
|
| 48 |
+
```typescript
|
| 49 |
+
import { Module } from "@nestjs/common";
|
| 50 |
+
import { ConfigModule } from "@nestjs/config";
|
| 51 |
+
import { HttpModule } from "@nestjs/axios";
|
| 52 |
+
import { AppController } from "./app.controller";
|
| 53 |
+
import { FastAPIService } from "./fastapi.service";
|
| 54 |
+
|
| 55 |
+
@Module({
|
| 56 |
+
imports: [ConfigModule.forRoot(), HttpModule],
|
| 57 |
+
controllers: [AppController],
|
| 58 |
+
providers: [FastAPIService],
|
| 59 |
+
})
|
| 60 |
+
export class AppModule {}
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
**app.controller.ts**
|
| 64 |
+
```typescript
|
| 65 |
+
import { Body, Controller, Post, Get } from '@nestjs/common';
|
| 66 |
+
import { FastAPIService } from './fastapi.service';
|
| 67 |
+
|
| 68 |
+
@Controller()
|
| 69 |
+
export class AppController {
|
| 70 |
+
constructor(private readonly fastapiService: FastAPIService) {}
|
| 71 |
+
|
| 72 |
+
@Post('analyze-text')
|
| 73 |
+
async callFastAPI(@Body('text') text: string) {
|
| 74 |
+
return this.fastapiService.analyzeText(text);
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
@Get()
|
| 78 |
+
getHello(): string {
|
| 79 |
+
return 'NestJS is connected to FastAPI';
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
```
|
| 83 |
+
[🔙 Back to Main README](../README.md)
|
docs/security.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Security: Bearer Token Auth
|
| 2 |
+
|
| 3 |
+
All endpoints require authentication via Bearer token:
|
| 4 |
+
|
| 5 |
+
- Set `SECRET_TOKEN` in `.env`
|
| 6 |
+
- Add header: `Authorization: Bearer <SECRET_TOKEN>`
|
| 7 |
+
|
| 8 |
+
Unauthorized requests receive `403 Forbidden`.
|
| 9 |
+
|
| 10 |
+
[🔙 Back to Main README](../README.md)
|
docs/setup.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Setup & Installation
|
| 2 |
+
|
| 3 |
+
## 1. Clone the Repository
|
| 4 |
+
```bash
|
| 5 |
+
git clone https://github.com/cyberalertnepal/aiapi
|
| 6 |
+
cd aiapi
|
| 7 |
+
```
|
| 8 |
+
|
| 9 |
+
## 2. Install Dependencies
|
| 10 |
+
```bash
|
| 11 |
+
pip install -r requirements.txt
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
## 3. Configure Environment
|
| 15 |
+
Create a `.env` file:
|
| 16 |
+
```env
|
| 17 |
+
SECRET_TOKEN=your_secret_token_here
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
## 4. Run the API
|
| 21 |
+
```bash
|
| 22 |
+
uvicorn app:app --host 0.0.0.0 --port 8000
|
| 23 |
+
```
|
| 24 |
+
[🔙 Back to Main README](../README.md)
|
docs/status_code.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Error Codes Reference
|
| 2 |
+
|
| 3 |
+
## 🔹 Summary Table
|
| 4 |
+
|
| 5 |
+
| Code | Message | Description |
|
| 6 |
+
| ---- | ----------------------------------------------------- | ------------------------------------------ |
|
| 7 |
+
| 400 | Text must contain at least two words | Input text too short |
|
| 8 |
+
| 400 | Text should be less than 10,000 characters | Input text too long |
|
| 9 |
+
| 404 | The file is empty or only contains whitespace | File has no usable content |
|
| 10 |
+
| 404 | Invalid file type. Only .docx, .pdf, and .txt allowed | Unsupported file format |
|
| 11 |
+
| 403 | Invalid or expired token | Authentication token is invalid or expired |
|
| 12 |
+
| 413 | Text must contain at least two words | Text too short (alternative condition) |
|
| 13 |
+
| 413 | Text must be less than 10,000 characters | Text too long (alternative condition) |
|
| 14 |
+
| 413 | The image error (preprocessing) | Image size/content issue |
|
| 15 |
+
| 500 | Error processing the file | Internal server error while processing |
|
| 16 |
+
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
## 🔍 Error Details
|
| 20 |
+
|
| 21 |
+
### `400` - Bad Request
|
| 22 |
+
|
| 23 |
+
- **Text must contain at least two words**
|
| 24 |
+
The input text field is too short. Submit at least two words to proceed.
|
| 25 |
+
|
| 26 |
+
- **Text should be less than 10,000 characters**
|
| 27 |
+
Input text exceeds the maximum allowed character limit. Consider truncating or summarizing the content.
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
### `404` - Not Found
|
| 32 |
+
|
| 33 |
+
- **The file is empty or only contains whitespace**
|
| 34 |
+
The uploaded file is invalid due to lack of meaningful content. Ensure the file has readable, non-empty text.
|
| 35 |
+
|
| 36 |
+
- **Invalid file type. Only .docx, .pdf, and .txt are allowed**
|
| 37 |
+
The file format is not supported. Convert the file to one of the allowed formats before uploading.
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
### `403` - Forbidden
|
| 42 |
+
|
| 43 |
+
- **Invalid or expired token**
|
| 44 |
+
Your access token is either expired or incorrect. Try logging in again or refreshing the token.
|
| 45 |
+
|
| 46 |
+
---
|
| 47 |
+
|
| 48 |
+
### `413` - Payload Too Large
|
| 49 |
+
|
| 50 |
+
- **Text must contain at least two words**
|
| 51 |
+
The text payload is too small or malformed under a large upload context. Add more content.
|
| 52 |
+
|
| 53 |
+
- **Text must be less than 10,000 characters**
|
| 54 |
+
The payload exceeds the allowed character limit for a single request. Break it into smaller chunks if needed.
|
| 55 |
+
|
| 56 |
+
- **The image error**
|
| 57 |
+
The uploaded image is too large or corrupted. Try resizing or compressing it before retrying.
|
| 58 |
+
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
### `500` - Internal Server Error
|
| 62 |
+
|
| 63 |
+
- **Error processing the file**
|
| 64 |
+
An unexpected server-side failure occurred during file analysis. Retry later or contact support if persistent.
|
| 65 |
+
|
| 66 |
+
---
|
| 67 |
+
|
| 68 |
+
> 📌 **Note:** Always validate inputs, check token status, and follow file guidelines before making requests.
|
docs/structure.md
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## 🏗️ Project Structure
|
| 2 |
+
|
| 3 |
+
```bash
|
| 4 |
+
AI-Checker/
|
| 5 |
+
│
|
| 6 |
+
├── app.py # Main FastAPI entry point
|
| 7 |
+
├── config.py # Configuration settings
|
| 8 |
+
├── Dockerfile # Docker build script
|
| 9 |
+
├── Procfile # Deployment entry for platforms like Heroku/Railway
|
| 10 |
+
├── requirements.txt # Python dependency list
|
| 11 |
+
├── README.md # Main project overview 📘
|
| 12 |
+
│
|
| 13 |
+
├── features/ # Core AI content detection modules
|
| 14 |
+
│ ├── image_classifier/ # Classifies AI vs Real images
|
| 15 |
+
│ │ ├── controller.py
|
| 16 |
+
│ │ ├── model_loader.py
|
| 17 |
+
│ │ └── preprocess.py
|
| 18 |
+
│ ├── image_edit_detector/ # Detects tampered or edited images
|
| 19 |
+
│ ├── nepali_text_classifier/ # Classifies Nepali text as AI or Human
|
| 20 |
+
│ │ ├── controller.py
|
| 21 |
+
│ │ ├── inferencer.py
|
| 22 |
+
│ │ ├── model_loader.py
|
| 23 |
+
│ │ └── preprocess.py
|
| 24 |
+
│ └── text_classifier/ # Classifies English text as AI or Human
|
| 25 |
+
│ ├── controller.py
|
| 26 |
+
│ ├── inferencer.py
|
| 27 |
+
│ ├── model_loader.py
|
| 28 |
+
│ └── preprocess.py
|
| 29 |
+
│
|
| 30 |
+
├── docs/ # Internal documentation and API references
|
| 31 |
+
│ ├── api_endpoints.md
|
| 32 |
+
│ ├── deployment.md
|
| 33 |
+
│ ├── detector/
|
| 34 |
+
│ │ ├── ELA.md
|
| 35 |
+
│ │ ├── fft.md
|
| 36 |
+
│ │ ├── meta.md
|
| 37 |
+
│ │ └── note-for-backend.md
|
| 38 |
+
│ ├── features/
|
| 39 |
+
│ │ ├── image_classifier.md
|
| 40 |
+
│ │ ├── nepali_text_classifier.md
|
| 41 |
+
│ │ └── text_classifier.md
|
| 42 |
+
│ ├── functions.md
|
| 43 |
+
│ ├── nestjs_integration.md
|
| 44 |
+
│ ├── security.md
|
| 45 |
+
│ ├── setup.md
|
| 46 |
+
│ └── structure.md
|
| 47 |
+
│
|
| 48 |
+
├── IMG_Models/ # Stored model weights
|
| 49 |
+
│ └── latest-my_cnn_model.h5
|
| 50 |
+
│
|
| 51 |
+
├── notebooks/ # Experimental/debug Jupyter notebooks
|
| 52 |
+
├── static/ # Static files (e.g., UI assets, test inputs)
|
| 53 |
+
└── test.md # Test usage notes
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
### 🌟 Key Files and Their Roles
|
| 57 |
+
|
| 58 |
+
- **`app.py`**: Entry point initializing FastAPI app and routes.
|
| 59 |
+
- **`Procfile`**: Tells Railway (or similar platforms) how to run the program.
|
| 60 |
+
- **`requirements.txt`**: Tracks all Python dependencies for the project.
|
| 61 |
+
- **`__init__.py`**: Package initializer for the root module and submodules.
|
| 62 |
+
- **`features/text_classifier/`**
|
| 63 |
+
- **`controller.py`**: Handles logic between routes and the model.
|
| 64 |
+
- **`inferencer.py`**: Runs inference and returns predictions as well as file system
|
| 65 |
+
utilities.
|
| 66 |
+
- **`features/nepali_text_classifier/`**
|
| 67 |
+
- **`controller.py`**: Handles logic between routes and the model.
|
| 68 |
+
- **`inferencer.py`**: Runs inference and returns predictions as well as file system
|
| 69 |
+
utilities.
|
| 70 |
+
- **`model_loader.py`**: Loads the ML model and tokenizer.
|
| 71 |
+
- **`preprocess.py`**: Prepares input text for the model.
|
| 72 |
+
- **`routes.py`**: Defines API routes for text classification.
|
| 73 |
+
|
| 74 |
+
[🔙 Back to Main README](../README.md)
|
features/ai_human_image_classifier/controller.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import logging
from typing import IO

from preprocessor import preprocessor
from inferencer import inferencer

logger = logging.getLogger(__name__)


class ClassificationController:
    """Controller that orchestrates image classification.

    Bridges the preprocessing step and the inference step, translating
    low-level failures into an ``{"error": ...}`` payload for the caller.
    """

    def classify_image(self, image_file: IO) -> dict:
        """Classify a single image file.

        Args:
            image_file (IO): File-like object containing the image bytes.

        Returns:
            dict: The inference result on success, or a dict with a single
            ``"error"`` key describing the failure.
        """
        try:
            # Step 1: decode and normalize the image into a model-ready tensor.
            image_tensor = preprocessor.process(image_file)

            # Step 2: run inference on the tensor.
            return inferencer.predict(image_tensor)
        except ValueError as e:
            # Raised by the preprocessor for invalid/corrupted images;
            # the message is safe to surface to the client.
            return {"error": str(e)}
        except Exception:
            # Unexpected failure: log the full traceback (instead of the
            # original bare print) and return a generic message so internal
            # details do not leak to the client.
            logger.exception("Unexpected error during classification")
            return {"error": "An internal error occurred during classification."}


# Module-level singleton used by the routes layer.
controller = ClassificationController()
|
features/ai_human_image_classifier/inferencer.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import numpy as np
from model_loader import models


class Inferencer:
    """Runs the CLIP-feature + SVM classification pipeline."""

    # Maps the SVM's integer output onto human-readable labels.
    _LABELS = {0: "real", 1: "fake"}

    def __init__(self):
        # Share the models held by the global loader; nothing is re-loaded.
        self.clip_model = models.clip_model
        self.svm_model = models.svm_model

    @torch.no_grad()
    def predict(self, image_tensor: torch.Tensor) -> dict:
        """Classify a preprocessed image tensor.

        Args:
            image_tensor (torch.Tensor): Preprocessed image batch of one.

        Returns:
            dict: ``{"classification": <label>, "confidence": <float>}``.
        """
        # Embed the image with CLIP and move the features to host memory
        # so scikit-learn can consume them.
        features = self.clip_model.encode_image(image_tensor).cpu().numpy()

        predicted_class = self.svm_model.predict(features)[0]
        confidence = self._confidence_for(features)

        return {
            "classification": self._LABELS.get(predicted_class, "unknown"),
            "confidence": confidence,
        }

    def _confidence_for(self, features: np.ndarray) -> float:
        """Derive a confidence score in [0, 1] from the SVM."""
        if hasattr(self.svm_model, "predict_proba"):
            # Probability-calibrated SVM: take the winning class probability.
            return float(np.max(self.svm_model.predict_proba(features)[0]))
        # Fallback: squash |decision_function| through a sigmoid so the
        # margin distance maps onto a comparable [0, 1] scale.
        margin = self.svm_model.decision_function(features)[0]
        return float(1 / (1 + np.exp(-np.abs(margin))))


inferencer = Inferencer()
|
features/ai_human_image_classifier/main.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI
from routes import router as api_router

# Single FastAPI instance; the metadata below is surfaced in /docs.
app = FastAPI(
    title="Real vs. Fake Image Classification API",
    description="An API to classify images as real or fake using OpenAI's CLIP and an SVM model.",
    version="1.0.0",
)

# Mount every endpoint defined in routes.py under the /api prefix.
app.include_router(api_router, prefix="/api", tags=["Classification"])


@app.get("/", tags=["Root"])
async def read_root():
    """Liveness endpoint confirming the API is up."""
    return {"message": "Welcome to the Image Classification API. Go to /docs for the API documentation."}


# Running locally:
#   1. Install the dependencies listed in requirements.txt.
#   2. Ensure the 'svm_model.joblib' file is in the same directory.
#   3. Start the server with: uvicorn main:app --reload
features/ai_human_image_classifier/model_loader.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import clip
import torch
import joblib
from pathlib import Path
from huggingface_hub import hf_hub_download


class ModelLoader:
    """Loads and caches the CLIP encoder and the downstream SVM classifier.

    A single instance created at import time guarantees the expensive
    model loads happen only once per process.
    """

    def __init__(self, clip_model_name: str, svm_repo_id: str, svm_filename: str):
        """Load both models onto the best available device.

        Args:
            clip_model_name (str): CLIP variant to load (e.g. 'ViT-L/14').
            svm_repo_id (str): Hugging Face repository id hosting the SVM
                (e.g. 'rhnsa/ai_human_image_detector').
            svm_filename (str): File name of the SVM inside that repository.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        self.clip_model, self.clip_preprocess = self._load_clip_model(clip_model_name)
        self.svm_model = self._load_svm_model(repo_id=svm_repo_id, filename=svm_filename)
        print("Models loaded successfully.")

    def _load_clip_model(self, model_name: str):
        """Return the (model, preprocess) pair for the requested CLIP variant."""
        try:
            return clip.load(model_name, device=self.device)
        except Exception as e:
            print(f"Error loading CLIP model: {e}")
            raise

    def _load_svm_model(self, repo_id: str, filename: str):
        """Download the SVM from the Hugging Face Hub and unpickle it."""
        print(f"Downloading SVM model from Hugging Face repo: {repo_id}")
        try:
            # hf_hub_download returns the path of the cached file.
            cached_path = hf_hub_download(repo_id=repo_id, filename=filename)
            print(f"SVM model downloaded to: {cached_path}")
            return joblib.load(cached_path)
        except Exception as e:
            print(f"Error downloading or loading SVM model from Hugging Face: {e}")
            raise


# --- Global model instance ---
# A single shared loader importable by the rest of the package.
CLIP_MODEL_NAME = 'ViT-L/14'
SVM_REPO_ID = 'rhnsa/ai_human_image_detector'
SVM_FILENAME = 'svm_model_real.joblib'  # The name of your model file in the Hugging Face repo

# Created once when the application starts.
models = ModelLoader(
    clip_model_name=CLIP_MODEL_NAME,
    svm_repo_id=SVM_REPO_ID,
    svm_filename=SVM_FILENAME
)
|
features/ai_human_image_classifier/preprocessor.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import IO

import torch
from PIL import Image

from model_loader import models


class ImagePreprocessor:
    """Turns an uploaded image file into a model-ready tensor."""

    def __init__(self):
        # Reuse the transforms and device selected by the shared loader.
        self.preprocess = models.clip_preprocess
        self.device = models.device

    def process(self, image_file: IO) -> torch.Tensor:
        """Open, normalize and batch an image for the CLIP encoder.

        Args:
            image_file (IO): The image file object (e.g. an upload stream).

        Returns:
            torch.Tensor: Preprocessed image with a leading batch dim,
            already moved to the loader's device.

        Raises:
            ValueError: If the file cannot be decoded as an image.
        """
        try:
            # Force RGB so grayscale/alpha images match the model's input.
            image = Image.open(image_file).convert("RGB")
        except Exception as e:
            print(f"Error opening image: {e}")
            # Fix: chain the original decoding error so the root cause is
            # preserved in tracebacks (was a bare `raise ValueError(...)`).
            raise ValueError("Invalid or corrupted image file.") from e

        # Apply CLIP's transforms, add the batch dimension, move to device.
        return self.preprocess(image).unsqueeze(0).to(self.device)


preprocessor = ImagePreprocessor()
|
features/ai_human_image_classifier/routes.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, File, UploadFile, HTTPException, status, Request, Depends
from fastapi.responses import JSONResponse
from fastapi.security import HTTPBearer
from slowapi import Limiter
from slowapi.util import get_remote_address

from controller import controller

# Shared router / rate limiter / auth scheme for this feature.
# Fix: the original assigned `router = APIRouter()` twice, silently
# discarding the first instance; a single instance is kept.
router = APIRouter()
limiter = Limiter(key_func=get_remote_address)
security = HTTPBearer()


@router.post("/classify", summary="Classify an image as Real or Fake")
async def classify_image_endpoint(image: UploadFile = File(...)):
    """
    Accepts an image file and classifies it as 'real' or 'fake'.

    - **image**: The image file to be classified (e.g., JPEG, PNG).

    Returns a JSON object with the classification and a confidence score.
    """
    # Reject anything not declared as an image upload.
    if not image.content_type.startswith("image/"):
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail="Unsupported file type. Please upload an image (e.g., JPEG, PNG)."
        )

    # The controller expects a file-like object, which `image.file` provides.
    result = controller.classify_image(image.file)

    if "error" in result:
        # Surface controller-level failures as a client error.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=result["error"]
        )

    return JSONResponse(content=result, status_code=status.HTTP_200_OK)
features/image_classifier/__init__.py
ADDED
|
File without changes
|
features/image_classifier/controller.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import HTTPException, File, UploadFile

from .preprocess import preprocess_image
from .inferencer import classify_image


async def Classify_Image_router(file: UploadFile = File(...)):
    """Preprocess an uploaded image and run the AI-vs-human classifier.

    Args:
        file (UploadFile): The uploaded image.

    Returns:
        dict: Classification result from the inferencer.

    Raises:
        HTTPException: 413 for any preprocessing or inference failure
        (status codes preserved from the original implementation, which
        the API docs rely on).
    """
    try:
        image_array = preprocess_image(file)
        try:
            return classify_image(image_array)
        except Exception:
            # Fix: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt; narrowed to Exception. Behavior for
            # ordinary errors is unchanged.
            raise HTTPException(status_code=423, detail="something went wrong")
    except Exception as e:
        # NOTE(review): this outer handler also re-wraps HTTPExceptions
        # (including the 423 raised above) into a 413 — this matches the
        # original behavior, but confirm it is intended.
        raise HTTPException(status_code=413, detail=str(e))
|
features/image_classifier/inferencer.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
from .model_loader import get_model

# Decision thresholds on the model's AI-probability output.
AI_THRESHOLD = 0.55
HUMAN_THRESHOLD = 0.45


def classify_image(image_array: np.ndarray) -> dict:
    """Run the CNN on a preprocessed image batch and label the result.

    Args:
        image_array (np.ndarray): Preprocessed image batch
            (assumed shape (1, H, W, 3) — produced by preprocess_image).

    Returns:
        dict: Label plus AI/human confidence percentages, or an error
        payload with both confidences set to None if anything fails.
    """
    try:
        predictions = get_model().predict(image_array)

        # The model must emit exactly one score per batch item.
        if predictions.ndim != 2 or predictions.shape[1] != 1:
            raise ValueError(
                "Model output shape is invalid. Expected shape: (batch, 1)"
            )

        ai_score = float(np.clip(predictions[0][0], 0.0, 1.0))
        human_score = 1.0 - ai_score

        # Map the score onto a label, with an "uncertain" band in between.
        if ai_score > AI_THRESHOLD:
            verdict = "AI Generated"
        elif ai_score < HUMAN_THRESHOLD:
            verdict = "Human Generated"
        else:
            verdict = "Uncertain (Maybe AI)"

        return {
            "label": verdict,
            "ai_confidence": round(ai_score * 100, 2),
            "human_confidence": round(human_score * 100, 2),
        }

    except Exception as e:
        return {
            "error": str(e),
            "label": "Classification Failed",
            "ai_confidence": None,
            "human_confidence": None,
        }
|
features/image_classifier/model_loader.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import shutil
import logging

import tensorflow as tf
from tensorflow.keras.layers import Layer
from huggingface_hub import snapshot_download

# Model config
REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier"
MODEL_DIR = "./IMG_Models"
WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5")

# Device info (for logging only; TF places ops itself).
gpus = tf.config.list_physical_devices("GPU")
device = "cuda" if gpus else "cpu"

# Process-wide cached model instance.
_model_img = None


class Cast(Layer):
    """Custom layer serialized with the model: casts its inputs to float32."""

    def call(self, inputs):
        return tf.cast(inputs, tf.float32)


def warmup():
    """Eagerly download and load the model (e.g. at service startup)."""
    global _model_img
    download_model_repo()
    _model_img = load_model()
    logging.info("Image model is ready.")


def download_model_repo():
    """Fetch the model repo from the Hugging Face Hub if not present.

    Fix: the original only checked that MODEL_DIR existed, so an empty or
    partially-populated directory silently skipped the download; the
    actual weights file is now checked instead.
    """
    if os.path.isfile(WEIGHTS_PATH):
        logging.info("Image model already exists, skipping download.")
        return
    snapshot_path = snapshot_download(repo_id=REPO_ID)
    os.makedirs(MODEL_DIR, exist_ok=True)
    shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)


def load_model():
    """Load the Keras model from disk, caching it in the module global."""
    global _model_img
    if _model_img is not None:
        return _model_img

    print(f"{'GPU detected' if device == 'cuda' else 'No GPU detected'}, loading model on {device.upper()}.")

    _model_img = tf.keras.models.load_model(
        WEIGHTS_PATH, custom_objects={"Cast": Cast}
    )
    print("Model input shape:", _model_img.input_shape)
    return _model_img


def get_model():
    """Return the cached model, downloading and loading it on first use."""
    global _model_img
    if _model_img is None:
        download_model_repo()
        _model_img = load_model()
    return _model_img
features/image_classifier/preprocess.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np
import cv2
from fastapi import HTTPException


def preprocess_image(file):
    """Decode an uploaded file into a (1, 299, 299, 3) float32 RGB batch.

    Pixel values are scaled to [0, 1]. Raises HTTPException(500) when the
    bytes cannot be decoded or any other preprocessing step fails.
    """
    try:
        # Read the raw upload from the start of the stream.
        file.file.seek(0)
        raw = np.frombuffer(file.file.read(), np.uint8)

        decoded = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if decoded is None:
            raise HTTPException(status_code=500, detail="Could not decode image.")

        # Resize to the CNN's input size, convert BGR -> RGB, normalize
        # to [0, 1], then add the batch dimension.
        rgb = cv2.cvtColor(cv2.resize(decoded, (299, 299)), cv2.COLOR_BGR2RGB)
        return np.expand_dims(rgb / 255.0, axis=0).astype(np.float32)

    except HTTPException:
        raise  # Re-raise already defined HTTP errors untouched.
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Image preprocessing failed: {str(e)}"
        )
|
features/image_classifier/routes.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, File, Request, Depends, HTTPException, UploadFile
from fastapi.security import HTTPBearer
from slowapi import Limiter
from slowapi.util import get_remote_address

from config import ACCESS_RATE
from .controller import Classify_Image_router

# Fix: `from slowapi import Limiter` was imported twice; deduplicated and
# imports regrouped (third-party, then local).
router = APIRouter()
limiter = Limiter(key_func=get_remote_address)
security = HTTPBearer()


@router.post("/analyse")
@limiter.limit(ACCESS_RATE)
async def analyse(
    request: Request,
    file: UploadFile = File(...),
    token: str = Depends(security)
):
    """Rate-limited endpoint: classify an uploaded image.

    NOTE(review): the bearer token is extracted but not validated in this
    module — confirm verification happens elsewhere (middleware/dependency)
    or add it here.
    """
    # Delegate to the async controller.
    return await Classify_Image_router(file)


@router.get("/health")
@limiter.limit(ACCESS_RATE)
def health(request: Request):
    """Simple liveness probe."""
    return {"status": "ok"}
features/image_edit_detector/controller.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import hmac
import io
import os
from io import BytesIO

from PIL import Image
from fastapi import HTTPException, status, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

from .detectors.fft import run_fft
from .detectors.metadata import run_metadata
from .detectors.ela import run_ela
from .preprocess import preprocess_image

security = HTTPBearer()


async def process_image_ela(image_bytes: bytes, quality: int = 90):
    """Run Error Level Analysis on raw image bytes.

    Returns a dict with the ELA verdict.
    NOTE(review): `is_edited` and `ela_score` carry the same boolean value
    returned by run_ela; a numeric score may have been intended — confirm.
    """
    image = Image.open(io.BytesIO(image_bytes))

    # ELA operates on RGB pixels; normalize other modes.
    if image.mode != "RGB":
        image = image.convert("RGB")

    compressed_image = preprocess_image(image, quality)
    ela_result = run_ela(compressed_image, quality)

    return {
        "is_edited": ela_result,
        "ela_score": ela_result
    }


async def process_fft_image(image_bytes: bytes, threshold: float = 0.95) -> dict:
    """Run the FFT-based edit detector and wrap the boolean verdict."""
    image = Image.open(BytesIO(image_bytes)).convert("RGB")
    result = run_fft(image, threshold)
    return {"edited": bool(result)}


async def process_meta_image(image_bytes: bytes) -> dict:
    """Classify the image source from its metadata.

    Returns e.g. ``{"source": "edited" | "phone_capture" | "unknown"}``,
    or ``{"error": <message>}`` if metadata extraction fails.
    """
    try:
        return {"source": run_metadata(image_bytes)}
    except Exception as e:
        # Degrade gracefully instead of leaking a stack trace to callers.
        return {"error": str(e)}


async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency validating the static bearer token.

    Fix: uses hmac.compare_digest instead of `!=` so the comparison is
    constant-time (no timing side channel), and rejects explicitly when
    MY_SECRET_TOKEN is unset instead of comparing against None.

    Raises:
        HTTPException: 403 when the token is missing from the environment
        or does not match.
    """
    token = credentials.credentials
    expected_token = os.getenv("MY_SECRET_TOKEN")
    if not expected_token or not hmac.compare_digest(token, expected_token):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return token
|
features/image_edit_detector/detectors/ela.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image, ImageChops, ImageEnhance
|
| 2 |
+
import io
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def run_ela(image: Image.Image, quality: int = 90, threshold: int = 15) -> bool:
    """
    Perform Error Level Analysis to detect image manipulation.

    The image is recompressed as JPEG in memory; edited regions tend to show
    a larger recompression difference than untouched regions.

    Parameters:
        image (PIL.Image): Input image (converted to RGB if needed).
        quality (int): JPEG compression quality for ELA.
        threshold (int): Maximum pixel difference above which the image is
            classified as edited.

    Returns:
        bool: True if image appears edited, False otherwise.
    """
    # Robustness: difference() requires matching modes, and the per-band
    # extrema handling below assumes a multi-band (RGB) image.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Recompress the image into JPEG format in memory.
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG", quality=quality)
    buffer.seek(0)
    recompressed = Image.open(buffer)

    # Pixel-wise difference between the original and its recompression.
    diff = ImageChops.difference(image, recompressed)

    # getextrema() yields one (min, max) pair per band; take the largest max.
    # (The original also built a brightened debug image via ImageEnhance and
    # discarded it; that dead computation has been removed.)
    max_diff = max(band_max for _, band_max in diff.getextrema())

    return max_diff > threshold
|
features/image_edit_detector/detectors/fft.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from PIL import Image
|
| 3 |
+
from scipy.fft import fft2, fftshift
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def run_fft(image: Image.Image, threshold: float = 0.92) -> bool:
    """
    Detect potential manipulation/generation via FFT high-frequency analysis.

    The image is converted to grayscale, resized to 512x512, and transformed
    with a 2-D FFT. If the fraction of spectrum magnitudes above half the
    peak exceeds `threshold`, the image is flagged.

    Parameters:
        image (PIL.Image.Image): The input image.
        threshold (float): Proportion of high-frequency components above
            which the image is flagged.

    Returns:
        bool: True if the image is likely AI-generated or manipulated.
    """
    pixels = np.array(image.convert("L").resize((512, 512)))

    # Centered magnitude spectrum of the 2-D FFT.
    spectrum = np.abs(fftshift(fft2(pixels)))

    peak = np.max(spectrum)
    if peak == 0:
        # Blank image: nothing to normalize, treat as not edited.
        return False

    # Fraction of coefficients whose normalized magnitude exceeds 0.5.
    high_freq_ratio = np.sum(spectrum / peak > 0.5) / spectrum.size

    return high_freq_ratio > threshold
|
| 40 |
+
|
features/image_edit_detector/detectors/metadata.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image, UnidentifiedImageError
|
| 2 |
+
import io
|
| 3 |
+
|
| 4 |
+
# Common AI metadata identifiers in image files.
|
| 5 |
+
# Byte substrings that commonly appear in AI-generated or provenance-tagged
# image files (C2PA/JUMBF manifests plus well-known generator names).
# Stored as bytes so they can be searched directly in raw file contents.
# Idiom fix: plain byte literals replace the original 'x'.encode('utf-8')
# calls; the values are unchanged.
AI_INDICATORS = [
    # C2PA / JUMBF provenance markers
    b'c2pa', b'claim_generator', b'claim_generator_info',
    b'created_software_agent', b'actions.v2', b'assertions',
    b'urn:c2pa', b'jumd', b'jumb', b'jumdcbor', b'jumdc2ma',
    b'jumdc2as', b'jumdc2cl', b'cbor', b'convertedsfwareagent', b'c2pa.version',
    b'c2pa.assertions', b'c2pa.actions',
    b'c2pa.thumbnail', b'c2pa.signature', b'c2pa.manifest',
    b'c2pa.manifest_store', b'c2pa.ingredient', b'c2pa.parent',
    b'c2pa.provenance', b'c2pa.claim', b'c2pa.hash', b'c2pa.authority',
    b'jumdc2pn', b'jumdrefs', b'jumdver', b'jumdmeta',

    # Popular generator / editor product names
    b'midjourney',
    b'stable-diffusion',
    b'stable diffusion',
    b'stable_diffusion',
    b'artbreeder',
    b'runwayml',
    b'remix.ai',
    b'firefly',
    b'adobe_firefly',

    # OpenAI / DALL-E indicators
    b'openai',
    b'dalle',
    b'dalle2',
    b'DALL-E',
    'DALL\u00b7E'.encode('utf-8'),  # "DALL·E" with U+00B7 middle dot
    b'created_by: openai',
    b'tool: dalle',
    b'tool: dalle2',
    b'creator: openai',
    b'creator: dalle',
    b'openai.com',
    b'api.openai.com',
    b'openai_model',
    b'openai_gpt',

    # Further possible AI-generation indicators
    b'generated_by',
    b'model_id',
    b'model_version',
    b'model_info',
    b'tool_name',
    b'tool_creator',
    b'tool_version',
    b'model_signature',
    b'ai_model',
    b'ai_tool',
    b'generator',
    b'generated_by_ai',
    b'ai_generated',
    b'ai_art'
]
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def run_metadata(image_bytes: bytes) -> str:
    """Classify an image's origin from EXIF metadata and raw byte markers.

    Returns one of "edited", "ai_generated", "undetermined", or an
    "error: ..." string when the bytes cannot be analyzed.
    """
    # Editor names whose presence in the EXIF Software tag marks the file
    # as edited.
    suspicious_editors = ["Photoshop", "GIMP", "Snapseed", "Pixlr", "VSCO", "Editor", "Adobe", "Luminar"]

    try:
        img = Image.open(io.BytesIO(image_bytes))
        img.load()

        # EXIF tag 305 is "Software" — the tool that last saved the file.
        software = str(img.getexif().get(305, "")).strip()
        software_lower = software.lower()

        if any(editor.lower() in software_lower for editor in suspicious_editors):
            return "edited"

        # Scan the raw bytes for C2PA / generator fingerprints.
        if any(marker in image_bytes for marker in AI_INDICATORS):
            return "ai_generated"

        return "undetermined"

    except UnidentifiedImageError:
        return "error: invalid image format"
    except Exception as e:
        return f"error: {str(e)}"
|
features/image_edit_detector/preprocess.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image
|
| 2 |
+
import io
|
| 3 |
+
|
| 4 |
+
def preprocess_image(img: Image.Image, quality: int) -> Image.Image:
    """Round-trip the image through an in-memory JPEG save at `quality`.

    Produces the recompressed baseline used for ELA comparison.
    """
    jpeg_buffer = io.BytesIO()
    img.save(jpeg_buffer, format="JPEG", quality=quality)
    jpeg_buffer.seek(0)
    return Image.open(jpeg_buffer)
|
| 9 |
+
|
features/image_edit_detector/routes.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from slowapi import Limiter
|
| 2 |
+
from config import ACCESS_RATE
|
| 3 |
+
from fastapi import APIRouter, File, Request, Depends, HTTPException, UploadFile
|
| 4 |
+
from fastapi.security import HTTPBearer
|
| 5 |
+
from slowapi import Limiter
|
| 6 |
+
from slowapi.util import get_remote_address
|
| 7 |
+
from io import BytesIO
|
| 8 |
+
from .controller import process_image_ela , verify_token,process_fft_image, process_meta_image
|
| 9 |
+
import requests
|
| 10 |
+
router = APIRouter()
# Per-client-IP rate limiting shared by every endpoint below.
limiter = Limiter(key_func=get_remote_address)
# Bearer scheme; token value is checked by verify_token (from .controller).
security = HTTPBearer()
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@router.post("/ela")
|
| 17 |
+
@limiter.limit(ACCESS_RATE)
|
| 18 |
+
async def detect_ela(request:Request,file: UploadFile = File(...), quality: int = 90 ,token: str = Depends(verify_token)):
|
| 19 |
+
# Check file extension
|
| 20 |
+
allowed_types = ["image/jpeg", "image/png"]
|
| 21 |
+
|
| 22 |
+
if file.content_type not in allowed_types:
|
| 23 |
+
raise HTTPException(
|
| 24 |
+
status_code=400,
|
| 25 |
+
detail="Unsupported file type. Only JPEG and PNG images are allowed."
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
content = await file.read()
|
| 29 |
+
result = await process_image_ela(content, quality)
|
| 30 |
+
return result
|
| 31 |
+
|
| 32 |
+
@router.post("/fft")
|
| 33 |
+
@limiter.limit(ACCESS_RATE)
|
| 34 |
+
async def detect_fft(request:Request,file:UploadFile =File(...),threshold:float=0.95,token:str=Depends(verify_token)):
|
| 35 |
+
if file.content_type not in ["image/jpeg", "image/png"]:
|
| 36 |
+
raise HTTPException(status_code=400, detail="Unsupported image type.")
|
| 37 |
+
|
| 38 |
+
content = await file.read()
|
| 39 |
+
result = await process_fft_image(content,threshold)
|
| 40 |
+
return result
|
| 41 |
+
|
| 42 |
+
@router.post("/meta")
|
| 43 |
+
@limiter.limit(ACCESS_RATE)
|
| 44 |
+
async def detect_meta(request:Request,file:UploadFile=File(...),token:str=Depends(verify_token)):
|
| 45 |
+
if file.content_type not in ["image/jpeg", "image/png"]:
|
| 46 |
+
raise HTTPException(status_code=400, detail="Unsupported image type.")
|
| 47 |
+
content = await file.read()
|
| 48 |
+
result = await process_meta_image(content)
|
| 49 |
+
return result
|
| 50 |
+
@router.post("/health")
|
| 51 |
+
@limiter.limit(ACCESS_RATE)
|
| 52 |
+
def heath(request:Request):
|
| 53 |
+
return {"status":"ok"}
|
features/nepali_text_classifier/__init__.py
ADDED
|
File without changes
|
features/nepali_text_classifier/controller.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
from io import BytesIO
|
| 3 |
+
from fastapi import HTTPException, UploadFile, status, Depends
|
| 4 |
+
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 5 |
+
import os
|
| 6 |
+
from features.nepali_text_classifier.inferencer import classify_text
|
| 7 |
+
from features.nepali_text_classifier.preprocess import *
|
| 8 |
+
import re
|
| 9 |
+
|
| 10 |
+
# Bearer-token scheme consumed by verify_token below.
security = HTTPBearer()
|
| 11 |
+
|
| 12 |
+
def contains_english(text: str) -> bool:
    """Return True if `text` contains any ASCII letter (a-z, A-Z).

    Newline and tab characters are removed first so escape characters are
    never counted.
    """
    stripped = text.replace("\n", "").replace("\t", "")
    return re.search(r'[a-zA-Z]', stripped) is not None
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the Bearer token against the MY_SECRET_TOKEN env variable.

    Returns:
        str: the validated token.

    Raises:
        HTTPException: 403 when the token is wrong or no secret is configured.
    """
    import hmac  # local import: constant-time string comparison

    token = credentials.credentials
    expected_token = os.getenv("MY_SECRET_TOKEN")
    # Reject when no secret is configured (the original `!=` against None
    # also rejected everything in that case), and use compare_digest so the
    # comparison does not leak a matching prefix via timing.
    if not expected_token or not hmac.compare_digest(token, expected_token):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return token
|
| 27 |
+
|
| 28 |
+
async def nepali_text_analysis(text: str):
    """Classify a Nepali text as AI- or human-written.

    Validates the minimum word count (10) and maximum length (10,000
    characters), then runs the classifier off the event loop.
    """
    # NOTE(review): the normalized return value is discarded here, so the
    # classifier still sees the original text — confirm this is intended.
    end_symbol_for_NP_text(text)

    if len(text.split()) < 10:
        raise HTTPException(status_code=400, detail="Text must contain at least 10 words")
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

    # classify_text is synchronous/CPU-bound; run it in a worker thread.
    return await asyncio.to_thread(classify_text, text)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# Extract text from uploaded files (.docx, .pdf, .txt).
async def extract_file_contents(file:UploadFile)-> str:
    """Read an UploadFile and return its plain-text contents.

    Raises HTTPException(415) for unsupported content types.
    """
    raw = await file.read()
    stream = BytesIO(raw)

    # Dispatch on the declared MIME type.
    parsers = {
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
        "application/pdf": parse_pdf,
        "text/plain": parse_txt,
    }
    parser = parsers.get(file.content_type)
    if parser is None:
        raise HTTPException(status_code=415,detail="Invalid file type. Only .docx,.pdf and .txt are allowed")
    return parser(stream)
|
| 53 |
+
|
| 54 |
+
async def handle_file_upload(file: UploadFile):
    """Extract text from an uploaded file and classify it as a whole.

    Raises:
        HTTPException: 413 (too long), 404 (empty), 415 (bad type, from
            extract_file_contents), or 500 for unexpected failures.
    """
    try:
        file_contents = await extract_file_contents(file)
        # NOTE(review): the normalized return value is discarded — confirm
        # whether the classifier should see the danda-terminated text.
        end_symbol_for_NP_text(file_contents)
        if len(file_contents) > 10000:
            raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        result = await asyncio.to_thread(classify_text, cleaned_text)
        return result
    except HTTPException:
        # Bug fix: previously these fell into the generic handler below and
        # were converted into 500s; re-raise to preserve 413/404/415 codes.
        raise
    except Exception as e:
        logging.error(f"Error processing file: {e}")
        raise HTTPException(status_code=500, detail="Error processing the file")
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
async def handle_sentence_level_analysis(text: str):
    """Classify each danda-terminated sentence of `text` individually.

    Returns {"analysis": [{"text", "result", "likelihood"}, ...]}.
    """
    text = text.strip()
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

    # NOTE(review): the normalized return value is discarded — confirm.
    end_symbol_for_NP_text(text)

    # Split on the Devanagari danda and re-append it to each sentence.
    sentences = [segment.strip() + "।" for segment in text.split("।") if segment.strip()]

    analysis = []
    for sentence in sentences:
        end_symbol_for_NP_text(sentence)
        verdict = await asyncio.to_thread(classify_text, sentence)
        analysis.append({
            "text": sentence,
            "result": verdict["label"],
            "likelihood": verdict["confidence"]
        })

    return {"analysis": analysis}
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
async def handle_file_sentence(file:UploadFile):
    """Extract text from an uploaded file and classify it sentence by sentence.

    Returns {"analysis": [...]}; raises 413/404/415 for validation errors and
    500 for unexpected failures.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        # Split text into danda-terminated sentences.
        sentences = [s.strip() + "।" for s in cleaned_text.split("।") if s.strip()]

        results = []
        for sentence in sentences:
            end_symbol_for_NP_text(sentence)
            result = await asyncio.to_thread(classify_text, sentence)
            results.append({
                "text": sentence,
                "result": result["label"],
                "likelihood": result["confidence"]
            })

        return {"analysis": results}

    except HTTPException:
        # Bug fix: re-raise validation errors (413/404/415) instead of
        # masking them as generic 500s in the handler below.
        raise
    except Exception as e:
        logging.error(f"Error processing file: {e}")
        raise HTTPException(status_code=500, detail="Error processing the file")
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def classify(text: str):
    # Thin synchronous wrapper around the shared classifier; returns the
    # {"label", "confidence"} dict produced by classify_text.
    return classify_text(text)
|
| 130 |
+
|
features/nepali_text_classifier/inferencer.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from .model_loader import get_model_tokenizer
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
# Run inference on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def classify_text(text: str):
    """Classify `text` as human- or AI-written.

    Returns {"label": "Human"|"AI", "confidence": <percent, 2 decimals>}.
    Class index 0 is treated as Human, 1 as AI.
    """
    model, tokenizer = get_model_tokenizer()
    encoded = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = model(**encoded)

    # The custom XLMR classifier returns raw logits; HF models wrap them
    # in an output object with a .logits attribute.
    logits = outputs if isinstance(outputs, torch.Tensor) else outputs.logits
    probabilities = F.softmax(logits, dim=1)
    predicted = torch.argmax(probabilities, dim=1).item()
    confidence_percent = probabilities[0][predicted].item() * 100

    return {"label": "Human" if predicted == 0 else "AI", "confidence": round(confidence_percent, 2)}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
features/nepali_text_classifier/model_loader.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
import logging
|
| 7 |
+
from huggingface_hub import snapshot_download
|
| 8 |
+
from transformers import AutoTokenizer, AutoModel
|
| 9 |
+
|
| 10 |
+
# Configs
# Hugging Face repo hosting the fine-tuned Nepali AI-vs-human classifier.
REPO_ID = "can-org/Nepali-AI-VS-HUMAN"
# Local directory the snapshot is copied into (cache for later runs).
BASE_DIR = "./np_text_model"
TOKENIZER_DIR = os.path.join(BASE_DIR, "classifier") # <- update this to match your uploaded folder
WEIGHTS_PATH = os.path.join(BASE_DIR, "model_95_acc.pth") # <- change to match actual uploaded weight
# Use GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 16 |
+
|
| 17 |
+
# XLM-RoBERTa backbone with a binary classification head.
class XLMRClassifier(nn.Module):
    """XLM-R encoder plus a linear head producing 2 logits (human vs AI)."""

    def __init__(self):
        super().__init__()
        self.bert = AutoModel.from_pretrained("xlm-roberta-base")
        self.classifier = nn.Linear(self.bert.config.hidden_size, 2)

    def forward(self, input_ids, attention_mask):
        # Use the [CLS] (first-token) embedding as the sequence representation.
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_embedding = encoded.last_hidden_state[:, 0, :]
        return self.classifier(cls_embedding)
|
| 28 |
+
|
| 29 |
+
# Globals for caching
# Lazily populated by get_model_tokenizer() so the model loads only once.
_model = None
_tokenizer = None
|
| 32 |
+
|
| 33 |
+
def download_model_repo():
    """Fetch the model snapshot from the Hub into BASE_DIR (idempotent).

    The download is skipped entirely when BASE_DIR already exists.
    """
    if os.path.isdir(BASE_DIR):
        logging.info("Model already downloaded.")
        return
    snapshot = snapshot_download(repo_id=REPO_ID)
    os.makedirs(BASE_DIR, exist_ok=True)
    shutil.copytree(snapshot, BASE_DIR, dirs_exist_ok=True)
|
| 40 |
+
|
| 41 |
+
def load_model():
    """Download the snapshot if needed and build the (model, tokenizer) pair."""
    download_model_repo()
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR)
    model = XLMRClassifier().to(device)
    # NOTE(review): torch.load unpickles the checkpoint — only load weights
    # from the trusted REPO_ID configured above.
    state_dict = torch.load(WEIGHTS_PATH, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    return model, tokenizer
|
| 48 |
+
|
| 49 |
+
def get_model_tokenizer():
    """Return the cached (model, tokenizer) pair, loading it on first use."""
    global _model, _tokenizer
    if _tokenizer is None or _model is None:
        _model, _tokenizer = load_model()
    return _model, _tokenizer
|
| 54 |
+
|
features/nepali_text_classifier/preprocess.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import fitz # PyMuPDF
|
| 2 |
+
import docx
|
| 3 |
+
from io import BytesIO
|
| 4 |
+
import logging
|
| 5 |
+
from fastapi import HTTPException
|
| 6 |
+
from pypdf import PdfReader
|
| 7 |
+
|
| 8 |
+
def parse_docx(file: BytesIO):
    """Return the text of a .docx file, one paragraph per line."""
    document = docx.Document(file)
    # Matches the original accumulation: every paragraph gets a trailing \n.
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def parse_pdf(file: BytesIO):
    """Return the concatenated text of every page in a PDF.

    Raises:
        HTTPException: 500 when the file cannot be parsed.
    """
    try:
        reader = PdfReader(file)
        text = ""
        for page in reader.pages:
            # Bug fix: extract_text() can return None (e.g. image-only
            # pages); the original then crashed with a TypeError on `+=`.
            text += page.extract_text() or ""
        return text
    except Exception as e:
        logging.error(f"Error while processing PDF: {str(e)}")
        raise HTTPException(
            status_code=500, detail="Error processing PDF file")
|
| 27 |
+
|
| 28 |
+
def parse_txt(file: BytesIO):
    """Decode a plain-text upload as UTF-8."""
    raw_bytes = file.read()
    return raw_bytes.decode("utf-8")
|
| 30 |
+
|
| 31 |
+
def end_symbol_for_NP_text(text: str) -> str:
    """Strip surrounding whitespace and guarantee a trailing danda ("।")."""
    trimmed = text.strip()
    return trimmed if trimmed.endswith("।") else trimmed + "।"
|
features/nepali_text_classifier/routes.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from slowapi import Limiter
|
| 2 |
+
from config import ACCESS_RATE
|
| 3 |
+
from .controller import handle_file_sentence, handle_sentence_level_analysis, nepali_text_analysis
|
| 4 |
+
from .inferencer import classify_text
|
| 5 |
+
from fastapi import APIRouter, File, Request, Depends, HTTPException, UploadFile
|
| 6 |
+
from fastapi.security import HTTPBearer
|
| 7 |
+
from slowapi import Limiter
|
| 8 |
+
from slowapi.util import get_remote_address
|
| 9 |
+
from pydantic import BaseModel
|
| 10 |
+
from .controller import handle_file_upload
|
| 11 |
+
router = APIRouter()
# Per-client-IP rate limiting for every endpoint in this router.
limiter = Limiter(key_func=get_remote_address)
# NOTE(review): Depends(security) below only parses the Authorization
# header — unlike the controller's verify_token, it never compares the
# token value against MY_SECRET_TOKEN. Confirm which behavior is intended.
security = HTTPBearer()
|
| 14 |
+
|
| 15 |
+
# Input schema
class TextInput(BaseModel):
    # Request body for the text-analysis endpoints: {"text": "..."}
    text: str
|
| 18 |
+
|
| 19 |
+
@router.post("/analyse")
|
| 20 |
+
@limiter.limit(ACCESS_RATE)
|
| 21 |
+
async def analyse(request: Request, data: TextInput, token: str = Depends(security)):
|
| 22 |
+
result = classify_text(data.text)
|
| 23 |
+
return result
|
| 24 |
+
|
| 25 |
+
@router.post("/upload")
|
| 26 |
+
@limiter.limit(ACCESS_RATE)
|
| 27 |
+
async def upload_file(request:Request,file:UploadFile=File(...),token:str=Depends(security)):
|
| 28 |
+
return await handle_file_upload(file)
|
| 29 |
+
|
| 30 |
+
@router.post("/analyse-sentences")
|
| 31 |
+
@limiter.limit(ACCESS_RATE)
|
| 32 |
+
async def upload_file(request:Request,data:TextInput,token:str=Depends(security)):
|
| 33 |
+
return await handle_sentence_level_analysis(data.text)
|
| 34 |
+
|
| 35 |
+
@router.post("/file-sentences-analyse")
|
| 36 |
+
@limiter.limit(ACCESS_RATE)
|
| 37 |
+
async def analyze_sentance_file(request: Request, file: UploadFile = File(...), token: str = Depends(security)):
|
| 38 |
+
return await handle_file_sentence(file)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@router.get("/health")
|
| 42 |
+
@limiter.limit(ACCESS_RATE)
|
| 43 |
+
def health(request: Request):
|
| 44 |
+
return {"status": "ok"}
|
| 45 |
+
|
features/rag_chatbot/__init__.py
ADDED
|
File without changes
|