Pujan-Dev commited on
Commit
538bdae
·
2 Parent(s): 7fd374c6396e6b

Merge branch 'Testing'

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env-example +34 -0
  2. .gitignore +63 -166
  3. Dockerfile +26 -0
  4. Procfile +1 -0
  5. README.md +22 -1
  6. READMEs.md +152 -0
  7. __init__.py +0 -0
  8. app.py +62 -0
  9. config.py +2 -0
  10. docs/api_endpoints.md +92 -0
  11. docs/deployment.md +108 -0
  12. docs/detector/ELA.md +65 -0
  13. docs/detector/ai_human_image_checker.md +132 -0
  14. docs/detector/fft.md +136 -0
  15. docs/detector/meta.md +20 -0
  16. docs/detector/note-for-backend.md +94 -0
  17. docs/features/image_classifier.md +31 -0
  18. docs/features/nepali_text_classifier.md +30 -0
  19. docs/features/text_classifier.md +30 -0
  20. docs/functions.md +62 -0
  21. docs/nestjs_integration.md +83 -0
  22. docs/security.md +10 -0
  23. docs/setup.md +24 -0
  24. docs/status_code.md +68 -0
  25. docs/structure.md +74 -0
  26. features/ai_human_image_classifier/controller.py +35 -0
  27. features/ai_human_image_classifier/inferencer.py +48 -0
  28. features/ai_human_image_classifier/main.py +27 -0
  29. features/ai_human_image_classifier/model_loader.py +80 -0
  30. features/ai_human_image_classifier/preprocessor.py +34 -0
  31. features/ai_human_image_classifier/routes.py +44 -0
  32. features/image_classifier/__init__.py +0 -0
  33. features/image_classifier/controller.py +16 -0
  34. features/image_classifier/inferencer.py +42 -0
  35. features/image_classifier/model_loader.py +58 -0
  36. features/image_classifier/preprocess.py +26 -0
  37. features/image_classifier/routes.py +26 -0
  38. features/image_edit_detector/controller.py +49 -0
  39. features/image_edit_detector/detectors/ela.py +32 -0
  40. features/image_edit_detector/detectors/fft.py +40 -0
  41. features/image_edit_detector/detectors/metadata.py +82 -0
  42. features/image_edit_detector/preprocess.py +9 -0
  43. features/image_edit_detector/routes.py +53 -0
  44. features/nepali_text_classifier/__init__.py +0 -0
  45. features/nepali_text_classifier/controller.py +130 -0
  46. features/nepali_text_classifier/inferencer.py +23 -0
  47. features/nepali_text_classifier/model_loader.py +54 -0
  48. features/nepali_text_classifier/preprocess.py +35 -0
  49. features/nepali_text_classifier/routes.py +45 -0
  50. features/rag_chatbot/__init__.py +0 -0
.env-example ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MY_SECRET_TOKEN="SECRET_CODE_TOKEN"
2
+
3
+ # CHROMA_HOST = "localhost" (Host gareko address rakhney)
4
+
5
+
6
+ # EXAMPLE CONFIGURATIONS FOR DIFFERENT PROVIDERS(Use only one at once)
7
+ # ===========================================
8
+
9
+ # FOR OPENAI:(PAID)
10
+ # LLM_PROVIDER=openai
11
+ # LLM_API_KEY=sk-your-openai-api-key
12
+ # LLM_MODEL=gpt-3.5-turbo
13
+ # # Other options: gpt-4, gpt-4-turbo-preview, etc.
14
+
15
+ # FOR GROQ:(FREE: BABAL XA-> prefer this)
16
+ # LLM_PROVIDER=groq
17
+ # LLM_API_KEY=gsk_your-groq-api-key
18
+ # LLM_MODEL=llama-3.3-70b-versatile
19
+ # # Other options: llama-3.1-70b-versatile, mixtral-8x7b-32768, etc.
20
+
21
+ # FOR OPENROUTER:(FREE: LASTAI RATE LIMIT LAGAUXA)
22
+ # LLM_PROVIDER=openrouter
23
+ # LLM_API_KEY=sk-or-your-openrouter-api-key
24
+ # LLM_MODEL=meta-llama/llama-3.1-8b-instruct:free
25
+ # # Other options: anthropic/claude-3-haiku, google/gemma-7b-it, etc.
26
+
27
+ # ===========================================
28
+ # ADVANCED CONFIGURATION
29
+ # ===========================================
30
+ # Temperature (0.0 to 1.0) - controls randomness
31
+ # LLM_TEMPERATURE=0.1
32
+
33
+ # Maximum tokens for response
34
+ # LLM_MAX_TOKENS=4096
.gitignore CHANGED
@@ -1,174 +1,71 @@
1
- # Byte-compiled / optimized / DLL files
 
 
 
 
 
 
 
2
  __pycache__/
3
- *.py[cod]
4
- *$py.class
5
-
6
- # C extensions
7
- *.so
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Distribution / packaging
10
- .Python
11
  build/
12
- develop-eggs/
13
  dist/
14
- downloads/
15
- eggs/
16
- .eggs/
17
- lib/
18
- lib64/
19
- parts/
20
- sdist/
21
- var/
22
- wheels/
23
- share/python-wheels/
24
  *.egg-info/
25
- .installed.cfg
26
- *.egg
27
- MANIFEST
28
-
29
- # PyInstaller
30
- # Usually these files are written by a python script from a template
31
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
- *.manifest
33
- *.spec
34
-
35
- # Installer logs
36
- pip-log.txt
37
- pip-delete-this-directory.txt
38
-
39
- # Unit test / coverage reports
40
- htmlcov/
41
- .tox/
42
- .nox/
43
- .coverage
44
- .coverage.*
45
- .cache
46
- nosetests.xml
47
- coverage.xml
48
- *.cover
49
- *.py,cover
50
- .hypothesis/
51
- .pytest_cache/
52
- cover/
53
-
54
- # Translations
55
- *.mo
56
- *.pot
57
-
58
- # Django stuff:
59
- *.log
60
- local_settings.py
61
- db.sqlite3
62
- db.sqlite3-journal
63
-
64
- # Flask stuff:
65
- instance/
66
- .webassets-cache
67
-
68
- # Scrapy stuff:
69
- .scrapy
70
-
71
- # Sphinx documentation
72
- docs/_build/
73
-
74
- # PyBuilder
75
- .pybuilder/
76
- target/
77
-
78
- # Jupyter Notebook
79
- .ipynb_checkpoints
80
-
81
- # IPython
82
- profile_default/
83
- ipython_config.py
84
-
85
- # pyenv
86
- # For a library or package, you might want to ignore these files since the code is
87
- # intended to run in multiple environments; otherwise, check them in:
88
- # .python-version
89
-
90
- # pipenv
91
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
- # install all needed dependencies.
95
- #Pipfile.lock
96
 
97
- # UV
98
- # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
- # This is especially recommended for binary packages to ensure reproducibility, and is more
100
- # commonly ignored for libraries.
101
- #uv.lock
102
 
103
- # poetry
104
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
- # This is especially recommended for binary packages to ensure reproducibility, and is more
106
- # commonly ignored for libraries.
107
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
- #poetry.lock
109
-
110
- # pdm
111
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
- #pdm.lock
113
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
- # in version control.
115
- # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
- .pdm.toml
117
- .pdm-python
118
- .pdm-build/
119
-
120
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
- __pypackages__/
122
-
123
- # Celery stuff
124
- celerybeat-schedule
125
- celerybeat.pid
126
-
127
- # SageMath parsed files
128
- *.sage.py
129
-
130
- # Environments
131
  .env
132
- .venv
133
- env/
134
- venv/
135
- ENV/
136
- env.bak/
137
- venv.bak/
138
-
139
- # Spyder project settings
140
- .spyderproject
141
- .spyproject
142
-
143
- # Rope project settings
144
- .ropeproject
145
-
146
- # mkdocs documentation
147
- /site
148
-
149
- # mypy
150
- .mypy_cache/
151
- .dmypy.json
152
- dmypy.json
153
-
154
- # Pyre type checker
155
- .pyre/
156
-
157
- # pytype static type analyzer
158
- .pytype/
159
-
160
- # Cython debug symbols
161
- cython_debug/
162
-
163
- # PyCharm
164
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
- # and can be added to the global gitignore or merged into this file. For a more nuclear
167
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
- #.idea/
169
-
170
- # Ruff stuff:
171
- .ruff_cache/
172
-
173
- # PyPI configuration file
174
- .pypirc
 
1
+ # ---- Python Environment ----
2
+ venv/
3
+ .venv/
4
+ env/
5
+ ENV/
6
+ *.pyc
7
+ *.pyo
8
+ *.pyd
9
  __pycache__/
10
+ **/__pycache__/
11
+
12
+ # ---- VS Code / IDEs ----
13
+ .vscode/
14
+ .idea/
15
+ *.swp
16
+
17
+ # ---- Jupyter / IPython ----
18
+ .ipynb_checkpoints/
19
+ *.ipynb
20
+
21
+ # ---- Model & Data Artifacts ----
22
+ *.pth
23
+ *.pt
24
+ *.h5
25
+ *.ckpt
26
+ *.onnx
27
+ *.joblib
28
+ *.pkl
29
+
30
+ # ---- Hugging Face Cache ----
31
+ ~/.cache/huggingface/
32
+ huggingface_cache/
33
+
34
+ # ---- Logs and Dumps ----
35
+ *.log
36
+ *.out
37
+ *.err
38
 
39
+ # ---- Build Artifacts ----
 
40
  build/
 
41
  dist/
 
 
 
 
 
 
 
 
 
 
42
  *.egg-info/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ # ---- System Files ----
45
+ .DS_Store
46
+ Thumbs.db
 
 
47
 
48
+ # ---- Environment Configs ----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  .env
50
+ .env.*
51
+
52
+ # ---- Project-specific ----
53
+ Ai-Text-Detector/
54
+ HuggingFace/model/
55
+
56
+ # ---- Node Projects (if applicable) ----
57
+ node_modules/
58
+ model/
59
+ # Ignore only the .gitattributes file inside models/, not the whole folder
+ models/.gitattributes
60
+
61
+ todo.md
62
+ np_text_model
63
+ IMG_Models
64
+ notebooks
65
+ # Ignore model and tokenizer files
66
+ np_text_model/classifier/sentencepiece.bpe.model
67
+ np_text_model/classifier/tokenizer.json
68
+
69
+ # vector database
70
+ chroma_data
71
+ chroma_database
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.10
5
+
6
+ # Create user first
7
+ RUN useradd -m -u 1000 user
8
+
9
+ # Install system dependencies (requires root)
10
+ RUN apt-get update && apt-get install -y libgl1
11
+
12
+ # Switch to non-root user
13
+ USER user
14
+ ENV PATH="/home/user/.local/bin:$PATH"
15
+
16
+ # Add TensorFlow environment variables to reduce logging noise
17
+ WORKDIR /app
18
+
19
+ COPY --chown=user ./requirements.txt requirements.txt
20
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
21
+ RUN python -m spacy download en_core_web_sm || echo "Failed to download model"
22
+
23
+ COPY --chown=user . /app
24
+
25
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
26
+
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: uvicorn app:app --host 0.0.0.0 --port ${PORT:-8000}
README.md CHANGED
@@ -1 +1,22 @@
1
- # aiapi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Testing AI Contain
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ sdk_version: "latest"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Testing AI Contain
13
+
14
+ This Hugging Face Space uses **Docker** to run a custom environment for AI content detection.
15
+
16
+ ## How to run locally
17
+
18
+ ```bash
19
+ docker build -t testing-ai-contain .
20
+ docker run -p 7860:7860 testing-ai-contain
21
+
22
+ ```
READMEs.md ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI-Contain-Checker
2
+
3
+ A modular AI content detection system with support for **image classification**, **image edit detection**, **Nepali text classification**, and **general text classification**. Built for performance and extensibility, it is ideal for detecting AI-generated content in both visual and textual forms.
4
+
5
+
6
+ ## 🌟 Features
7
+
8
+ ### 🖼️ Image Classifier
9
+
10
+ * **Purpose**: Classifies whether an image is AI-generated or a real-life photo.
11
+ * **Model**: Fine-tuned **InceptionV3** CNN.
12
+ * **Dataset**: Custom curated dataset with **\~79,950 images** for binary classification.
13
+ * **Location**: [`features/image_classifier`](features/image_classifier)
14
+ * **Docs**: [`docs/features/image_classifier.md`](docs/features/image_classifier.md)
15
+
16
+ ### 🖌️ Image Edit Detector
17
+
18
+ * **Purpose**: Detects image tampering or post-processing.
19
+ * **Techniques Used**:
20
+
21
+ * **Error Level Analysis (ELA)**: Visualizes compression artifacts.
22
+ * **Fast Fourier Transform (FFT)**: Detects unnatural frequency patterns.
23
+ * **Location**: [`features/image_edit_detector`](features/image_edit_detector)
24
+ * **Docs**:
25
+
26
+ * [ELA](docs/detector/ELA.md)
27
+ * [FFT](docs/detector/fft.md )
28
+ * [Metadata Analysis](docs/detector/meta.md)
29
+ * [Backend Notes](docs/detector/note-for-backend.md)
30
+
31
+ ### 📝 Nepali Text Classifier
32
+
33
+ * **Purpose**: Determines if Nepali text content is AI-generated or written by a human.
34
+ * **Model**: Based on `XLMRClassifier` fine-tuned on Nepali language data.
35
+ * **Dataset**: Scraped dataset of **\~18,000** Nepali texts.
36
+ * **Location**: [`features/nepali_text_classifier`](features/nepali_text_classifier)
37
+ * **Docs**: [`docs/features/nepali_text_classifier.md`](docs/features/nepali_text_classifier.md)
38
+
39
+ ### 🌐 English Text Classifier
40
+
41
+ * **Purpose**: Detects if English text is AI-generated or human-written.
42
+ * **Pipeline**:
43
+
44
+ * Uses **GPT2 tokenizer** for input preprocessing.
45
+ * Custom binary classifier to differentiate between AI and human-written content.
46
+ * **Location**: [`features/text_classifier`](features/text_classifier)
47
+ * **Docs**: [`docs/features/text_classifier.md`](docs/features/text_classifier.md)
48
+
49
+ ---
50
+
51
+ ## 🗂️ Project Structure
52
+
53
+ ```bash
54
+ AI-Checker/
55
+
56
+ ├── app.py # Main FastAPI entry point
57
+ ├── config.py # Configuration settings
58
+ ├── Dockerfile # Docker build script
59
+ ├── Procfile # Deployment file for Heroku or similar
60
+ ├── requirements.txt # Python dependencies
61
+ ├── README.md # You are here 📘
62
+
63
+ ├── features/ # Core detection modules
64
+ │ ├── image_classifier/
65
+ │ ├── image_edit_detector/
66
+ │ ├── nepali_text_classifier/
67
+ │ └── text_classifier/
68
+
69
+ ├── docs/ # Internal and API documentation
70
+ │ ├── api_endpoints.md
71
+ │ ├── deployment.md
72
+ │ ├── detector/
73
+ │ │ ├── ELA.md
74
+ │ │ ├── fft.md
75
+ │ │ ├── meta.md
76
+ │ │ └── note-for-backend.md
77
+ │ ├── functions.md
78
+ │ ├── nestjs_integration.md
79
+ │ ├── security.md
80
+ │ ├── setup.md
81
+ │ └── structure.md
82
+
83
+ ├── IMG_Models/ # Saved image classifier model(s)
84
+ │ └── latest-my_cnn_model.h5
85
+
86
+ ├── notebooks/ # Experimental and debug notebooks
87
+ ├── static/ # Static assets if needed
88
+ └── test.md # Test notes
89
+ ```
90
+
91
+ ---
92
+
93
+ ## 📚 Documentation Links
94
+
95
+ * [API Endpoints](docs/api_endpoints.md)
96
+ * [Deployment Guide](docs/deployment.md)
97
+ * [Detector Documentation](docs/detector/)
98
+
99
+ * [Error Level Analysis (ELA)](docs/detector/ELA.md)
100
+ * [Fast Fourier Transform (FFT)](docs/detector/fft.md)
101
+ * [Metadata Analysis](docs/detector/meta.md)
102
+ * [Backend Notes](docs/detector/note-for-backend.md)
103
+ * [Functions Overview](docs/functions.md)
104
+ * [NestJS Integration Guide](docs/nestjs_integration.md)
105
+ * [Security Details](docs/security.md)
106
+ * [Setup Instructions](docs/setup.md)
107
+ * [Project Structure](docs/structure.md)
108
+
109
+ ---
110
+
111
+ ## 🚀 Usage
112
+
113
+ 1. **Install dependencies**
114
+
115
+ ```bash
116
+ pip install -r requirements.txt
117
+ ```
118
+
119
+ 2. **Run the API**
120
+
121
+ ```bash
122
+ uvicorn app:app --reload
123
+ ```
124
+
125
+ 3. **Build Docker (optional)**
126
+
127
+ ```bash
128
+ docker build -t ai-contain-checker .
129
+ docker run -p 8000:8000 ai-contain-checker
130
+ ```
131
+
132
+ ---
133
+
134
+ ## 🔐 Security & Integration
135
+
136
+ * **Token Authentication** and **IP Whitelisting** supported.
137
+ * NestJS integration guide: [`docs/nestjs_integration.md`](docs/nestjs_integration.md)
138
+ * Rate limiting handled using `slowapi`.
139
+
140
+ ---
141
+
142
+ ## 🛡️ Future Plans
143
+
144
+ * Add **video classifier** module.
145
+ * Expand dataset for **multilingual** AI content detection.
146
+ * Add **fine-tuning UI** for models.
147
+
148
+ ---
149
+
150
+ ## 📄 License
151
+
152
+ See full license terms here: [`LICENSE.md`](license.md)
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from slowapi import Limiter, _rate_limit_exceeded_handler
3
+ from fastapi.responses import FileResponse
4
+ from slowapi.middleware import SlowAPIMiddleware
5
+ from slowapi.errors import RateLimitExceeded
6
+ from slowapi.util import get_remote_address
7
+ from fastapi.responses import JSONResponse
8
+ from features.text_classifier.routes import router as text_classifier_router
9
+ from features.nepali_text_classifier.routes import (
10
+ router as nepali_text_classifier_router,
11
+ )
12
+ from features.image_classifier.routes import router as image_classifier_router
13
+ from features.image_edit_detector.routes import router as image_edit_detector_router
14
+ from fastapi.staticfiles import StaticFiles
15
+
16
+ from config import ACCESS_RATE
17
+
18
+ import requests
19
+
20
+ limiter = Limiter(key_func=get_remote_address, default_limits=[ACCESS_RATE])
21
+
22
+ app = FastAPI()
23
+ # added the robots.txt
24
+ # Set up SlowAPI
25
+ app.state.limiter = limiter
26
+ app.add_exception_handler(
27
+ RateLimitExceeded,
28
+ lambda request, exc: JSONResponse(
29
+ status_code=429,
30
+ content={
31
+ "status_code": 429,
32
+ "error": "Rate limit exceeded",
33
+ "message": "Too many requests. Chill for a bit and try again",
34
+ },
35
+ ),
36
+ )
37
+ app.add_middleware(SlowAPIMiddleware)
38
+
39
+ # Include your routes
40
+ app.include_router(text_classifier_router, prefix="/text")
41
+ app.include_router(nepali_text_classifier_router, prefix="/NP")
42
+ app.include_router(image_classifier_router, prefix="/AI-image")
43
+ app.include_router(image_edit_detector_router, prefix="/detect")
44
+
45
+
46
+ @app.get("/")
47
+ @limiter.limit(ACCESS_RATE)
48
+ async def root(request: Request):
49
+ return {
50
+ "message": "API is working",
51
+ "endpoints": [
52
+ "/text/analyse",
53
+ "/text/upload",
54
+ "/text/analyse-sentences",
55
+ "/text/analyse-sentance-file",
56
+ "/NP/analyse",
57
+ "/NP/upload",
58
+ "/NP/analyse-sentences",
59
+ "/NP/file-sentences-analyse",
60
+ "/AI-image/analyse",
61
+ ],
62
+ }
config.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ACCESS_RATE = "20/minute"
2
+
docs/api_endpoints.md ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🧩 API Endpoints
2
+
3
+ ### English (GPT-2) - `/text/`
4
+
5
+ | Endpoint | Method | Description |
6
+ | ----------------------------- | ------ | -------------------------------------- |
7
+ | `/text/analyse` | POST | Classify raw English text |
8
+ | `/text/analyse-sentences` | POST | Sentence-by-sentence breakdown |
9
+ | `/text/analyse-sentance-file` | POST | Upload file, per-sentence breakdown |
10
+ | `/text/upload` | POST | Upload file for overall classification |
11
+ | `/text/health` | GET | Health check |
12
+
13
+ #### Example: Classify English text
14
+
15
+ ```bash
16
+ curl -X POST http://localhost:8000/text/analyse \
17
+ -H "Authorization: Bearer <SECRET_TOKEN>" \
18
+ -H "Content-Type: application/json" \
19
+ -d '{"text": "This is a sample text for analysis."}'
20
+ ```
21
+
22
+ **Response:**
23
+
24
+ ```json
25
+ {
26
+ "result": "AI-generated",
27
+ "perplexity": 55.67,
28
+ "ai_likelihood": 66.6
29
+ }
30
+ ```
31
+
32
+ #### Example: File upload
33
+
34
+ ```bash
35
+ curl -X POST http://localhost:8000/text/upload \
36
+ -H "Authorization: Bearer <SECRET_TOKEN>" \
37
+ -F 'file=@yourfile.txt;type=text/plain'
38
+ ```
39
+
40
+ ---
41
+
42
+ ### Nepali (SentencePiece) - `/NP/`
43
+
44
+ | Endpoint | Method | Description |
45
+ | ---------------------------- | ------ | ------------------------------------ |
46
+ | `/NP/analyse` | POST | Classify Nepali text |
47
+ | `/NP/analyse-sentences` | POST | Sentence-by-sentence breakdown |
48
+ | `/NP/upload` | POST | Upload Nepali PDF for classification |
49
+ | `/NP/file-sentences-analyse` | POST | PDF upload, per-sentence breakdown |
50
+ | `/NP/health` | GET | Health check |
51
+
52
+ #### Example: Nepali text classification
53
+
54
+ ```bash
55
+ curl -X POST http://localhost:8000/NP/analyse \
56
+ -H "Authorization: Bearer <SECRET_TOKEN>" \
57
+ -H "Content-Type: application/json" \
58
+ -d '{"text": "यो उदाहरण वाक्य हो।"}'
59
+ ```
60
+
61
+ **Response:**
62
+
63
+ ```json
64
+ {
65
+ "label": "Human",
66
+ "confidence": 98.6
67
+ }
68
+ ```
69
+
70
+ #### Example: Nepali PDF upload
71
+
72
+ ```bash
73
+ curl -X POST http://localhost:8000/NP/upload \
74
+ -H "Authorization: Bearer <SECRET_TOKEN>" \
75
+ -F 'file=@NepaliText.pdf;type=application/pdf'
76
+ ```
77
+
78
+ ### Image Classification - `/AI-image/`
79
+
80
+ | Endpoint | Method | Description |
81
+ | ----------------------- | ------ | ----------------------- |
82
+ | `/AI-image/analyse` | POST | Classify Image using ML |
83
+
84
+ #### Example: Image Classification
85
+
86
+ ```bash
87
+ curl -X POST http://localhost:8000/AI-image/analyse \
88
+ -H "Authorization: Bearer <SECRET_TOKEN>" \
89
+ -F 'file=@test1.png'
90
+ ```
91
+
92
+ [🔙 Back to Main README](../README.md)
docs/deployment.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Deployment
3
+
4
+ This project is containerized and deployed on **Hugging Face Spaces** using a custom `Dockerfile`. This guide explains the structure of the Dockerfile and key considerations for deploying FastAPI apps on Spaces with Docker SDK.
5
+
6
+ ---
7
+
8
+ ## 📦 Base Image
9
+
10
+ ```dockerfile
11
+ FROM python:3.10
12
+ ```
13
+
14
+ We use the official Python 3.10 image for compatibility and stability across most Python libraries and tools.
15
+
16
+ ---
17
+
18
+ ## 👤 Create a Non-Root User
19
+
20
+ ```dockerfile
21
+ RUN useradd -m -u 1000 user
22
+ USER user
23
+ ENV PATH="/home/user/.local/bin:$PATH"
24
+ ```
25
+
26
+ * Hugging Face Spaces **requires** that containers run as a non-root user with UID `1000`.
27
+ * We also prepend the user's local binary path to `PATH` for Python package accessibility.
28
+
29
+ ---
30
+
31
+ ## 🗂️ Set Working Directory
32
+
33
+ ```dockerfile
34
+ WORKDIR /app
35
+ ```
36
+
37
+ All application files will reside under `/app` for consistency and clarity.
38
+
39
+ ---
40
+
41
+ ## 📋 Install Dependencies
42
+
43
+ ```dockerfile
44
+ COPY --chown=user ./requirements.txt requirements.txt
45
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
46
+ ```
47
+
48
+ * Copies the dependency list with correct file ownership.
49
+ * Uses `--no-cache-dir` to reduce image size.
50
+ * Ensures the latest compatible versions are installed.
51
+
52
+ ---
53
+
54
+ ## 🔡 Download Language Model (Optional)
55
+
56
+ ```dockerfile
57
+ RUN python -m spacy download en_core_web_sm || echo "Failed to download model"
58
+ ```
59
+
60
+ * Downloads the small English NLP model required by SpaCy.
61
+ * Uses `|| echo ...` to prevent build failure if the download fails (optional safeguard).
62
+
63
+ ---
64
+
65
+ ## 📁 Copy Project Files
66
+
67
+ ```dockerfile
68
+ COPY --chown=user . /app
69
+ ```
70
+
71
+ Copies the entire project source into the container, setting correct ownership for Hugging Face's user-based execution.
72
+
73
+ ---
74
+
75
+ ## 🌐 Start the FastAPI Server
76
+
77
+ ```dockerfile
78
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
79
+ ```
80
+
81
+ * Launches the FastAPI app using `uvicorn`.
82
+ * **Port 7860 is mandatory** for Docker-based Hugging Face Spaces deployments.
83
+ * `app:app` refers to the `FastAPI()` instance in `app.py`.
84
+
85
+ ---
86
+
87
+ ## ✅ Deployment Checklist
88
+
89
+ * [x] Ensure your main file is named `app.py` or adjust `CMD` accordingly.
90
+ * [x] All dependencies should be listed in `requirements.txt`.
91
+ * [x] If using models like SpaCy, verify they are downloaded or bundled.
92
+ * [x] Test your Dockerfile locally with `docker build` before pushing to Hugging Face.
93
+
94
+ ---
95
+
96
+ ## 📚 References
97
+
98
+ * Hugging Face Docs: [Spaces Docker SDK](https://huggingface.co/docs/hub/spaces-sdks-docker)
99
+ * Uvicorn Docs: [https://www.uvicorn.org/](https://www.uvicorn.org/)
100
+ * SpaCy Models: [https://spacy.io/models](https://spacy.io/models)
101
+
102
+ ---
103
+
104
+ Happy deploying!
105
+ **P.S.** Try not to break stuff. 😅
106
+
107
+
108
+ [🔙 Back to Main README](../README.md)
docs/detector/ELA.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Error Level Analysis (ELA) Detector
2
+
3
+ This module provides a function to perform Error Level Analysis (ELA) on images to detect potential manipulations or edits.
4
+
5
+ ## Function: `run_ela`
6
+
7
+ ```python
8
+ def run_ela(image: Image.Image, quality: int = 90, threshold: int = 15) -> bool:
9
+ ```
10
+
11
+ ### Description
12
+
13
+ Error Level Analysis (ELA) works by recompressing an image at a specified JPEG quality level and comparing it to the original image. Differences between the two images reveal areas with inconsistent compression artifacts — often indicating image manipulation.
14
+
15
+ The function computes the maximum pixel difference across all color channels and uses a threshold to determine if the image is likely edited.
16
+
17
+ ### Parameters
18
+
19
+ | Parameter | Type | Default | Description |
20
+ | ----------- | ----------- | ------- | ------------------------------------------------------------------------------------------- |
21
+ | `image` | `PIL.Image` | N/A | Input image in RGB mode to analyze. |
22
+ | `quality` | `int` | 90 | JPEG compression quality used for recompression during analysis (lower = more compression). |
23
+ | `threshold` | `int` | 15 | Pixel difference threshold to flag the image as edited. |
24
+
25
+ ### Returns
26
+
27
+ `bool`
28
+
29
+ - `True` if the image is likely edited (max pixel difference > threshold).
30
+ - `False` if the image appears unedited.
31
+
32
+ ### Usage Example
33
+
34
+ ```python
35
+ from PIL import Image
36
+ from detectors.ela import run_ela
37
+
38
+ # Open and convert image to RGB
39
+ img = Image.open("example.jpg").convert("RGB")
40
+
41
+ # Run ELA detection
42
+ is_edited = run_ela(img, quality=90, threshold=15)
43
+
44
+ print("Image edited:", is_edited)
45
+ ```
46
+
47
+ ### Notes
48
+
49
+ - The input image **must** be in RGB mode for accurate analysis.
50
+ - ELA is a heuristic technique; combining it with other detection methods increases reliability.
51
+ - Visualizing the enhanced difference image can help identify edited regions (not returned by this function but possible to add).
52
+
53
+ ### Installation
54
+
55
+ Make sure you have Pillow installed:
56
+
57
+ ```bash
58
+ pip install pillow
59
+ ```
60
+
61
+ ### Running Locally
62
+
63
+ Just put the function in a notebook or script file and run it with your image. It works well for basic images.
64
+
65
+ [🔙 Back to Main README](../README.md)
docs/detector/ai_human_image_checker.md ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Real vs. Fake Image Classification for Production Pipeline
2
+ ==========================================================
3
+
4
+ 1\. Business Problem
5
+ --------------------
6
+
7
+ This project addresses the critical business need to automatically identify and flag manipulated or synthetically generated images. By accurately classifying images as **"real"** or **"fake,"** we can enhance the integrity of our platform, prevent the spread of misinformation, and protect our users from fraudulent content. This solution is designed for integration into our production pipeline to process images in real-time.
8
+
9
+ 2\. Solution Overview
10
+ ---------------------
11
+
12
+ This solution leverages OpenAI's CLIP (Contrastive Language-Image Pre-Training) model to differentiate between real and fake images. The system operates as follows:
13
+
14
+ 1. **Feature Extraction:** A pre-trained CLIP model ('ViT-L/14') converts input images into 768-dimensional feature vectors.
15
+
16
+ 2. **Classification:** A Support Vector Machine (SVM) model, trained on our internal dataset of real and fake images, classifies the feature vectors.
17
+
18
+ 3. **Deployment:** The trained model is deployed as a service that can be integrated into our production image processing pipeline.
19
+
20
+
21
+ The model has achieved an accuracy of **98.29%** on our internal test set, demonstrating its effectiveness in distinguishing between real and fake images.
22
+
23
+ 3\. Getting Started
24
+ -------------------
25
+
26
+ ### 3.1. Dependencies
27
+
28
+ To ensure a reproducible environment, all dependencies are listed in the requirements.txt file. Install them using pip:
29
+
30
+ ```bash
31
+ pip install -r requirements.txt
32
+ ```
33
+
34
+ **requirements.txt**:
35
+ - numpy
36
+ - Pillow
37
+ - torch
38
+ - clip-by-openai
39
+ - scikit-learn
40
+ - tqdm
41
+ - seaborn
42
+ - matplotlib
43
+
44
+ ### 3.2. Data Preparation
45
+
46
+ The model was trained on a dataset of real and fake images obtained from Kaggle. The dataset is available at https://www.kaggle.com/datasets/tristanzhang32/ai-generated-images-vs-real-images/data.
47
+
48
+ ### 3.3. Usage
49
+
50
+ #### 3.3.1. Feature Extraction
51
+
52
+ To extract features from a new dataset, run the following command:
53
+
54
+ ```
55
+ python extract_features.py --data_dir /path/to/your/data --output_file features.npz
56
+ ```
57
+
58
+ #### 3.3.2. Model Training
59
+
60
+ To retrain the SVM model on a new set of extracted features, run:
61
+
62
+ ```
63
+ python train_model.py --features_file features.npz --model_output_path model.joblib
64
+ ```
65
+
66
+ #### 3.3.3. Inference
67
+
68
+ To classify a single image using the trained model, use the provided inference script:
69
+ ```
70
+ python classify.py --image_path /path/to/your/image.jpg --model_path model.joblib
71
+ ```
72
+
73
+ 4\. Production Deployment
74
+ -------------------------
75
+
76
+ The image classification model is deployed as a microservice. The service exposes an API endpoint that accepts an image and returns a classification result ("real" or "fake").
77
+
78
+ ### 4.1. API Specification
79
+
80
+ * **Endpoint:** /classify
81
+
82
+ * **Method:** POST
83
+
84
+ * **Request Body:** multipart/form-data with a single field image.
85
+
86
+ * **Response:**
87
+
88
+ * Success: `{ "classification": "real", "confidence": 0.95 }`
89
+
90
+ * Error: `{ "error": "Error message" }`
91
+
92
+
93
+ ### 4.2. Scalability and Monitoring
94
+
95
+ The service is deployed in a containerized environment (e.g., Docker) and managed by an orchestrator (e.g., Kubernetes) to ensure scalability and high availability. Monitoring and logging are in place to track model performance, API latency, and error rates.
96
+
97
+ 5\. Model Versioning
98
+ --------------------
99
+
100
+ We use a combination of Git for code versioning and a model registry for tracking trained model artifacts. Each model is versioned and associated with the commit hash of the code that produced it. The current production model is **v1.2.0**.
101
+
102
+ 6\. Testing
103
+ -----------
104
+
105
+ The project includes a suite of tests to ensure correctness and reliability:
106
+
107
+ * **Unit tests:** To verify individual functions and components.
108
+
109
+ * **Integration tests:** To test the interaction between different parts of the system.
110
+
111
+ * **Model evaluation tests:** To continuously monitor model performance on a golden dataset.
112
+
113
+
114
+ To run the tests, execute:
115
+ ```
116
+ pytest
117
+ ```
118
+
119
+ 7\. Future Work
120
+ ---------------
121
+
122
+ * **Explore more advanced classifiers:** Investigate the use of neural network-based classifiers on top of CLIP features.
123
+
124
+ * **Fine-tune the CLIP model:** For even better performance, we can fine-tune the CLIP model on our specific domain of images.
125
+
126
+ * **Expand the training dataset:** Continuously augment the training data with new examples of real and fake images to improve the model's robustness.
127
+
128
+
129
+ 8\. Contact/Support
130
+ -------------------
131
+
132
+ For any questions or issues regarding this project, please contact the Machine Learning team at [your-team-email@yourcompany.com](mailto:your-team-email@yourcompany.com) .
docs/detector/fft.md ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Fast Fourier Transform (FFT) Detector
3
+
4
+ ```python
5
+ def run_fft(image: Image.Image, threshold: float = 0.92) -> bool:
6
+ ```
7
+
8
+ ## **Overview**
9
+
10
+ The `run_fft` function performs a frequency domain analysis on an image using the **Fast Fourier Transform (FFT)** to detect possible **AI generation or digital manipulation**. It leverages the fact that artificially generated or heavily edited images often exhibit a distinct high-frequency pattern.
11
+
12
+ ---
13
+
14
+ ## **Parameters**
15
+
16
+ | Parameter | Type | Description |
17
+ | ----------- | ----------------- | --------------------------------------------------------------------------------------- |
18
+ | `image` | `PIL.Image.Image` | Input image to analyze. It will be converted to grayscale and resized. |
19
+ | `threshold` | `float` | Proportion threshold of high-frequency components to flag the image. Default is `0.92`. |
20
+
21
+ ---
22
+
23
+ ## **Returns**
24
+
25
+ | Type | Description |
26
+ | ------ | ---------------------------------------------------------------------- |
27
+ | `bool` | `True` if image is likely AI-generated/manipulated; otherwise `False`. |
28
+
29
+ ---
30
+
31
+ ## **Step-by-Step Explanation**
32
+
33
+ ### 1. **Grayscale Conversion**
34
+
35
+ All images are converted to grayscale:
36
+
37
+ ```python
38
+ gray_image = image.convert("L")
39
+ ```
40
+
41
+ ### 2. **Resize**
42
+
43
+ The image is resized to a fixed $512 \times 512$ for uniformity:
44
+
45
+ ```python
46
+ resized_image = gray_image.resize((512, 512))
47
+ ```
48
+
49
+ ### 3. **FFT Calculation**
50
+
51
+ Compute the 2D Discrete Fourier Transform:
52
+
53
+ $$
54
+ F(u, v) = \sum_{x=0}^{M-1} \sum_{y=0}^{N-1} f(x, y) \cdot e^{-2\pi i \left( \frac{ux}{M} + \frac{vy}{N} \right)}
55
+ $$
56
+
57
+ ```python
58
+ fft_result = fft2(image_array)
59
+ ```
60
+
61
+ ### 4. **Shift Zero Frequency to Center**
62
+
63
+ Use `fftshift` to center the zero-frequency component:
64
+
65
+ ```python
66
+ fft_shifted = fftshift(fft_result)
67
+ ```
68
+
69
+ ### 5. **Magnitude Spectrum**
70
+
71
+ $$
72
+ |F(u, v)| = \sqrt{\Re^2 + \Im^2}
73
+ $$
74
+
75
+ ```python
76
+ magnitude_spectrum = np.abs(fft_shifted)
77
+ ```
78
+
79
+ ### 6. **Normalization**
80
+
81
+ Normalize the spectrum to avoid scale issues:
82
+
83
+ $$
84
+ \text{Normalized}(u,v) = \frac{|F(u,v)|}{\max(|F(u,v)|)}
85
+ $$
86
+
87
+ ```python
88
+ normalized_spectrum = magnitude_spectrum / max_magnitude
89
+ ```
90
+
91
+ ### 7. **High-Frequency Detection**
92
+
93
+ High-frequency components are defined as:
94
+
95
+ $$
96
+ \text{Mask}(u,v) =
97
+ \begin{cases}
98
+ 1 & \text{if } \text{Normalized}(u,v) > 0.5 \\
99
+ 0 & \text{otherwise}
100
+ \end{cases}
101
+ $$
102
+
103
+ ```python
104
+ high_freq_mask = normalized_spectrum > 0.5
105
+ ```
106
+
107
+ ### 8. **Proportion Calculation**
108
+
109
+ $$
110
+ \text{Ratio} = \frac{\sum \text{Mask}}{\text{Total pixels}}
111
+ $$
112
+
113
+ ```python
114
+ high_freq_ratio = np.sum(high_freq_mask) / normalized_spectrum.size
115
+ ```
116
+
117
+ ### 9. **Threshold Decision**
118
+
119
+ If the ratio exceeds the threshold:
120
+
121
+ $$
122
+ \text{is\_fake} = (\text{Ratio} > \text{Threshold})
123
+ $$
124
+
125
+ ```python
126
+ is_fake = high_freq_ratio > threshold
127
+ ```
128
+
129
+ This check is implemented in the API.
130
+
131
+ ### Running Locally
132
+
133
+ Just put the function in a notebook or script file and run it with your image. It works well for basic images.
134
+
135
+
136
+ [🔙 Back to Main README](../README.md)
docs/detector/meta.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Metadata Analysis for Image Edit Detection
2
+
3
+ This module inspects image metadata to detect possible signs of AI-generation or post-processing edits.
4
+
5
+ ## Overview
6
+
7
+ - Many AI-generated images and edited images leave identifiable traces in their metadata.
8
+ - This detector scans image EXIF metadata and raw bytes for known AI generation indicators and common photo editing software signatures.
9
+ - It classifies images as `"ai_generated"`, `"edited"`, or `"undetermined"` based on detected markers.
10
+ - Handles invalid image formats gracefully by reporting errors.
11
+
12
+ ## How It Works
13
+
14
+ - Opens the image from raw bytes using the Python Pillow library (`PIL`).
15
+ - Reads EXIF metadata and specifically looks for the "Software" tag that often contains the editing app name.
16
+ - Checks for common image editors such as Photoshop, GIMP, Snapseed, etc.
17
+ - Scans the entire raw byte content of the image for embedded AI generation identifiers like "midjourney", "stable-diffusion", "openai", etc.
18
+ - Returns a status string indicating the metadata classification.
19
+
20
+ [🔙 Back to Main README](../README.md)
docs/detector/note-for-backend.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # 📦 API Integration Note
3
+
4
+ ## Overview
5
+
6
+ This system integrates **three image forensics methods**—**ELA**, **FFT**, and **Metadata analysis**—into a single detection pipeline to determine whether an image is AI-generated, manipulated, or authentic.
7
+
8
+ ---
9
+
10
+ ## 🔍 Detection Modules
11
+
12
+ ### 1. **ELA (Error Level Analysis)**
13
+
14
+ * **Purpose:** Detects tampering or editing by analyzing compression error levels.
15
+ * **Accuracy:** ✅ *Most accurate method*
16
+ * **Performance:** ❗ *Slowest method*
17
+ * **Output:** `True` (edited) or `False` (authentic)
18
+
19
+ ### 2. **FFT (Fast Fourier Transform)**
20
+
21
+ * **Purpose:** Identifies high-frequency patterns typical of AI-generated images.
22
+ * **Accuracy:** ⚠️ *Moderately accurate*
23
+ * **Performance:** ❗ *Moderate to slow*
24
+ * **Output:** `True` (likely AI-generated) or `False` (authentic)
25
+
26
+ ### 3. **Metadata Analysis**
27
+
28
+ * **Purpose:** Detects traces of AI tools or editors in image metadata or binary content.
29
+ * **Accuracy:** ⚠️ *Fast but weaker signal*
30
+ * **Performance:** 🚀 *Fastest method*
31
+ * **Output:** One of:
32
+
33
+ * `"ai_generated"` – AI tool or generator identified
34
+ * `"edited"` – Edited using known software
35
+ * `"undetermined"` – No signature found
36
+
37
+ ---
38
+
39
+ ## 🧩 Integration Plan
40
+
41
+ ### ➕ Combine all three APIs into one unified endpoint:
42
+
43
+ ```bash
44
+ POST /api/detect-image
45
+ ```
46
+
47
+ ### Input:
48
+
49
+ * `image`: Image file (binary, any format supported by Pillow)
50
+
51
+ ### Output:
52
+
53
+ ```json
54
+ {
55
+ "ela_result": true,
56
+ "fft_result": false,
57
+ "metadata_result": "ai_generated",
58
+ "final_decision": "ai_generated"
59
+ }
60
+ ```
61
+ > NOTE: We recommend a default decision logic that trusts ELA > FFT > Metadata, in that order.
62
+
63
+ ## Result implementation
64
+ | `ela_result` | `fft_result` | `metadata_result` | Suggested Final Decision | Notes |
65
+ | ------------ | ------------ | ----------------- | ------------------------ | ----------------------------------------------------------------------- |
66
+ | `true` | `true` | `"ai_generated"` | `ai_generated` | Strong evidence from all three modules |
67
+ | `true` | `false` | `"edited"` | `edited` | ELA confirms editing, no AI signals |
68
+ | `true` | `false` | `"undetermined"` | `edited` | ELA indicates manipulation |
69
+ | `false` | `true` | `"ai_generated"` | `ai_generated` | No edits, but strong AI frequency & metadata signature |
70
+ | `false` | `true` | `"undetermined"` | `possibly_ai_generated` | Weak metadata, but FFT indicates possible AI generation |
71
+ | `false` | `false` | `"ai_generated"` | `ai_generated` | Metadata alone shows AI use |
72
+ | `false` | `false` | `"edited"` | `possibly_edited` | Weak signal—metadata shows editing but no structural or frequency signs |
73
+ | `false` | `false` | `"undetermined"` | `authentic` | No detectable manipulation or AI indicators |
74
+
75
+
76
+ ### Decision Logic:
77
+
78
+ * Use **ELA** as the **primary indicator** for manipulation.
79
+ * Supplement with **FFT** and **Metadata** to improve reliability.
80
+ * Combine using a simple rule-based or voting system.
81
+
82
+ ---
83
+
84
+ ## ⚙️ Performance Consideration
85
+
86
+ | Method | Speed | Strength |
87
+ | -------- | ----------- | -------------------- |
88
+ | ELA | ❗ Slow | ✅ Highly accurate |
89
+ | FFT | ⚠️ Moderate | ⚠️ Somewhat reliable |
90
+ | Metadata | 🚀 Fast | ⚠️ Low confidence |
91
+
92
+ > For high-throughput systems, consider running Metadata first and conditionally applying ELA/FFT if suspicious.
93
+
94
+ [🔙 Back to Main README](../README.md)
docs/features/image_classifier.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Image Classifier
2
+
3
+ ## Overview
4
+
5
+ This module classifies whether an input image is AI-generated or a real-life photograph.
6
+
7
+ ## Model
8
+
9
+ - Architecture: InceptionV3
10
+ - Type: Binary Classifier (AI vs Real)
11
+ - Format: H5 model (`latest-my_cnn_model.h5`)
12
+
13
+ ## Dataset
14
+
15
+ - Total images: ~79,950
16
+ - Balanced between real and generated images
17
+ - Preprocessing: Resizing, normalization
18
+
19
+ ## Code Location
20
+
21
+ - Controller: `features/image_classifier/controller.py`
22
+ - Model Loader: `features/image_classifier/model_loader.py`
23
+ - Preprocessor: `features/image_classifier/preprocess.py`
24
+
25
+ ## API
26
+
27
+ - Endpoint: [ENDPOINTS](../api_endpoints.md)
28
+ - Input: Image file (PNG/JPG)
29
+ - Output: JSON response with classification result and confidence
30
+
31
+ [🔙 Back to Main README](../README.md)
docs/features/nepali_text_classifier.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Nepali Text Classifier
2
+
3
+ ## Overview
4
+
5
+ This classifier identifies whether Nepali-language text content is written by a human or AI.
6
+
7
+ ## Model
8
+
9
+ - Base Model: XLM-Roberta (XLMRClassifier)
10
+ - Language: Nepali (Multilingual model)
11
+ - Fine-tuned with scraped web content (~18,000 samples)
12
+
13
+ ## Dataset
14
+
15
+ - Custom scraped dataset with manual labeling
16
+ - Includes news, blogs, and synthetic content from various LLMs
17
+
18
+ ## Code Location
19
+
20
+ - Controller: `features/nepali_text_classifier/controller.py`
21
+ - Inference: `features/nepali_text_classifier/inferencer.py`
22
+ - Model Loader: `features/nepali_text_classifier/model_loader.py`
23
+
24
+ ## API
25
+
26
+ - Endpoint: [ENDPOINTS](../api_endpoints.md)
27
+ - Input: Raw text
28
+ - Output: JSON classification with label and confidence score
29
+
30
+ [🔙 Back to Main README](../README.md)
docs/features/text_classifier.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # English Text Classifier
2
+
3
+ ## Overview
4
+
5
+ Detects whether English-language text is AI-generated or human-written.
6
+
7
+ ## Model Pipeline
8
+
9
+ - Tokenizer: GPT-2 Tokenizer
10
+ - Model: Custom trained binary classifier
11
+
12
+ ## Dataset
13
+
14
+ - Balanced dataset: Human vs AI-generated (ChatGPT, Claude, etc.)
15
+ - Tokenized and fed into the model using PyTorch/TensorFlow
16
+
17
+ ## Code Location
18
+
19
+ - Controller: `features/text_classifier/controller.py`
20
+ - Inference: `features/text_classifier/inferencer.py`
21
+ - Model Loader: `features/text_classifier/model_loader.py`
22
+ - Preprocessor: `features/text_classifier/preprocess.py`
23
+
24
+ ## API
25
+
26
+ - Endpoint: [ENDPOINTS](../api_endpoints.md)
27
+ - Input: Raw English text
28
+ - Output: Prediction result with probability/confidence
29
+
30
+ [🔙 Back to Main README](../README.md)
docs/functions.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Major Functions used
2
+
3
+ ## In Text Classifier (`features/text_classifier/` and `features/nepali_text_classifier/`)
4
+
5
+ - **`load_model()`**
6
+ Loads the GPT-2 model and tokenizer from the specified directory paths.
7
+
8
+ - **`lifespan()`**
9
+ Manages the application lifecycle. Initializes the model at startup and handles cleanup on shutdown.
10
+
11
+ - **`classify_text_sync()`**
12
+ Synchronously tokenizes input text and predicts using the GPT-2 model. Returns classification and perplexity.
13
+
14
+ - **`classify_text()`**
15
+ Asynchronously runs `classify_text_sync()` in a thread pool for non-blocking text classification.
16
+
17
+ - **`analyze_text()`**
18
+ **POST** endpoint: Accepts text input, classifies it using `classify_text()`, and returns the result with perplexity.
19
+
20
+ - **`health()`**
21
+ **GET** endpoint: Simple health check for API liveness.
22
+
23
+ - **`parse_docx()`, `parse_pdf()`, `parse_txt()`**
24
+ Utilities to extract and convert `.docx`, `.pdf`, and `.txt` file contents to plain text.
25
+
26
+ - **`warmup()`**
27
+ Downloads the model repository and initializes the model/tokenizer using `load_model()`.
28
+
29
+ - **`download_model_repo()`**
30
+ Downloads the model files from the designated `MODEL` folder.
31
+
32
+ - **`get_model_tokenizer()`**
33
+ Checks if the model already exists; if not, downloads it—otherwise, loads the cached model.
34
+
35
+ - **`handle_file_upload()`**
36
+ Handles file uploads from the `/upload` route. Extracts text, classifies, and returns results.
37
+
38
+ - **`extract_file_contents()`**
39
+ Extracts and returns plain text from uploaded files (PDF, DOCX, TXT).
40
+
41
+ - **`handle_file_sentence()`**
42
+ Processes file uploads by analyzing each sentence (under 10,000 chars) before classification.
43
+
44
+ - **`handle_sentence_level_analysis()`**
45
+ Checks/strips each sentence, then computes AI/human likelihood for each.
46
+
47
+ - **`analyze_sentences()`**
48
+ Splits paragraphs into sentences, classifies each, and returns all results.
49
+
50
+ - **`analyze_sentence_file()`**
51
+ Like `handle_file_sentence()`—analyzes sentences in uploaded files.
52
+ ---
53
+ ## for image_classifier
54
+
55
+ - **`Classify_Image_router()`** – Handles image classification requests by routing and coordinating preprocessing and inference.
56
+ - **`classify_image()`** – Performs AI vs human image classification using the loaded model.
57
+ - **`load_model()`** – Loads the pretrained model from Hugging Face at server startup.
58
+ - **`preprocess_image()`** – Applies all required preprocessing steps to the input image.
59
+
60
+ > Note: While many functions mirror those in the text classifier, the image classifier primarily uses TensorFlow rather than PyTorch.
61
+
62
+ [🔙 Back to Main README](../README.md)
docs/nestjs_integration.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NestJS + FastAPI
2
+
3
+ You can easily call this API from a NestJS microservice.
4
+
5
+ **.env**
6
+ ```env
7
+ FASTAPI_BASE_URL=http://localhost:8000
8
+ SECRET_TOKEN=your_secret_token_here
9
+ ```
10
+
11
+ **fastapi.service.ts**
12
+
13
+ ```typescript
14
+ import { Injectable } from "@nestjs/common";
15
+ import { HttpService } from "@nestjs/axios";
16
+ import { ConfigService } from "@nestjs/config";
17
+ import { firstValueFrom } from "rxjs";
18
+
19
+ @Injectable()
20
+ export class FastAPIService {
21
+ constructor(
22
+ private http: HttpService,
23
+ private config: ConfigService,
24
+ ) {}
25
+
26
+ async analyzeText(text: string) {
27
+ const url = `${this.config.get("FASTAPI_BASE_URL")}/text/analyse`;
28
+ const token = this.config.get("SECRET_TOKEN");
29
+
30
+ const response = await firstValueFrom(
31
+ this.http.post(
32
+ url,
33
+ { text },
34
+ {
35
+ headers: {
36
+ Authorization: `Bearer ${token}`,
37
+ },
38
+ },
39
+ ),
40
+ );
41
+
42
+ return response.data;
43
+ }
44
+ }
45
+ ```
46
+
47
+ **app.module.ts**
48
+ ```typescript
49
+ import { Module } from "@nestjs/common";
50
+ import { ConfigModule } from "@nestjs/config";
51
+ import { HttpModule } from "@nestjs/axios";
52
+ import { AppController } from "./app.controller";
53
+ import { FastAPIService } from "./fastapi.service";
54
+
55
+ @Module({
56
+ imports: [ConfigModule.forRoot(), HttpModule],
57
+ controllers: [AppController],
58
+ providers: [FastAPIService],
59
+ })
60
+ export class AppModule {}
61
+ ```
62
+
63
+ **app.controller.ts**
64
+ ```typescript
65
+ import { Body, Controller, Post, Get } from '@nestjs/common';
66
+ import { FastAPIService } from './fastapi.service';
67
+
68
+ @Controller()
69
+ export class AppController {
70
+ constructor(private readonly fastapiService: FastAPIService) {}
71
+
72
+ @Post('analyze-text')
73
+ async callFastAPI(@Body('text') text: string) {
74
+ return this.fastapiService.analyzeText(text);
75
+ }
76
+
77
+ @Get()
78
+ getHello(): string {
79
+ return 'NestJS is connected to FastAPI';
80
+ }
81
+ }
82
+ ```
83
+ [🔙 Back to Main README](../README.md)
docs/security.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Security: Bearer Token Auth
2
+
3
+ All endpoints require authentication via Bearer token:
4
+
5
+ - Set `SECRET_TOKEN` in `.env`
6
+ - Add header: `Authorization: Bearer <SECRET_TOKEN>`
7
+
8
+ Unauthorized requests receive `403 Forbidden`.
9
+
10
+ [🔙 Back to Main README](../README.md)
docs/setup.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Setup & Installation
2
+
3
+ ## 1. Clone the Repository
4
+ ```bash
5
+ git clone https://github.com/cyberalertnepal/aiapi
6
+ cd aiapi
7
+ ```
8
+
9
+ ## 2. Install Dependencies
10
+ ```bash
11
+ pip install -r requirements.txt
12
+ ```
13
+
14
+ ## 3. Configure Environment
15
+ Create a `.env` file:
16
+ ```env
17
+ SECRET_TOKEN=your_secret_token_here
18
+ ```
19
+
20
+ ## 4. Run the API
21
+ ```bash
22
+ uvicorn app:app --host 0.0.0.0 --port 8000
23
+ ```
24
+ [🔙 Back to Main README](../README.md)
docs/status_code.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Error Codes Reference
2
+
3
+ ## 🔹 Summary Table
4
+
5
+ | Code | Message | Description |
6
+ | ---- | ----------------------------------------------------- | ------------------------------------------ |
7
+ | 400 | Text must contain at least two words | Input text too short |
8
+ | 400 | Text should be less than 10,000 characters | Input text too long |
9
+ | 404 | The file is empty or only contains whitespace | File has no usable content |
10
+ | 404 | Invalid file type. Only .docx, .pdf, and .txt allowed | Unsupported file format |
11
+ | 403 | Invalid or expired token | Authentication token is invalid or expired |
12
+ | 413 | Text must contain at least two words | Text too short (alternative condition) |
13
+ | 413 | Text must be less than 10,000 characters | Text too long (alternative condition) |
14
+ | 413 | The image error (preprocessing) | Image size/content issue |
15
+ | 500 | Error processing the file | Internal server error while processing |
16
+
17
+ ---
18
+
19
+ ## 🔍 Error Details
20
+
21
+ ### `400` - Bad Request
22
+
23
+ - **Text must contain at least two words**
24
+ The input text field is too short. Submit at least two words to proceed.
25
+
26
+ - **Text should be less than 10,000 characters**
27
+ Input text exceeds the maximum allowed character limit. Consider truncating or summarizing the content.
28
+
29
+ ---
30
+
31
+ ### `404` - Not Found
32
+
33
+ - **The file is empty or only contains whitespace**
34
+ The uploaded file is invalid due to lack of meaningful content. Ensure the file has readable, non-empty text.
35
+
36
+ - **Invalid file type. Only .docx, .pdf, and .txt are allowed**
37
+ The file format is not supported. Convert the file to one of the allowed formats before uploading.
38
+
39
+ ---
40
+
41
+ ### `403` - Forbidden
42
+
43
+ - **Invalid or expired token**
44
+ Your access token is either expired or incorrect. Try logging in again or refreshing the token.
45
+
46
+ ---
47
+
48
+ ### `413` - Payload Too Large
49
+
50
+ - **Text must contain at least two words**
51
+ The text payload is too small or malformed under a large upload context. Add more content.
52
+
53
+ - **Text must be less than 10,000 characters**
54
+ The payload exceeds the allowed character limit for a single request. Break it into smaller chunks if needed.
55
+
56
+ - **The image error**
57
+ The uploaded image is too large or corrupted. Try resizing or compressing it before retrying.
58
+
59
+ ---
60
+
61
+ ### `500` - Internal Server Error
62
+
63
+ - **Error processing the file**
64
+ An unexpected server-side failure occurred during file analysis. Retry later or contact support if persistent.
65
+
66
+ ---
67
+
68
+ > 📌 **Note:** Always validate inputs, check token status, and follow file guidelines before making requests.
docs/structure.md ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 🏗️ Project Structure
2
+
3
+ ```bash
4
+ AI-Checker/
5
+
6
+ ├── app.py # Main FastAPI entry point
7
+ ├── config.py # Configuration settings
8
+ ├── Dockerfile # Docker build script
9
+ ├── Procfile # Deployment entry for platforms like Heroku/Railway
10
+ ├── requirements.txt # Python dependency list
11
+ ├── README.md # Main project overview 📘
12
+
13
+ ├── features/ # Core AI content detection modules
14
+ │ ├── image_classifier/ # Classifies AI vs Real images
15
+ │ │ ├── controller.py
16
+ │ │ ├── model_loader.py
17
+ │ │ └── preprocess.py
18
+ │ ├── image_edit_detector/ # Detects tampered or edited images
19
+ │ ├── nepali_text_classifier/ # Classifies Nepali text as AI or Human
20
+ │ │ ├── controller.py
21
+ │ │ ├── inferencer.py
22
+ │ │ ├── model_loader.py
23
+ │ │ └── preprocess.py
24
+ │ └── text_classifier/ # Classifies English text as AI or Human
25
+ │ ├── controller.py
26
+ │ ├── inferencer.py
27
+ │ ├── model_loader.py
28
+ │ └── preprocess.py
29
+
30
+ ├── docs/ # Internal documentation and API references
31
+ │ ├── api_endpoints.md
32
+ │ ├── deployment.md
33
+ │ ├── detector/
34
+ │ │ ├── ELA.md
35
+ │ │ ├── fft.md
36
+ │ │ ├── meta.md
37
+ │ │ └── note-for-backend.md
38
+ │ ├── features/
39
+ │ │ ├── image_classifier.md
40
+ │ │ ├── nepali_text_classifier.md
41
+ │ │ └── text_classifier.md
42
+ │ ├── functions.md
43
+ │ ├── nestjs_integration.md
44
+ │ ├── security.md
45
+ │ ├── setup.md
46
+ │ └── structure.md
47
+
48
+ ├── IMG_Models/ # Stored model weights
49
+ │ └── latest-my_cnn_model.h5
50
+
51
+ ├── notebooks/ # Experimental/debug Jupyter notebooks
52
+ ├── static/ # Static files (e.g., UI assets, test inputs)
53
+ └── test.md # Test usage notes
54
+ ```
55
+
56
+ ### 🌟 Key Files and Their Roles
57
+
58
+ - **`app.py`**: Entry point initializing FastAPI app and routes.
59
+ - **`Procfile`**: Tells Railway (or similar platforms) how to run the program.
60
+ - **`requirements.txt`**: Tracks all Python dependencies for the project.
61
+ - **`__init__.py`**: Package initializer for the root module and submodules.
62
+ - **`features/text_classifier/`**
63
+ - **`controller.py`**: Handles logic between routes and the model.
64
+ - **`inferencer.py`**: Runs inference and returns predictions as well as file system
65
+ utilities.
66
+ - **`features/NP/`**
67
+ - **`controller.py`**: Handles logic between routes and the model.
68
+ - **`inferencer.py`**: Runs inference and returns predictions as well as file system
69
+ utilities.
70
+ - **`model_loader.py`**: Loads the ML model and tokenizer.
71
+ - **`preprocess.py`**: Prepares input text for the model.
72
+ - **`routes.py`**: Defines API routes for text classification.
73
+
74
+ [🔙 Back to Main README](../README.md)
features/ai_human_image_classifier/controller.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import IO
from preprocessor import preprocessor
from inferencer import inferencer


class ClassificationController:
    """Coordinates preprocessing and inference for image classification."""

    def classify_image(self, image_file: IO) -> dict:
        """
        Run the full classification pipeline on one image file.

        Args:
            image_file (IO): The image file to classify.

        Returns:
            dict: The classification result on success, or an
            ``{"error": ...}`` payload describing the failure.
        """
        try:
            # Preprocess, then classify; any failure falls through to the
            # handlers below.
            features = preprocessor.process(image_file)
            return inferencer.predict(features)
        except ValueError as e:
            # Known failure mode: the preprocessor rejected the input image.
            return {"error": str(e)}
        except Exception as e:
            # Anything else is unexpected — report a generic error to callers.
            print(f"An unexpected error occurred: {e}")
            return {"error": "An internal error occurred during classification."}


controller = ClassificationController()
features/ai_human_image_classifier/inferencer.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
import numpy as np
from model_loader import models


class Inferencer:
    """Runs CLIP feature extraction followed by SVM classification."""

    # SVM class index -> human-readable label.
    _LABELS = {0: 'real', 1: 'fake'}

    def __init__(self):
        # The models are loaded exactly once by model_loader and shared here.
        self.clip_model = models.clip_model
        self.svm_model = models.svm_model

    def _confidence(self, features) -> float:
        """Return a confidence score in [0, 1] for the SVM's decision on *features*."""
        if hasattr(self.svm_model, "predict_proba"):
            # Calibrated classifier: report the winning class's probability.
            return float(np.max(self.svm_model.predict_proba(features)[0]))
        # Fallback: squash the absolute decision-function margin through a
        # sigmoid so the value lands in [0, 1] like a probability would.
        margin = self.svm_model.decision_function(features)[0]
        return float(1 / (1 + np.exp(-np.abs(margin))))

    @torch.no_grad()
    def predict(self, image_tensor: torch.Tensor) -> dict:
        """
        Classify a preprocessed image tensor.

        Args:
            image_tensor (torch.Tensor): The preprocessed image tensor.

        Returns:
            dict: ``{"classification": <label>, "confidence": <float>}``.
        """
        embedding = self.clip_model.encode_image(image_tensor).cpu().numpy()
        predicted_class = self.svm_model.predict(embedding)[0]
        return {
            "classification": self._LABELS.get(predicted_class, "unknown"),
            "confidence": self._confidence(embedding),
        }


inferencer = Inferencer()
features/ai_human_image_classifier/main.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from routes import router as classification_router

# Application object; uvicorn serves this as `main:app`.
app = FastAPI(
    title="Real vs. Fake Image Classification API",
    description="An API to classify images as real or fake using OpenAI's CLIP and an SVM model.",
    version="1.0.0"
)

# Mount every route defined in routes.py beneath the /api prefix.
app.include_router(classification_router, prefix="/api", tags=["Classification"])


@app.get("/", tags=["Root"])
async def read_root():
    """Landing endpoint confirming the API is up and pointing users to /docs."""
    return {"message": "Welcome to the Image Classification API. Go to /docs for the API documentation."}


# Running this application:
#   1. Install every dependency listed in requirements.txt.
#   2. Ensure the 'svm_model.joblib' file is in the same directory.
#   3. Start the server with: uvicorn main:app --reload
features/ai_human_image_classifier/model_loader.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import clip
import torch
import joblib
from pathlib import Path
from huggingface_hub import hf_hub_download


class ModelLoader:
    """
    Loads and caches the CLIP feature extractor and the SVM classifier.

    A single instance is created at import time (see ``models`` below) so the
    expensive model loading happens exactly once per process.
    """

    def __init__(self, clip_model_name: str, svm_repo_id: str, svm_filename: str):
        """
        Load both models onto the best available device.

        Args:
            clip_model_name (str): CLIP variant to load (e.g. 'ViT-L/14').
            svm_repo_id (str): Hugging Face repository id hosting the SVM
                (e.g. 'rhnsa/ai_human_image_detector').
            svm_filename (str): File name of the SVM artifact in that repo.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        self.clip_model, self.clip_preprocess = self._load_clip_model(clip_model_name)
        self.svm_model = self._load_svm_model(repo_id=svm_repo_id, filename=svm_filename)
        print("Models loaded successfully.")

    def _load_clip_model(self, model_name: str):
        """Return the (model, preprocess) pair for the requested CLIP variant."""
        try:
            return clip.load(model_name, device=self.device)
        except Exception as e:
            print(f"Error loading CLIP model: {e}")
            raise

    def _load_svm_model(self, repo_id: str, filename: str):
        """Fetch the SVM artifact from the Hugging Face Hub and deserialize it."""
        print(f"Downloading SVM model from Hugging Face repo: {repo_id}")
        try:
            # hf_hub_download caches the file locally and returns its path.
            cached_path = hf_hub_download(repo_id=repo_id, filename=filename)
            print(f"SVM model downloaded to: {cached_path}")
            return joblib.load(cached_path)
        except Exception as e:
            print(f"Error downloading or loading SVM model from Hugging Face: {e}")
            raise


# --- Global Model Instance ---
# Shared, import-once model bundle used by the preprocessor and inferencer.
CLIP_MODEL_NAME = 'ViT-L/14'
SVM_REPO_ID = 'rhnsa/ai_human_image_detector'
SVM_FILENAME = 'svm_model_real.joblib'  # The name of your model file in the Hugging Face repo

models = ModelLoader(
    clip_model_name=CLIP_MODEL_NAME,
    svm_repo_id=SVM_REPO_ID,
    svm_filename=SVM_FILENAME
)
features/ai_human_image_classifier/preprocessor.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from PIL import Image
import torch
from typing import IO
from model_loader import models


class ImagePreprocessor:
    """Converts raw uploaded image files into model-ready CLIP tensors."""

    def __init__(self):
        # Reuse the exact preprocessing pipeline the CLIP model ships with,
        # and the device chosen by the shared model loader.
        self.preprocess = models.clip_preprocess
        self.device = models.device

    def process(self, image_file: IO) -> torch.Tensor:
        """
        Open an image file, preprocess it, and return it as a tensor.

        Args:
            image_file (IO): The image file object (e.g., from a file upload).

        Returns:
            torch.Tensor: The preprocessed image with a leading batch
            dimension added, placed on the model's device.

        Raises:
            ValueError: If the file cannot be decoded as an image.
        """
        try:
            # Force RGB so grayscale/RGBA uploads match the expected input.
            image = Image.open(image_file).convert("RGB")
        except Exception as e:
            print(f"Error opening image: {e}")
            # Chain the original exception so the root cause is preserved
            # for debugging instead of being silently discarded.
            raise ValueError("Invalid or corrupted image file.") from e

        # Apply the CLIP transforms, add a batch dimension, move to device.
        return self.preprocess(image).unsqueeze(0).to(self.device)


preprocessor = ImagePreprocessor()
features/ai_human_image_classifier/routes.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter, File, UploadFile, HTTPException, status
from fastapi.responses import JSONResponse

from fastapi import Request, Depends
from fastapi.security import HTTPBearer
from slowapi import Limiter
from slowapi.util import get_remote_address

# Fix: use a relative import so the module works when imported as part of the
# `features.ai_human_image_classifier` package.
from .controller import controller

# Fix: a single router instance.  The original created `APIRouter()` twice;
# the second assignment silently discarded the first.
router = APIRouter()
limiter = Limiter(key_func=get_remote_address)
security = HTTPBearer()
# NOTE(review): limiter/security are created but never applied to the endpoint
# below, unlike sibling feature routers — confirm whether rate limiting and
# bearer auth were intended here.


@router.post("/classify", summary="Classify an image as Real or Fake")
async def classify_image_endpoint(image: UploadFile = File(...)):
    """
    Accepts an image file and classifies it as 'real' or 'fake'.

    - **image**: The image file to be classified (e.g., JPEG, PNG).

    Returns a JSON object with the classification and a confidence score.
    """
    # Reject anything not declared as an image content type.
    if not image.content_type.startswith("image/"):
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail="Unsupported file type. Please upload an image (e.g., JPEG, PNG)."
        )

    # The controller expects a file-like object, which `image.file` provides.
    result = controller.classify_image(image.file)

    if "error" in result:
        # If the controller returned an error, forward it as an HTTP exception.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=result["error"]
        )

    return JSONResponse(content=result, status_code=status.HTTP_200_OK)
44
+
features/image_classifier/__init__.py ADDED
File without changes
features/image_classifier/controller.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import HTTPException, File, UploadFile
from .preprocess import preprocess_image
from .inferencer import classify_image


async def Classify_Image_router(file: UploadFile = File(...)):
    """Preprocess an uploaded image and run the AI-vs-human classifier.

    Raises:
        HTTPException: 423 when inference fails, 413 when preprocessing fails.
    """
    try:
        image_array = preprocess_image(file)
        try:
            result = classify_image(image_array)
            return result
        except Exception:
            # Fix: narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit).
            raise HTTPException(status_code=423, detail="something went wrong")
    except HTTPException:
        # Fix: the original `except Exception` below caught the deliberate
        # HTTPExceptions raised above (and by preprocess_image) and rewrapped
        # them all as 413, losing the intended status codes.
        raise
    except Exception as e:
        raise HTTPException(status_code=413, detail=str(e))
features/image_classifier/inferencer.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import numpy as np
from .model_loader import get_model

# Decision thresholds on the model's AI-probability output.
AI_THRESHOLD = 0.55
HUMAN_THRESHOLD = 0.45


def classify_image(image_array: np.ndarray) -> dict:
    """Run the CNN on a preprocessed image batch and map its score to a label.

    Returns a dict with `label`, `ai_confidence` and `human_confidence`
    (percentages), or an error payload if anything goes wrong — this function
    never raises.
    """
    try:
        predictions = get_model().predict(image_array)

        # The model must emit a single score per batch item: shape (batch, 1).
        if predictions.ndim != 2 or predictions.shape[1] != 1:
            raise ValueError(
                "Model output shape is invalid. Expected shape: (batch, 1)"
            )

        ai_score = float(np.clip(predictions[0][0], 0.0, 1.0))

        # Three-way verdict with an "uncertain" band between the thresholds.
        if ai_score > AI_THRESHOLD:
            verdict = "AI Generated"
        elif ai_score < HUMAN_THRESHOLD:
            verdict = "Human Generated"
        else:
            verdict = "Uncertain (Maybe AI)"

        return {
            "label": verdict,
            "ai_confidence": round(ai_score * 100, 2),
            "human_confidence": round((1.0 - ai_score) * 100, 2),
        }

    except Exception as e:
        # Best-effort contract: report failure in the payload instead of raising.
        return {
            "error": str(e),
            "label": "Classification Failed",
            "ai_confidence": None,
            "human_confidence": None,
        }
features/image_classifier/model_loader.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
import logging
import tensorflow as tf
from tensorflow.keras.layers import Layer
from huggingface_hub import snapshot_download

# Model config
REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier"
MODEL_DIR = "./IMG_Models"
WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5")

# Device info (for logging only; TensorFlow selects devices itself).
gpus = tf.config.list_physical_devices("GPU")
device = "cuda" if gpus else "cpu"

# Global model reference, populated lazily.
_model_img = None


class Cast(Layer):
    """Custom layer used in the saved model: casts inputs to float32."""

    def call(self, inputs):
        return tf.cast(inputs, tf.float32)


def warmup():
    """Eagerly download and load the model (call at application startup)."""
    global _model_img
    download_model_repo()
    _model_img = load_model()
    logging.info("Image model is ready.")


def download_model_repo():
    """Fetch the model snapshot from the Hugging Face Hub into MODEL_DIR.

    NOTE(review): an existing MODEL_DIR is assumed complete — a partial copy
    would be reused without re-download; confirm this is acceptable.
    """
    if os.path.isdir(MODEL_DIR):  # isdir() implies exists()
        logging.info("Image model already exists, skipping download.")
        return
    snapshot_path = snapshot_download(repo_id=REPO_ID)
    os.makedirs(MODEL_DIR, exist_ok=True)
    shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)


def load_model():
    """Load (and cache) the Keras model from WEIGHTS_PATH."""
    global _model_img
    if _model_img is not None:
        return _model_img

    # Fix: use logging instead of print for consistency with this module.
    logging.info(
        "%s, loading model on %s.",
        "GPU detected" if device == "cuda" else "No GPU detected",
        device.upper(),
    )

    _model_img = tf.keras.models.load_model(
        WEIGHTS_PATH, custom_objects={"Cast": Cast}
    )
    logging.info("Model input shape: %s", _model_img.input_shape)
    return _model_img


def get_model():
    """Return the cached model, downloading and loading it on first use."""
    global _model_img
    if _model_img is None:
        download_model_repo()
        _model_img = load_model()
    return _model_img
+
features/image_classifier/preprocess.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import numpy as np
import cv2
from fastapi import HTTPException


def preprocess_image(file):
    """Decode an uploaded image into a (1, 299, 299, 3) float32 batch.

    Pixels are scaled to [0, 1] and channels converted BGR -> RGB.

    Raises:
        HTTPException: 500 if the bytes cannot be decoded or any step fails.
    """
    try:
        file.file.seek(0)
        raw = np.frombuffer(file.file.read(), np.uint8)
        decoded = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if decoded is None:
            raise HTTPException(status_code=500, detail="Could not decode image.")

        # Resize to the model's expected input, then fix the channel order.
        rgb = cv2.cvtColor(cv2.resize(decoded, (299, 299)), cv2.COLOR_BGR2RGB)
        # Normalize and add the batch dimension.
        return np.expand_dims(rgb / 255.0, axis=0).astype(np.float32)

    except HTTPException:
        raise  # re-raise already defined HTTP errors untouched
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Image preprocessing failed: {str(e)}"
        )
features/image_classifier/routes.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Fix: the original imported `Limiter` from slowapi twice; deduplicated.
from slowapi import Limiter
from slowapi.util import get_remote_address
from fastapi import APIRouter, File, Request, Depends, HTTPException, UploadFile
from fastapi.security import HTTPBearer

from config import ACCESS_RATE
from .controller import Classify_Image_router

router = APIRouter()
limiter = Limiter(key_func=get_remote_address)
security = HTTPBearer()


@router.post("/analyse")
@limiter.limit(ACCESS_RATE)
async def analyse(
    request: Request,
    file: UploadFile = File(...),
    token: str = Depends(security)
):
    """Classify an uploaded image; bearer token required, rate limited.

    NOTE(review): the token is extracted but not validated here — confirm
    whether verification happens elsewhere (middleware) or was intended.
    """
    result = await Classify_Image_router(file)  # await the async controller
    return result


@router.get("/health")
@limiter.limit(ACCESS_RATE)
def health(request: Request):
    """Liveness probe."""
    return {"status": "ok"}
26
+
features/image_edit_detector/controller.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from PIL import Image
import io
from io import BytesIO
from .detectors.fft import run_fft
from .detectors.metadata import run_metadata
from .detectors.ela import run_ela
from .preprocess import preprocess_image
from fastapi import HTTPException, status, Depends
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import os

security = HTTPBearer()


async def process_image_ela(image_bytes: bytes, quality: int = 90):
    """Run Error Level Analysis on raw image bytes.

    NOTE(review): run_ela returns a bool, so `ela_score` carries the same
    boolean as `is_edited`, not a numeric score — confirm the intended
    response shape.  Also, the image is JPEG-recompressed once here
    (preprocess_image) and again inside run_ela — confirm the double
    compression is intentional.
    """
    picture = Image.open(io.BytesIO(image_bytes))

    if picture.mode != "RGB":
        picture = picture.convert("RGB")

    recompressed = preprocess_image(picture, quality)
    verdict = run_ela(recompressed, quality)

    return {
        "is_edited": verdict,
        "ela_score": verdict
    }
25
+
26
async def process_fft_image(image_bytes: bytes, threshold: float = 0.95) -> dict:
    """FFT-based edit/generation heuristic; returns {"edited": bool}."""
    rgb = Image.open(BytesIO(image_bytes)).convert("RGB")
    return {"edited": bool(run_fft(rgb, threshold))}
30
+
31
+
32
async def process_meta_image(image_bytes: bytes) -> dict:
    """Metadata-based provenance check.

    Returns {"source": <verdict>} (e.g. "edited", "ai_generated",
    "undetermined") or {"error": <message>} on failure — never raises.
    """
    try:
        return {"source": run_metadata(image_bytes)}
    except Exception as e:
        # Degrade gracefully instead of surfacing a 500 to the caller.
        return {"error": str(e)}
39
+
40
+
41
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: compare the bearer token to MY_SECRET_TOKEN.

    NOTE(review): if MY_SECRET_TOKEN is unset, every request is rejected
    (fail-closed) — confirm this is the intended behavior.
    """
    supplied = credentials.credentials
    if supplied != os.getenv("MY_SECRET_TOKEN"):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return supplied
features/image_edit_detector/detectors/ela.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image, ImageChops, ImageEnhance
2
+ import io
3
+
4
+
5
+ def run_ela(image: Image.Image, quality: int = 90, threshold: int = 15) -> bool:
6
+ """
7
+ Perform Error Level Analysis to detect image manipulation.
8
+
9
+ Parameters:
10
+ image (PIL.Image): Input image (should be RGB).
11
+ quality (int): JPEG compression quality for ELA.
12
+ threshold (int): Maximum pixel difference threshold to classify as edited.
13
+
14
+ Returns:
15
+ bool: True if image appears edited, False otherwise.
16
+ """
17
+
18
+ # Recompress the image into JPEG format in memory
19
+ buffer = io.BytesIO()
20
+ image.save(buffer, format="JPEG", quality=quality)
21
+ buffer.seek(0)
22
+ recompressed = Image.open(buffer)
23
+
24
+ # Compute the pixel-wise difference
25
+ diff = ImageChops.difference(image, recompressed)
26
+ extrema = diff.getextrema()
27
+ max_diff = max([ex[1] for ex in extrema])
28
+
29
+ # Enhance difference image for debug (not returned)
30
+ _ = ImageEnhance.Brightness(diff).enhance(10)
31
+
32
+ return max_diff > threshold
features/image_edit_detector/detectors/fft.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import numpy as np
from PIL import Image
from scipy.fft import fft2, fftshift


def run_fft(image: Image.Image, threshold: float = 0.92) -> bool:
    """
    Detects potential image manipulation or generation using FFT-based
    high-frequency analysis.

    The image is converted to 512x512 grayscale; the fraction of spectrum
    entries whose normalized magnitude exceeds 0.5 is compared with
    `threshold`.

    Parameters:
        image (PIL.Image.Image): The input image.
        threshold (float): Proportion of strong spectral components above
            which the image is flagged.

    Returns:
        bool: True if the image is likely AI-generated or manipulated.
    """
    pixels = np.array(image.convert("L").resize((512, 512)))

    # Centered magnitude spectrum of the 2-D FFT.
    spectrum = np.abs(fftshift(fft2(pixels)))

    peak = np.max(spectrum)
    if peak == 0:
        return False  # blank image: nothing to normalize against

    strong_fraction = np.sum(spectrum / peak > 0.5) / spectrum.size
    return strong_fraction > threshold
40
+
features/image_edit_detector/detectors/metadata.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image, UnidentifiedImageError
2
+ import io
3
+
4
+ # Common AI metadata identifiers in image files.
5
+ AI_INDICATORS = [
6
+ b'c2pa', b'claim_generator', b'claim_generator_info',
7
+ b'created_software_agent', b'actions.v2', b'assertions',
8
+ b'urn:c2pa', b'jumd', b'jumb', b'jumdcbor', b'jumdc2ma',
9
+ b'jumdc2as', b'jumdc2cl', b'cbor', b'convertedsfwareagent',b'c2pa.version',
10
+ b'c2pa.assertions', b'c2pa.actions',
11
+ b'c2pa.thumbnail', b'c2pa.signature', b'c2pa.manifest',
12
+ b'c2pa.manifest_store', b'c2pa.ingredient', b'c2pa.parent',
13
+ b'c2pa.provenance', b'c2pa.claim', b'c2pa.hash', b'c2pa.authority',
14
+ b'jumdc2pn', b'jumdrefs', b'jumdver', b'jumdmeta',
15
+
16
+
17
+ 'midjourney'.encode('utf-8'),
18
+ 'stable-diffusion'.encode('utf-8'),
19
+ 'stable diffusion'.encode('utf-8'),
20
+ 'stable_diffusion'.encode('utf-8'),
21
+ 'artbreeder'.encode('utf-8'),
22
+ 'runwayml'.encode('utf-8'),
23
+ 'remix.ai'.encode('utf-8'),
24
+ 'firefly'.encode('utf-8'),
25
+ 'adobe_firefly'.encode('utf-8'),
26
+
27
+ # OpenAI / DALL·E indicators (all encoded to bytes)
28
+ 'openai'.encode('utf-8'),
29
+ 'dalle'.encode('utf-8'),
30
+ 'dalle2'.encode('utf-8'),
31
+ 'DALL-E'.encode('utf-8'),
32
+ 'DALL·E'.encode('utf-8'),
33
+ 'created_by: openai'.encode('utf-8'),
34
+ 'tool: dalle'.encode('utf-8'),
35
+ 'tool: dalle2'.encode('utf-8'),
36
+ 'creator: openai'.encode('utf-8'),
37
+ 'creator: dalle'.encode('utf-8'),
38
+ 'openai.com'.encode('utf-8'),
39
+ 'api.openai.com'.encode('utf-8'),
40
+ 'openai_model'.encode('utf-8'),
41
+ 'openai_gpt'.encode('utf-8'),
42
+
43
+ #Further possible AI-Generation Indicators
44
+ 'generated_by'.encode('utf-8'),
45
+ 'model_id'.encode('utf-8'),
46
+ 'model_version'.encode('utf-8'),
47
+ 'model_info'.encode('utf-8'),
48
+ 'tool_name'.encode('utf-8'),
49
+ 'tool_creator'.encode('utf-8'),
50
+ 'tool_version'.encode('utf-8'),
51
+ 'model_signature'.encode('utf-8'),
52
+ 'ai_model'.encode('utf-8'),
53
+ 'ai_tool'.encode('utf-8'),
54
+ 'generator'.encode('utf-8'),
55
+ 'generated_by_ai'.encode('utf-8'),
56
+ 'ai_generated'.encode('utf-8'),
57
+ 'ai_art'.encode('utf-8')
58
+ ]
59
+
60
+
61
def run_metadata(image_bytes: bytes) -> str:
    """Classify image provenance from the EXIF Software tag and raw markers.

    Returns "edited", "ai_generated", "undetermined", or an "error: ..."
    string — this function never raises.
    """
    try:
        img = Image.open(io.BytesIO(image_bytes))
        img.load()

        # EXIF tag 305 = Software (the application that wrote the file).
        software = str(img.getexif().get(305, "")).strip().lower()

        editors = ["photoshop", "gimp", "snapseed", "pixlr", "vsco",
                   "editor", "adobe", "luminar"]
        if any(name in software for name in editors):
            return "edited"

        # Scan the raw bytes for C2PA / generator markers.
        if any(marker in image_bytes for marker in AI_INDICATORS):
            return "ai_generated"

        return "undetermined"

    except UnidentifiedImageError:
        return "error: invalid image format"
    except Exception as e:
        return f"error: {str(e)}"
features/image_edit_detector/preprocess.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
from PIL import Image
import io


def preprocess_image(img: Image.Image, quality: int) -> Image.Image:
    """Round-trip the image through an in-memory JPEG at `quality`."""
    stream = io.BytesIO()
    img.save(stream, format="JPEG", quality=quality)
    stream.seek(0)
    return Image.open(stream)
9
+
features/image_edit_detector/routes.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Fix: the original imported `Limiter` from slowapi twice; deduplicated.
from slowapi import Limiter
from slowapi.util import get_remote_address
from fastapi import APIRouter, File, Request, Depends, HTTPException, UploadFile
from fastapi.security import HTTPBearer
from io import BytesIO
from .controller import process_image_ela, verify_token, process_fft_image, process_meta_image
import requests

router = APIRouter()
limiter = Limiter(key_func=get_remote_address)
security = HTTPBearer()

# Only these upload types are accepted by the detectors below.
ALLOWED_IMAGE_TYPES = ("image/jpeg", "image/png")


@router.post("/ela")
@limiter.limit(ACCESS_RATE)
async def detect_ela(request: Request, file: UploadFile = File(...), quality: int = 90, token: str = Depends(verify_token)):
    """Error Level Analysis edit detection (JPEG/PNG only)."""
    if file.content_type not in ALLOWED_IMAGE_TYPES:
        raise HTTPException(
            status_code=400,
            detail="Unsupported file type. Only JPEG and PNG images are allowed."
        )

    content = await file.read()
    return await process_image_ela(content, quality)


@router.post("/fft")
@limiter.limit(ACCESS_RATE)
async def detect_fft(request: Request, file: UploadFile = File(...), threshold: float = 0.95, token: str = Depends(verify_token)):
    """FFT-based edit/generation detection (JPEG/PNG only)."""
    if file.content_type not in ALLOWED_IMAGE_TYPES:
        raise HTTPException(status_code=400, detail="Unsupported image type.")

    content = await file.read()
    return await process_fft_image(content, threshold)


@router.post("/meta")
@limiter.limit(ACCESS_RATE)
async def detect_meta(request: Request, file: UploadFile = File(...), token: str = Depends(verify_token)):
    """Metadata-based provenance detection (JPEG/PNG only)."""
    if file.content_type not in ALLOWED_IMAGE_TYPES:
        raise HTTPException(status_code=400, detail="Unsupported image type.")
    content = await file.read()
    return await process_meta_image(content)


@router.post("/health")
@limiter.limit(ACCESS_RATE)
def health(request: Request):
    """Liveness probe.

    Fix: the handler was named `heath` (typo).  NOTE(review): sibling routers
    expose /health as GET while this one is POST — kept as POST for backward
    compatibility; confirm whether it should be GET.
    """
    return {"status": "ok"}
features/nepali_text_classifier/__init__.py ADDED
File without changes
features/nepali_text_classifier/controller.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from io import BytesIO
3
+ from fastapi import HTTPException, UploadFile, status, Depends
4
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
5
+ import os
6
+ from features.nepali_text_classifier.inferencer import classify_text
7
+ from features.nepali_text_classifier.preprocess import *
8
+ import re
9
+
10
+ security = HTTPBearer()
11
+
12
def contains_english(text: str) -> bool:
    """Return True if the text contains any ASCII Latin letter."""
    # Drop newline/tab characters before scanning.
    sanitized = text.replace("\n", "").replace("\t", "")
    return any("a" <= ch <= "z" or "A" <= ch <= "Z" for ch in sanitized)
16
+
17
+
18
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """FastAPI dependency: compare the bearer token to MY_SECRET_TOKEN.

    NOTE(review): if MY_SECRET_TOKEN is unset, every request is rejected
    (fail-closed) — confirm this is the intended behavior.
    """
    supplied = credentials.credentials
    if supplied != os.getenv("MY_SECRET_TOKEN"):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return supplied
27
+
28
async def nepali_text_analysis(text: str):
    """Validate a block of Nepali text and classify it as AI or Human.

    Raises:
        HTTPException: 400 if under 10 words, 413 if over 10,000 characters.
    """
    # Fix: end_symbol_for_NP_text returns the normalized text; the original
    # discarded the return value, so a missing trailing danda (।) was never
    # actually appended before classification.
    text = end_symbol_for_NP_text(text)
    words = text.split()
    if len(words) < 10:
        raise HTTPException(status_code=400, detail="Text must contain at least 10 words")
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

    # Run the (blocking) model inference off the event loop.
    result = await asyncio.to_thread(classify_text, text)

    return result
39
+
40
+
41
# Extract text from uploaded files (.docx, .pdf, .txt).
async def extract_file_contents(file: UploadFile) -> str:
    """Dispatch on the upload's MIME type and return its plain text.

    Raises:
        HTTPException: 415 for any content type other than docx/pdf/txt.
    """
    payload = BytesIO(await file.read())
    kind = file.content_type
    if kind == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        return parse_docx(payload)
    if kind == "application/pdf":
        return parse_pdf(payload)
    if kind == "text/plain":
        return parse_txt(payload)
    raise HTTPException(status_code=415, detail="Invalid file type. Only .docx,.pdf and .txt are allowed")
53
+
54
async def handle_file_upload(file: UploadFile):
    """Extract text from an uploaded document and classify it whole.

    Raises:
        HTTPException: 413 (too long), 404 (empty), 415 (bad type),
            500 (unexpected failure).
    """
    try:
        file_contents = await extract_file_contents(file)
        # Fix: apply the normalized text — the original discarded the return
        # value, so the trailing danda (।) was never appended.
        file_contents = end_symbol_for_NP_text(file_contents)
        if len(file_contents) > 10000:
            raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        result = await asyncio.to_thread(classify_text, cleaned_text)
        return result
    except HTTPException:
        # Fix: deliberate HTTP errors (413/404/415) were previously caught by
        # the generic handler below and re-raised as 500.
        raise
    except Exception as e:
        # NOTE(review): `logging` is only in scope via the star import from
        # preprocess — consider importing it directly in this module.
        logging.error(f"Error processing file: {e}")
        raise HTTPException(status_code=500, detail="Error processing the file")
70
+
71
+
72
+
73
async def handle_sentence_level_analysis(text: str):
    """Split text on the danda (।) and classify each sentence separately.

    Returns {"analysis": [{"text", "result", "likelihood"}, ...]}.

    Raises:
        HTTPException: 413 if the text exceeds 10,000 characters.
    """
    text = text.strip()
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

    # Fix: apply the normalized text — the original discarded the return value
    # of end_symbol_for_NP_text, so a missing trailing danda was never added
    # and the last sentence could be dropped by the split below.
    text = end_symbol_for_NP_text(text)

    # Split into sentences, re-attaching the danda terminator to each.
    # (Every sentence therefore already ends with "।", so the original's
    # per-sentence end_symbol_for_NP_text call was a no-op and is removed.)
    sentences = [s.strip() + "।" for s in text.split("।") if s.strip()]

    results = []
    for sentence in sentences:
        result = await asyncio.to_thread(classify_text, sentence)
        results.append({
            "text": sentence,
            "result": result["label"],
            "likelihood": result["confidence"]
        })

    return {"analysis": results}
94
+
95
+
96
async def handle_file_sentence(file: UploadFile):
    """Extract text from an uploaded document and classify sentence-by-sentence.

    Returns {"analysis": [{"text", "result", "likelihood"}, ...]}.

    Raises:
        HTTPException: 413 (too long), 404 (empty), 415 (bad type),
            500 (unexpected failure).
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        # Split into sentences, re-attaching the danda terminator to each.
        sentences = [s.strip() + "।" for s in cleaned_text.split("।") if s.strip()]

        results = []
        for sentence in sentences:
            result = await asyncio.to_thread(classify_text, sentence)
            results.append({
                "text": sentence,
                "result": result["label"],
                "likelihood": result["confidence"]
            })

        return {"analysis": results}

    except HTTPException:
        # Fix: deliberate HTTP errors (413/404/415) were previously caught by
        # the generic handler below and re-raised as 500.
        raise
    except Exception as e:
        # NOTE(review): `logging` is only in scope via the star import from
        # preprocess — consider importing it directly in this module.
        logging.error(f"Error processing file: {e}")
        raise HTTPException(status_code=500, detail="Error processing the file")
126
+
127
+
128
def classify(text: str):
    """Synchronous convenience wrapper around classify_text."""
    return classify_text(text)
130
+
features/nepali_text_classifier/inferencer.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
from .model_loader import get_model_tokenizer
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def classify_text(text: str):
    """Tokenize `text`, run the classifier, and return label + confidence.

    Returns {"label": "Human" | "AI", "confidence": <percent, 2 dp>}.
    """
    model, tokenizer = get_model_tokenizer()
    encoded = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        raw = model(**encoded)
        # The custom XLMRClassifier returns a bare tensor; HF models return
        # an output object carrying `.logits`.
        logits = raw if isinstance(raw, torch.Tensor) else raw.logits
        scores = F.softmax(logits, dim=1)
        winner = torch.argmax(scores, dim=1).item()
        confidence_pct = scores[0][winner].item() * 100

    return {"label": "Human" if winner == 0 else "AI", "confidence": round(confidence_pct, 2)}
21
+
22
+
23
+
features/nepali_text_classifier/model_loader.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ import logging
7
+ from huggingface_hub import snapshot_download
8
+ from transformers import AutoTokenizer, AutoModel
9
+
10
# Configs
REPO_ID = "can-org/Nepali-AI-VS-HUMAN"  # Hugging Face repo with tokenizer + weights
BASE_DIR = "./np_text_model"  # local cache directory for the downloaded snapshot
TOKENIZER_DIR = os.path.join(BASE_DIR, "classifier")  # <- update this to match your uploaded folder
WEIGHTS_PATH = os.path.join(BASE_DIR, "model_95_acc.pth")  # <- change to match actual uploaded weight
# Use the GPU when available; the model and all inputs are moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
+
17
# Define model class
class XLMRClassifier(nn.Module):
    """XLM-RoBERTa encoder with a 2-way linear head (Human vs AI)."""

    def __init__(self):
        super(XLMRClassifier, self).__init__()
        self.bert = AutoModel.from_pretrained("xlm-roberta-base")
        self.classifier = nn.Linear(self.bert.config.hidden_size, 2)

    def forward(self, input_ids, attention_mask):
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Classify from the [CLS] (first-token) representation.
        pooled = encoded.last_hidden_state[:, 0, :]
        return self.classifier(pooled)
28
+
29
# Globals for caching the loaded model/tokenizer across calls
# (populated lazily by get_model_tokenizer()).
_model = None
_tokenizer = None
32
+
33
def download_model_repo():
    """Download the Hugging Face snapshot into BASE_DIR unless present.

    NOTE(review): an existing BASE_DIR is assumed complete — a partial copy
    would be reused without re-download; confirm this is acceptable.
    """
    if os.path.isdir(BASE_DIR):  # isdir() implies exists()
        logging.info("Model already downloaded.")
        return
    snapshot_path = snapshot_download(repo_id=REPO_ID)
    os.makedirs(BASE_DIR, exist_ok=True)
    shutil.copytree(snapshot_path, BASE_DIR, dirs_exist_ok=True)
40
+
41
def load_model():
    """Fetch artifacts if needed, then build the model and tokenizer."""
    download_model_repo()
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR)
    classifier = XLMRClassifier().to(device)
    classifier.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
    classifier.eval()  # inference mode: disables dropout
    return classifier, tokenizer
48
+
49
def get_model_tokenizer():
    """Return the cached (model, tokenizer) pair, loading it on first call."""
    global _model, _tokenizer
    if _model is None or _tokenizer is None:
        # Populate both caches in one shot.
        _model, _tokenizer = load_model()
    return _model, _tokenizer
54
+
features/nepali_text_classifier/preprocess.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import fitz # PyMuPDF
2
+ import docx
3
+ from io import BytesIO
4
+ import logging
5
+ from fastapi import HTTPException
6
+ from pypdf import PdfReader
7
+
8
def parse_docx(file: BytesIO):
    """Concatenate every paragraph of a .docx, one per line."""
    document = docx.Document(file)
    return "".join(para.text + "\n" for para in document.paragraphs)
14
+
15
+
16
def parse_pdf(file: BytesIO):
    """Extract and concatenate the text of every page of a PDF.

    Raises:
        HTTPException: 500 if the PDF cannot be parsed.
    """
    try:
        doc = PdfReader(file)
        text = ""
        for page in doc.pages:
            # Fix: extract_text() may return None for image-only pages; the
            # original crashed with a TypeError on `str + None`.
            text += page.extract_text() or ""
        return text
    except Exception as e:
        logging.error(f"Error while processing PDF: {str(e)}")
        raise HTTPException(
            status_code=500, detail="Error processing PDF file")
27
+
28
def parse_txt(file: BytesIO):
    """Decode the whole stream as UTF-8 text."""
    raw = file.read()
    return raw.decode("utf-8")
30
+
31
def end_symbol_for_NP_text(text: str) -> str:
    """Strip surrounding whitespace and guarantee a trailing danda (।)."""
    trimmed = text.strip()
    return trimmed if trimmed.endswith("।") else trimmed + "।"
features/nepali_text_classifier/routes.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Fix: the original imported `Limiter` from slowapi twice; deduplicated.
from slowapi import Limiter
from slowapi.util import get_remote_address
from fastapi import APIRouter, File, Request, Depends, HTTPException, UploadFile
from fastapi.security import HTTPBearer
from pydantic import BaseModel

from config import ACCESS_RATE
from .controller import handle_file_sentence, handle_sentence_level_analysis, nepali_text_analysis, handle_file_upload
from .inferencer import classify_text

router = APIRouter()
limiter = Limiter(key_func=get_remote_address)
security = HTTPBearer()


# Input schema
class TextInput(BaseModel):
    text: str


@router.post("/analyse")
@limiter.limit(ACCESS_RATE)
async def analyse(request: Request, data: TextInput, token: str = Depends(security)):
    """Classify a block of Nepali text as AI or Human."""
    result = classify_text(data.text)
    return result


@router.post("/upload")
@limiter.limit(ACCESS_RATE)
async def upload_file(request: Request, file: UploadFile = File(...), token: str = Depends(security)):
    """Classify the full text extracted from an uploaded document."""
    return await handle_file_upload(file)


@router.post("/analyse-sentences")
@limiter.limit(ACCESS_RATE)
async def analyse_sentences(request: Request, data: TextInput, token: str = Depends(security)):
    """Sentence-by-sentence classification of raw text.

    Fix: this handler was also named `upload_file`, shadowing the /upload
    handler's function (routes still registered, but introspection and
    FastAPI operation IDs collided).  Route path is unchanged.
    """
    return await handle_sentence_level_analysis(data.text)


@router.post("/file-sentences-analyse")
@limiter.limit(ACCESS_RATE)
async def analyze_sentance_file(request: Request, file: UploadFile = File(...), token: str = Depends(security)):
    """Sentence-by-sentence classification of an uploaded document."""
    return await handle_file_sentence(file)


@router.get("/health")
@limiter.limit(ACCESS_RATE)
def health(request: Request):
    """Liveness probe."""
    return {"status": "ok"}
45
+
features/rag_chatbot/__init__.py ADDED
File without changes