Pujan Neupane committed on
Commit
7098c2e
·
1 Parent(s): 869a4ba

Refact: ADDED DOCKER FILE and updated requirements.txt and app.py for hugging face

Browse files
Files changed (6) hide show
  1. Dockerfile +34 -0
  2. HuggingFace/main.py +0 -18
  3. HuggingFace/readme.md +0 -61
  4. app.py +35 -51
  5. requirements.txt +6 -209
  6. test.sh +0 -1
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the latest slim Python 3.11 image
2
+ FROM python:3.11-slim
3
+
4
+ # Set environment variables
5
+ ENV HOME=/home/user \
6
+ PATH=/home/user/.local/bin:$PATH \
7
+ PYTHONDONTWRITEBYTECODE=1 \
8
+ PYTHONUNBUFFERED=1
9
+
10
+ # Install system dependencies
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ build-essential \
13
+ git \
14
+ curl \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ # Create a non-root user for safety
18
+ RUN useradd -ms /bin/bash user
19
+ USER user
20
+ WORKDIR $HOME/app
21
+
22
+ # Copy app source code
23
+ COPY --chown=user . .
24
+
25
+ # Install Python dependencies
26
+ RUN pip install --no-cache-dir --upgrade pip \
27
+ && pip install --no-cache-dir -r requirements.txt
28
+
29
+ # Expose port
30
+ EXPOSE 7860
31
+
32
+ # Start the FastAPI app using uvicorn
33
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
34
+
HuggingFace/main.py DELETED
@@ -1,18 +0,0 @@
1
- import os
2
- from huggingface_hub import Repository
3
-
4
-
5
- def download_repo():
6
- hf_token = os.getenv("HF_TOKEN")
7
- if not hf_token:
8
- raise ValueError("HF_TOKEN not found in environment variables.")
9
-
10
- repo_id = "Pujan-Dev/test"
11
- local_dir = "../Ai-Text-Detector/"
12
-
13
- repo = Repository(local_dir, clone_from=repo_id, token=hf_token)
14
- print(f"Repository downloaded to: {local_dir}")
15
-
16
-
17
- if __name__ == "__main__":
18
- download_repo()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
HuggingFace/readme.md DELETED
@@ -1,61 +0,0 @@
1
- ### Hugging Face CLI Tool
2
-
3
- This CLI tool allows you to **upload** and **download** models from Hugging Face repositories. It requires a **Hugging Face Access Token (`HF_TOKEN`)** for authentication, especially for private repositories.
4
-
5
- ### Prerequisites
6
-
7
- 1. **Install Hugging Face Hub**:
8
-
9
- ```bash
10
- pip install huggingface_hub
11
- ```
12
-
13
- 2. **Get HF_TOKEN**:
14
- - Log in to [Hugging Face](https://huggingface.co/).
15
- - Go to **Settings** → **Access Tokens** → **Create a new token** with `read` and `write` permissions.
16
- - Save the token.
17
-
18
- ### Usage
19
-
20
- 1. **Set the Token**:
21
-
22
- - **Linux/macOS**:
23
- ```bash
24
- export HF_TOKEN=your_token_here
25
- ```
26
- - **Windows (CMD)**:
27
- ```bash
28
- set HF_TOKEN=your_token_here
29
- ```
30
-
31
- 2. **Download Model**:
32
-
33
- ```bash
34
- python main.py --download --repo-id <repo_name> --save-dir <local_save_path>
35
- ```
36
-
37
- 3. **Upload Model**:
38
- ```bash
39
- python main.py --upload --repo-id <repo_name> --model-path <local_model_path>
40
- ```
41
-
42
- ### Example
43
-
44
- To download a model:
45
-
46
- ```bash
47
- python main.py
48
- ```
49
-
50
- ### Authentication
51
-
52
- Ensure you set `HF_TOKEN` to access private repositories. If not set, the script will raise an error.
53
- Here’s a clearer and more polished version of that note:
54
-
55
- ---
56
-
57
- ### ⚠️ Note
58
-
59
- **Make sure to run this script from the `HuggingFace` directory to ensure correct path resolution and functionality.**
60
-
61
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,44 +1,40 @@
1
  import torch
2
- from transformers import GPT2LMHeadModel, GPT2TokenizerFast
3
- from fastapi import FastAPI, HTTPException, Header
4
  from pydantic import BaseModel
5
- import asyncio
6
- from concurrent.futures import ThreadPoolExecutor
7
  from contextlib import asynccontextmanager
8
- from dotenv import dotenv_values
9
 
10
- # FastAPI instance
11
  app = FastAPI()
12
- executor = ThreadPoolExecutor(max_workers=20)
13
-
14
- # Load .env file
15
- env = dotenv_values(".env")
16
- EXPECTED_TOKEN = env.get("SECRET_TOKEN")
17
 
18
- # Global variables for model and tokenizer
19
  model, tokenizer = None, None
20
 
21
- # Function to verify token
22
-
23
-
24
- def verify_token(auth: str):
25
- if auth != f"Bearer {EXPECTED_TOKEN}":
26
- raise HTTPException(status_code=403, detail="Unauthorized")
27
-
28
-
29
  # Function to load model and tokenizer
30
 
31
 
32
  def load_model():
33
  model_path = "./Ai-Text-Detector/model"
34
  weights_path = "./Ai-Text-Detector/model_weights.pth"
35
- tokenizer = GPT2TokenizerFast.from_pretrained(model_path)
36
- model = GPT2LMHeadModel.from_pretrained("gpt2")
37
- model.load_state_dict(torch.load(weights_path, map_location=torch.device("cpu")))
38
- model.eval() # Set the model to evaluation mode
 
 
 
 
 
 
 
 
39
  return model, tokenizer
40
 
41
 
 
 
 
42
  @asynccontextmanager
43
  async def lifespan(app: FastAPI):
44
  global model, tokenizer
@@ -46,20 +42,20 @@ async def lifespan(app: FastAPI):
46
  yield
47
 
48
 
49
- # Attach the lifespan context manager
50
  app = FastAPI(lifespan=lifespan)
51
 
52
- # Request body for input data
53
 
54
 
55
  class TextInput(BaseModel):
56
  text: str
57
 
58
 
59
- # Sync function to classify text
60
 
61
 
62
- def classify_text_sync(sentence: str):
63
  inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
64
  input_ids = inputs["input_ids"]
65
  attention_mask = inputs["attention_mask"]
@@ -70,35 +66,26 @@ def classify_text_sync(sentence: str):
70
  perplexity = torch.exp(loss).item()
71
 
72
  if perplexity < 60:
73
- result = "AI-generated*"
74
  elif perplexity < 80:
75
- result = "Probably AI-generated*"
76
  else:
77
- result = "Human-written*"
78
 
79
  return result, perplexity
80
 
81
 
82
- # Async wrapper for text classification
83
-
84
-
85
- async def classify_text(sentence: str):
86
- loop = asyncio.get_event_loop()
87
- return await loop.run_in_executor(executor, classify_text_sync, sentence)
88
-
89
-
90
  # POST route to analyze text
91
 
92
 
93
  @app.post("/analyze")
94
- async def analyze_text(data: TextInput, authorization: str = Header(default="")):
95
- verify_token(authorization) # Token verification
96
  user_input = data.text.strip()
97
-
98
  if not user_input:
99
  raise HTTPException(status_code=400, detail="Text cannot be empty")
100
 
101
- result, perplexity = await classify_text(user_input)
 
102
 
103
  return {
104
  "result": result,
@@ -119,11 +106,8 @@ async def health_check():
119
 
120
  @app.get("/")
121
  def index():
122
- return {"message": "It's an API"}
123
-
124
-
125
- # Start the app (run with uvicorn)
126
- if __name__ == "__main__":
127
- import uvicorn
128
-
129
- uvicorn.run("main:app", host="0.0.0.0", port=8000, workers=4)
 
1
  import torch
2
+ from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
3
+ from fastapi import FastAPI, HTTPException
4
  from pydantic import BaseModel
 
 
5
  from contextlib import asynccontextmanager
6
+ import asyncio
7
 
8
+ # FastAPI app instance
9
  app = FastAPI()
 
 
 
 
 
10
 
11
+ # Global model and tokenizer variables
12
  model, tokenizer = None, None
13
 
 
 
 
 
 
 
 
 
14
  # Function to load model and tokenizer
15
 
16
 
17
  def load_model():
18
  model_path = "./Ai-Text-Detector/model"
19
  weights_path = "./Ai-Text-Detector/model_weights.pth"
20
+
21
+ try:
22
+ tokenizer = GPT2TokenizerFast.from_pretrained(model_path)
23
+ config = GPT2Config.from_pretrained(model_path)
24
+ model = GPT2LMHeadModel(config)
25
+ model.load_state_dict(
26
+ torch.load(weights_path, map_location=torch.device("cpu"))
27
+ )
28
+ model.eval() # Set model to evaluation mode
29
+ except Exception as e:
30
+ raise RuntimeError(f"Error loading model: {str(e)}")
31
+
32
  return model, tokenizer
33
 
34
 
35
+ # Load model on app startup
36
+
37
+
38
  @asynccontextmanager
39
  async def lifespan(app: FastAPI):
40
  global model, tokenizer
 
42
  yield
43
 
44
 
45
+ # Attach startup loader
46
  app = FastAPI(lifespan=lifespan)
47
 
48
+ # Input schema
49
 
50
 
51
  class TextInput(BaseModel):
52
  text: str
53
 
54
 
55
+ # Sync text classification
56
 
57
 
58
+ def classify_text(sentence: str):
59
  inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
60
  input_ids = inputs["input_ids"]
61
  attention_mask = inputs["attention_mask"]
 
66
  perplexity = torch.exp(loss).item()
67
 
68
  if perplexity < 60:
69
+ result = "AI-generated"
70
  elif perplexity < 80:
71
+ result = "Probably AI-generated"
72
  else:
73
+ result = "Human-written"
74
 
75
  return result, perplexity
76
 
77
 
 
 
 
 
 
 
 
 
78
  # POST route to analyze text
79
 
80
 
81
  @app.post("/analyze")
82
+ async def analyze_text(data: TextInput):
 
83
  user_input = data.text.strip()
 
84
  if not user_input:
85
  raise HTTPException(status_code=400, detail="Text cannot be empty")
86
 
87
+ # Run classification in a worker thread so the event loop is not blocked
88
+ result, perplexity = await asyncio.to_thread(classify_text, user_input)
89
 
90
  return {
91
  "result": result,
 
106
 
107
  @app.get("/")
108
  def index():
109
+ return {
110
+ "message": "FastAPI API is up.",
111
+ "try": "/docs to test the API.",
112
+ "status": "OK",
113
+ }
 
 
 
requirements.txt CHANGED
@@ -1,210 +1,7 @@
1
- absl-py==2.2.2
2
- accelerate==1.6.0
3
- aiohappyeyeballs==2.6.1
4
- aiohttp==3.11.16
5
- aiosignal==1.3.2
6
- altair==5.5.0
7
- annotated-types==0.7.0
8
- anyio==4.9.0
9
- argon2-cffi==23.1.0
10
- argon2-cffi-bindings==21.2.0
11
- arrow==1.3.0
12
- asgiref==3.8.1
13
- asttokens==3.0.0
14
- async-lru==2.0.5
15
- attrs==25.3.0
16
- babel==2.17.0
17
- beautifulsoup4==4.13.4
18
- bleach==6.2.0
19
- blinker==1.9.0
20
- cachetools==5.5.2
21
- certifi==2025.1.31
22
- cffi==1.17.1
23
- charset-normalizer==2.1.1
24
- click==8.1.8
25
- comm==0.2.2
26
- contourpy==1.3.1
27
- cycler==0.12.1
28
- datasets==3.5.0
29
- DateTime==4.7
30
- debugpy==1.8.13
31
- decorator==5.2.1
32
- defusedxml==0.7.1
33
- dill==0.3.8
34
- Django==5.2
35
- dotenv==0.9.9
36
- executing==2.2.0
37
- fastapi==0.115.12
38
- fastjsonschema==2.21.1
39
- filelock==3.13.1
40
- Flask==3.1.0
41
- flask-cors==5.0.1
42
- fonttools==4.56.0
43
- fqdn==1.5.1
44
- frozenlist==1.6.0
45
- fsspec==2024.6.1
46
- generativeai==0.0.1
47
- gitdb==4.0.12
48
- GitPython==3.1.44
49
- google-ai-generativelanguage==0.6.15
50
- google-api-core==2.24.2
51
- google-api-python-client==2.165.0
52
- google-auth==2.38.0
53
- google-auth-httplib2==0.2.0
54
- google-genai==1.7.0
55
- google-generativeai==0.8.4
56
- googleapis-common-protos==1.69.2
57
- grpcio==1.71.0
58
- grpcio-status==1.71.0
59
- h11==0.14.0
60
- h5py==3.13.0
61
- html5lib==1.1
62
- httpcore==1.0.7
63
- httplib2==0.22.0
64
- httpx==0.28.1
65
- huggingface-hub==0.30.2
66
- idna==3.10
67
- inquirerpy==0.3.4
68
- ipykernel==6.29.5
69
- ipython==9.0.2
70
- ipython_pygments_lexers==1.1.1
71
- isoduration==20.11.0
72
- itsdangerous==2.2.0
73
- jedi==0.19.2
74
- Jinja2==3.1.4
75
- joblib==1.4.2
76
- json5==0.12.0
77
- jsonpointer==3.0.0
78
- jsonschema==4.23.0
79
- jsonschema-specifications==2024.10.1
80
- jupyter-events==0.12.0
81
- jupyter-lsp==2.2.5
82
- jupyter_client==8.6.3
83
- jupyter_core==5.7.2
84
- jupyter_server==2.15.0
85
- jupyter_server_terminals==0.5.3
86
- jupyterlab==4.4.0
87
- jupyterlab_pygments==0.3.0
88
- jupyterlab_server==2.27.3
89
- keras==3.9.2
90
- kiwisolver==1.4.8
91
- markdown-it-py==3.0.0
92
- MarkupSafe==3.0.2
93
- matplotlib==3.10.1
94
- matplotlib-inline==0.1.7
95
- mdurl==0.1.2
96
- mechanize==0.4.10
97
- mistune==3.1.3
98
- ml_dtypes==0.5.1
99
- mpmath==1.3.0
100
- multidict==6.4.3
101
- multiprocess==0.70.16
102
- namex==0.0.8
103
- narwhals==1.35.0
104
- nbclient==0.10.2
105
- nbconvert==7.16.6
106
- nbformat==5.10.4
107
- nest-asyncio==1.6.0
108
- networkx==3.3
109
- notebook==7.4.0
110
- notebook_shim==0.2.4
111
- numpy==2.2.4
112
- nvidia-cublas-cu11==11.11.3.6
113
- nvidia-cuda-cupti-cu11==11.8.87
114
- nvidia-cuda-nvrtc-cu11==11.8.89
115
- nvidia-cuda-runtime-cu11==11.8.89
116
- nvidia-cudnn-cu11==9.1.0.70
117
- nvidia-cufft-cu11==10.9.0.58
118
- nvidia-curand-cu11==10.3.0.86
119
- nvidia-cusolver-cu11==11.4.1.48
120
- nvidia-cusparse-cu11==11.7.5.86
121
- nvidia-nccl-cu11==2.21.5
122
- nvidia-nvtx-cu11==11.8.86
123
- optree==0.15.0
124
- overrides==7.7.0
125
- packaging==24.2
126
- pandas==2.2.3
127
- pandocfilters==1.5.1
128
- parso==0.8.4
129
- pexpect==4.9.0
130
- pfzy==0.3.4
131
- pillow==11.1.0
132
- platformdirs==4.3.7
133
- prometheus_client==0.21.1
134
- prompt_toolkit==3.0.50
135
- propcache==0.3.1
136
- proto-plus==1.26.1
137
- protobuf==5.29.4
138
- psutil==7.0.0
139
- ptyprocess==0.7.0
140
- pure_eval==0.2.3
141
- pyarrow==19.0.1
142
- pyasn1==0.6.1
143
- pyasn1_modules==0.4.1
144
- pycparser==2.22
145
- pydantic==2.10.6
146
- pydantic_core==2.27.2
147
- pydeck==0.9.1
148
- pygame==2.6.1
149
- Pygments==2.19.1
150
- pyparsing==3.2.2
151
- pystyle==2.0
152
- python-dateutil==2.9.0.post0
153
- python-dotenv==1.1.0
154
- python-json-logger==3.3.0
155
- pytz==2025.1
156
- PyYAML==6.0.2
157
- pyzmq==26.3.0
158
- referencing==0.36.2
159
- regex==2024.11.6
160
- requests==2.32.3
161
- rfc3339-validator==0.1.4
162
- rfc3986-validator==0.1.1
163
- rich==14.0.0
164
- rpds-py==0.24.0
165
- rsa==4.9
166
- safetensors==0.5.3
167
- scikit-learn==1.6.1
168
- scipy==1.15.2
169
- seaborn==0.13.2
170
- Send2Trash==1.8.3
171
- setuptools==70.2.0
172
- six==1.17.0
173
- smmap==5.0.2
174
- sniffio==1.3.1
175
- soupsieve==2.6
176
- sqlparse==0.5.3
177
- stack-data==0.6.3
178
- starlette==0.46.2
179
- streamlit==1.44.1
180
- sympy==1.13.1
181
- tenacity==9.1.2
182
- terminado==0.18.1
183
- threadpoolctl==3.6.0
184
- tinycss2==1.4.0
185
- tokenizers==0.21.1
186
- toml==0.10.2
187
- torch==2.6.0+cu118
188
- torchaudio==2.6.0+cu118
189
- torchvision==0.21.0+cu118
190
- tornado==6.4.2
191
- tqdm==4.67.1
192
- traitlets==5.14.3
193
  transformers==4.51.3
194
- triton==3.2.0
195
- types-python-dateutil==2.9.0.20241206
196
- typing_extensions==4.12.2
197
- tzdata==2025.2
198
- uri-template==1.3.0
199
- uritemplate==4.1.1
200
- urllib3==1.26.20
201
- watchdog==6.0.0
202
- wcwidth==0.2.13
203
- webcolors==24.11.1
204
- webencodings==0.5.1
205
- websocket-client==1.8.0
206
- websockets==15.0.1
207
- Werkzeug==3.1.3
208
- xxhash==3.5.0
209
- yarl==1.20.0
210
- zope.interface==7.2
 
1
+ torch==2.6.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  transformers==4.51.3
3
+ fastapi==0.103.0
4
+ pydantic==1.10.12
5
+ asyncio==3.4.3
6
+ uvicorn[standard]==0.21.1
7
+
 
 
 
 
 
 
 
 
 
 
 
 
test.sh DELETED
@@ -1 +0,0 @@
1
- echo "ok"