Alleinzellgaenger committed on
Commit
fe79d9c
·
1 Parent(s): 35a6c56

First setup

Browse files
.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .env
6
+ .git
7
+ .gitignore
8
+ archive/*
9
+ archive/
Dockerfile CHANGED
@@ -1,36 +1,23 @@
 
1
  FROM python:3.9-slim
2
 
3
- # install node.js for frontend
4
- RUN apt-get update && apt-get install -y curl
5
- RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash -
6
- RUN apt-get install -y nodejs
7
-
8
- # don't create .pyc files on the import of source files
9
  ENV PYTHONDONTWRITEBYTECODE=1
10
- # see input/output in real time in terminal
11
  ENV PYTHONUNBUFFERED=1
12
 
13
- RUN useradd -m -u 1000 user
14
- USER user
15
- ENV PATH="/home/user/.local/bin:$PATH"
16
-
17
- #set working directory (where COPY– RUN– EXPOSE– is run from)
18
  WORKDIR /app
19
 
20
- # install requirements.txt (generated that using pipreqs) using python -m syntax, because I wanna install the packages into the current working python version
21
- COPY --chown=user:users ./requirements.txt requirements.txt
22
- RUN pip install --no-cache-dir --upgrade pip
23
- RUN pip install --no-cache-dir -r requirements.txt
24
- COPY --chown=user:users . /app
25
-
26
- COPY frontend/package.json frontend/package-lock.json ./frontend/
27
- RUN cd frontend && npm install
28
- COPY frontend/ ./frontend/
29
- RUN cd frontend && npm run build
30
-
31
 
32
- EXPOSE 7860
33
- RUN pip install uvicorn aiofiles
34
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
35
 
 
 
36
 
 
 
 
1
+ # Use an official Python runtime as a base image
2
  FROM python:3.9-slim
3
 
4
+ # Environment variables to prevent Python from writing pyc files and buffering stdout/stderr
 
 
 
 
 
5
  ENV PYTHONDONTWRITEBYTECODE=1
 
6
  ENV PYTHONUNBUFFERED=1
7
 
8
+ # Set working directory inside the container
 
 
 
 
9
  WORKDIR /app
10
 
11
+ # Copy and install the backend requirements
12
+ COPY backend/requirements.txt /app/backend/requirements.txt
13
+ RUN pip install --upgrade pip
14
+ RUN pip install -r /app/backend/requirements.txt
 
 
 
 
 
 
 
15
 
16
+ # Copy the entire project into the container
17
+ COPY . /app
 
18
 
19
+ # Expose port 8000 (the port our app will run on)
20
+ EXPOSE 8000
21
 
22
+ # Command to run the FastAPI app using Uvicorn
23
+ CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "8000"]
.gitattributes β†’ archive/.gitattributes RENAMED
File without changes
archive/Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9-slim
2
+
3
+ # install node.js for frontend
4
+ RUN apt-get update && apt-get install -y curl
5
+ RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash -
6
+ RUN apt-get install -y nodejs
7
+
8
+ # don't create .pyc files on the import of source files
9
+ ENV PYTHONDONTWRITEBYTECODE=1
10
+ # see input/output in real time in terminal
11
+ ENV PYTHONUNBUFFERED=1
12
+
13
+ RUN useradd -m -u 1000 user
14
+ USER user
15
+ ENV PATH="/home/user/.local/bin:$PATH"
16
+
17
+ #set working directory (where COPY– RUN– EXPOSE– is run from)
18
+ WORKDIR /app
19
+
20
+ # install requirements.txt (generated that using pipreqs) using python -m syntax, because I wanna install the packages into the current working python version
21
+ COPY --chown=user:users ./requirements.txt requirements.txt
22
+ RUN pip install --no-cache-dir --upgrade pip
23
+ RUN pip install --no-cache-dir -r requirements.txt
24
+ COPY --chown=user:users . /app
25
+
26
+ COPY frontend/package.json frontend/package-lock.json ./frontend/
27
+ RUN cd frontend && npm install
28
+ COPY frontend/ ./frontend/
29
+ RUN cd frontend && npm run build
30
+
31
+
32
+ EXPOSE 7860
33
+ RUN pip install uvicorn aiofiles
34
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
35
+
36
+
README.md β†’ archive/README.md RENAMED
File without changes
{frontend β†’ archive/frontend}/.gitignore RENAMED
File without changes
{frontend β†’ archive/frontend}/README.md RENAMED
File without changes
{frontend β†’ archive/frontend}/eslint.config.mjs RENAMED
File without changes
{frontend β†’ archive/frontend}/next.config.ts RENAMED
File without changes
{frontend β†’ archive/frontend}/package-lock.json RENAMED
File without changes
{frontend β†’ archive/frontend}/package.json RENAMED
File without changes
{frontend β†’ archive/frontend}/postcss.config.mjs RENAMED
File without changes
{frontend β†’ archive/frontend}/public/file.svg RENAMED
File without changes
{frontend β†’ archive/frontend}/public/globe.svg RENAMED
File without changes
{frontend β†’ archive/frontend}/public/next.svg RENAMED
File without changes
{frontend β†’ archive/frontend}/public/vercel.svg RENAMED
File without changes
{frontend β†’ archive/frontend}/public/window.svg RENAMED
File without changes
{frontend β†’ archive/frontend}/src/app/favicon.ico RENAMED
File without changes
{frontend β†’ archive/frontend}/src/app/globals.css RENAMED
File without changes
{frontend β†’ archive/frontend}/src/app/layout.tsx RENAMED
File without changes
{frontend β†’ archive/frontend}/src/app/page.tsx RENAMED
File without changes
{frontend β†’ archive/frontend}/tailwind.config.ts RENAMED
File without changes
{frontend β†’ archive/frontend}/tsconfig.json RENAMED
File without changes
app.py β†’ backend/app.py RENAMED
@@ -1,49 +1,64 @@
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from transformers import GPT2Tokenizer, GPT2Model, AutoModel
 
4
  import torch as t
5
  import logging
 
6
  logging.basicConfig(level=logging.INFO)
7
  logger = logging.getLogger(__name__)
8
 
9
  app = FastAPI()
10
 
 
11
  app.add_middleware(
12
  CORSMiddleware,
13
- allow_origins=["*"], # -> replace with our frontend URL, I think. So Vercel or something
14
- allow_methods=["POST"], # -> a POST API requests sends information to the server, i.e. login-credentials
15
  allow_headers=["*"],
16
  )
17
 
18
- tokenizer = GPT2Tokenizer.from_pretrained('gpt2') # -> get tokenizer which transforms text sequence in tokens
 
 
 
 
 
 
19
  try:
20
  model = GPT2Model.from_pretrained('gpt2', output_attentions=True)
21
  except Exception as e:
22
  logger.error(f"Model loading failed: {e}")
23
  raise
24
 
25
- @app.post("/process") # test with uvicorn main:app --reload :) then open http://localhost:8000/docs
26
- async def process_text(text: str): # define process operation, i.e. what happens when a POST request has been sent
 
 
 
 
 
 
27
  try:
28
  logger.info(f"Received text: {text}")
29
- # Tokenize input
30
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
31
 
32
- # Run model
33
  with t.no_grad():
34
- outputs = model(**inputs) # ** notation is the unpack operator on dictionaries (confirm that inputs is a dictionary)
35
- attentions = outputs.attentions # Tuple of attention tensors (layers x heads)
36
 
37
  decimals = 2
38
- factor = 10**decimals
39
  attn_series = t.round(t.tensor([
40
  layer_attention.tolist() for layer_attention in attentions
41
- ], dtype=t.double).squeeze(), decimals=2).detach().cpu().tolist()
42
 
43
  return {
44
- "tokens": tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]), # convert back to tokens
45
  "attention": attn_series
46
  }
47
- except Exception as e: # if post request failed
48
  logger.error(f"Error processing text: {e}")
49
- raise HTTPException(status_code=500, detail=str(e))
 
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.staticfiles import StaticFiles
4
+ from transformers import GPT2Tokenizer, GPT2Model
5
  import torch as t
6
  import logging
7
+
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
11
  app = FastAPI()
12
 
13
+ # Configure CORS: In production, you might restrict allowed origins
14
  app.add_middleware(
15
  CORSMiddleware,
16
+ allow_origins=["*"],
17
+ allow_methods=["*"],
18
  allow_headers=["*"],
19
  )
20
 
21
+ # Mount static files (frontend) so that visiting "/" serves index.html
22
+ # Note: The directory path "../frontend" works because when running in Docker,
23
+ # our working directory is set to /app, and the frontend folder is at /app/frontend.
24
+ app.mount("/", StaticFiles(directory="../frontend", html=True), name="static")
25
+
26
+ # Load tokenizer and GPT2 model
27
+ tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
28
  try:
29
  model = GPT2Model.from_pretrained('gpt2', output_attentions=True)
30
  except Exception as e:
31
  logger.error(f"Model loading failed: {e}")
32
  raise
33
 
34
+ @app.post("/process")
35
+ async def process_text(text: str):
36
+ """
37
+ Process the input text:
38
+ - Tokenizes the text
39
+ - Runs the GPT2 model to obtain attentions
40
+ - Returns the tokens and attention values (rounded to 2 decimals)
41
+ """
42
  try:
43
  logger.info(f"Received text: {text}")
44
+ # Tokenize input text (truncating if needed)
45
  inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
46
 
47
+ # Run the model without gradient computation (inference mode)
48
  with t.no_grad():
49
+ outputs = model(**inputs)
50
+ attentions = outputs.attentions # Tuple of attention tensors for each layer
51
 
52
  decimals = 2
53
+ # Convert attention tensors to lists with rounded decimals
54
  attn_series = t.round(t.tensor([
55
  layer_attention.tolist() for layer_attention in attentions
56
+ ], dtype=t.double).squeeze(), decimals=decimals).detach().cpu().tolist()
57
 
58
  return {
59
+ "tokens": tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]),
60
  "attention": attn_series
61
  }
62
+ except Exception as e:
63
  logger.error(f"Error processing text: {e}")
64
+ raise HTTPException(status_code=500, detail=str(e))
requirements.txt β†’ backend/requirements.txt RENAMED
File without changes
frontend/index.html ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>GPT2 WebApp</title>
6
+ <link rel="stylesheet" href="styles.css">
7
+ </head>
8
+ <body>
9
+ <h1>GPT2 WebApp</h1>
10
+ <form id="textForm">
11
+ <textarea id="inputText" rows="4" cols="50" placeholder="Enter text here..."></textarea><br>
12
+ <button type="submit">Process</button>
13
+ </form>
14
+ <div id="output">
15
+ <!-- Processed output will be displayed here -->
16
+ </div>
17
+ <script src="script.js"></script>
18
+ </body>
19
+ </html>
frontend/script.js ADDED
File without changes
frontend/styles.css ADDED
File without changes