Ubuntu committed on
Commit
bff2f45
·
1 Parent(s): 1823b0d
.ipynb_checkpoints/Dockerfile-checkpoint ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ FROM python:3.8.10
3
+
4
+ RUN pip install --no-cache-dir gradio==3.27.0
5
+ # Install requirements.txt
6
+ RUN pip install --no-cache-dir pip==22.3.1 && pip install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1"
7
+
8
+ RUN sed -i 's http://deb.debian.org http://cdn-aws.deb.debian.org g' /etc/apt/sources.list && sed -i 's http://archive.ubuntu.com http://us-east-1.ec2.archive.ubuntu.com g' /etc/apt/sources.list && sed -i '/security/d' /etc/apt/sources.list && apt-get update && apt-get install -y git git-lfs ffmpeg libsm6 libxext6 cmake libgl1-mesa-glx && rm -rf /var/lib/apt/lists/* && git lfs install
9
+
10
+ RUN useradd -m -u 1000 user
11
+
12
+ WORKDIR /home/user/app
13
+
14
+ RUN --mount=target=requirements.txt,source=requirements.txt pip install --no-cache-dir -r requirements.txt
15
+
16
+ RUN --mount=target=/root/packages.txt,source=packages.txt sed -i 's http://deb.debian.org http://cdn-aws.deb.debian.org g' /etc/apt/sources.list && sed -i 's http://archive.ubuntu.com http://us-east-1.ec2.archive.ubuntu.com g' /etc/apt/sources.list && sed -i '/security/d' /etc/apt/sources.list && apt-get update && xargs -r -a /root/packages.txt apt-get install -y && rm -rf /var/lib/apt/lists/*
17
+
18
+ COPY --link --chown=1000 --from=lfs /app /home/user/app
19
+
20
+ COPY --link --chown=1000 ./ /home/user/app
21
+
22
+
23
+
24
+ # Set up a new user named "user" with user ID 1000
25
+
.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import cv2
4
+ from encoded_video import EncodedVideo, write_video
5
+ import torch
6
+ import numpy as np
7
+ from torchvision.datasets import ImageFolder
8
+ from transformers import ViTFeatureExtractor, ViTForImageClassification, AutoFeatureExtractor, ViTMSNForImageClassification
9
+ from pathlib import Path
10
+ import pytorch_lightning as pl
11
+ from torch.utils.data import DataLoader
12
+ from torchmetrics import Accuracy
13
+ from torchvision import transforms
14
+ from PIL import Image
15
+ import PIL
16
+
17
+ HF_DATASETS_CACHE="./"
18
+
19
+
20
+ def video_identity(video,user_name,class_name,trainortest,ready):
21
+ if ready=='yes':
22
+
23
+ data_dir = Path(str(user_name)+'/train')
24
+ transform = transforms.Compose([
25
+ transforms.ToTensor(),
26
+ transforms.ConvertImageDtype(torch.float)
27
+ ])
28
+ train_ds = ImageFolder(data_dir)
29
+
30
+
31
+ test_dir = Path(str(user_name)+'/test')
32
+ test_ds = ImageFolder(test_dir)
33
+
34
+ label2id = {}
35
+ id2label = {}
36
+
37
+ for i, class_name in enumerate(train_ds.classes):
38
+ label2id[class_name] = str(i)
39
+ id2label[str(i)] = class_name
40
+
41
+ class ImageClassificationCollator:
42
+ def __init__(self, feature_extractor):
43
+ self.feature_extractor = feature_extractor
44
+
45
+ def __call__(self, batch):
46
+ encodings = self.feature_extractor([x[0] for x in batch], return_tensors='pt')
47
+ encodings['labels'] = torch.tensor([x[1] for x in batch], dtype=torch.long)
48
+ return encodings
49
+ feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
50
+ model = ViTForImageClassification.from_pretrained(
51
+ 'google/vit-base-patch16-224-in21k',
52
+ num_labels=len(label2id),
53
+ label2id=label2id,
54
+ id2label=id2label
55
+ )
56
+ collator = ImageClassificationCollator(feature_extractor)
57
+ class Classifier(pl.LightningModule):
58
+
59
+ def __init__(self, model, lr: float = 2e-5, **kwargs):
60
+ super().__init__()
61
+ self.save_hyperparameters('lr', *list(kwargs))
62
+ self.model = model
63
+ self.forward = self.model.forward
64
+ self.val_acc = Accuracy(
65
+ task='multiclass' if model.config.num_labels > 2 else 'binary',
66
+ num_classes=model.config.num_labels
67
+ )
68
+
69
+ def training_step(self, batch, batch_idx):
70
+ outputs = self(**batch)
71
+ self.log(f"train_loss", outputs.loss)
72
+ return outputs.loss
73
+
74
+ def validation_step(self, batch, batch_idx):
75
+ outputs = self(**batch)
76
+ self.log(f"val_loss", outputs.loss)
77
+ acc = self.val_acc(outputs.logits.argmax(1), batch['labels'])
78
+ self.log(f"val_acc", acc, prog_bar=True)
79
+ return outputs.loss
80
+
81
+ def configure_optimizers(self):
82
+ return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
83
+
84
+
85
+
86
+ train_loader = DataLoader(train_ds, batch_size=2, collate_fn=collator, num_workers=8, shuffle=True)
87
+ test_loader = DataLoader(test_ds, batch_size=2, collate_fn=collator, num_workers=8)
88
+
89
+
90
+ for name, param in model.named_parameters():
91
+ param.requires_grad = False
92
+ if name.startswith("classifier"): # choose whatever you like here
93
+ param.requires_grad = True
94
+
95
+ pl.seed_everything(42)
96
+ classifier = Classifier(model, lr=2e-5)
97
+ trainer = pl.Trainer(accelerator='cpu', devices=1, precision=16, max_epochs=3)
98
+
99
+ trainer.fit(classifier, train_loader, test_loader)
100
+
101
+ for batch_idx, data in enumerate(test_loader):
102
+ outputs = model(**data)
103
+ img=data['pixel_values'][0][0]
104
+ preds=str(outputs.logits.softmax(1).argmax(1))
105
+ labels=str(data['labels'])
106
+
107
+ return img, preds, labels
108
+
109
+ else:
110
+ capture = cv2.VideoCapture(video)
111
+ user_d=str(user_name)+'/'+str(trainortest)
112
+ class_d=str(user_name)+'/'+str(trainortest)+'/'+str(class_name)
113
+ if not os.path.exists(user_d):
114
+ os.makedirs(user_d)
115
+ if not os.path.exists(class_d):
116
+ os.makedirs(class_d)
117
+ frameNr = 0
118
+ while (True):
119
+
120
+ success, frame = capture.read()
121
+
122
+ if success:
123
+ cv2.imwrite(f'{class_d}/frame_{frameNr}.jpg', frame)
124
+
125
+ else:
126
+ break
127
+
128
+ frameNr = frameNr+10
129
+
130
+ img=cv2.imread(class_d+'/frame_0.jpg')
131
+
132
+ return img, trainortest, class_d
133
+ demo = gr.Interface(video_identity,
134
+ inputs=[gr.Video(source='upload'),
135
+ gr.Text(),
136
+ gr.Text(),
137
+ gr.Text(label='Which set is this? (type train or test)'),
138
+ gr.Text(label='Are you ready? (type yes or no)')],
139
+ outputs=[gr.Image(),
140
+ gr.Text(),
141
+ gr.Text()],
142
+ cache_examples=True)
143
+ demo.launch(debug=True)
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ opencv-python
2
+ encoded-video
3
+ torch
4
+ numpy
5
+ pytorch-lightning
6
+ torchvision
7
+ transformers
8
+ pathlib
Dockerfile CHANGED
@@ -1,28 +1,25 @@
1
 
2
- FROM python:3.9
3
-
4
- # Set the working directory to /code
5
- WORKDIR /code
6
-
7
- # Copy the current directory contents into the container at /code
8
- COPY ./requirements.txt /code/requirements.txt
9
 
 
10
  # Install requirements.txt
11
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # Set up a new user named "user" with user ID 1000
14
 
15
- # Switch to the "user" user
16
- #USER user
17
- # Set home to the user's home directory
18
- #ENV HOME=/home/user \\
19
- # PATH=/home/user/.local/bin:$PATH
20
-
21
- # Set the working directory to the user's home directory
22
- #WORKDIR $HOME/app
23
-
24
- # Copy the current directory contents into the container at $HOME/app setting the owner to the user
25
- #COPY --chown=user . $HOME/app
26
-
27
- # Start the FastAPI app on port 7860, the default port expected by Spaces
28
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
 
2
+ FROM python:3.8.10
 
 
 
 
 
 
3
 
4
+ RUN pip install --no-cache-dir gradio==3.27.0
5
  # Install requirements.txt
6
+ RUN pip install --no-cache-dir pip==22.3.1 && pip install --no-cache-dir datasets "huggingface-hub>=0.12.1" "protobuf<4" "click<8.1"
7
+
8
+ RUN sed -i 's http://deb.debian.org http://cdn-aws.deb.debian.org g' /etc/apt/sources.list && sed -i 's http://archive.ubuntu.com http://us-east-1.ec2.archive.ubuntu.com g' /etc/apt/sources.list && sed -i '/security/d' /etc/apt/sources.list && apt-get update && apt-get install -y git git-lfs ffmpeg libsm6 libxext6 cmake libgl1-mesa-glx && rm -rf /var/lib/apt/lists/* && git lfs install
9
+
10
+ RUN useradd -m -u 1000 user
11
+
12
+ WORKDIR /home/user/app
13
+
14
+ RUN --mount=target=requirements.txt,source=requirements.txt pip install --no-cache-dir -r requirements.txt
15
+
16
+ RUN --mount=target=/root/packages.txt,source=packages.txt sed -i 's http://deb.debian.org http://cdn-aws.deb.debian.org g' /etc/apt/sources.list && sed -i 's http://archive.ubuntu.com http://us-east-1.ec2.archive.ubuntu.com g' /etc/apt/sources.list && sed -i '/security/d' /etc/apt/sources.list && apt-get update && xargs -r -a /root/packages.txt apt-get install -y && rm -rf /var/lib/apt/lists/*
17
+
18
+ COPY --link --chown=1000 --from=lfs /app /home/user/app
19
+
20
+ COPY --link --chown=1000 ./ /home/user/app
21
+
22
+
23
 
24
  # Set up a new user named "user" with user ID 1000
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,29 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
- from fastapi import FastAPI
3
- from transformers import pipeline
4
-
5
- # Create a new FastAPI app instance
6
- app = FastAPI()
7
-
8
- # Initialize the text generation pipeline
9
- # This function will be able to generate text
10
- # given an input.
11
- pipe = pipeline("text2text-generation",
12
- model="google/flan-t5-small")
13
-
14
- # Define a function to handle the GET request at `/generate`
15
- # The generate() function is defined as a FastAPI route that takes a
16
- # string parameter called text. The function generates text based on the # input using the pipeline() object, and returns a JSON response
17
- # containing the generated text under the key "output"
18
- @app.get("/generate")
19
- def generate(text: str):
20
- """
21
- Using the text2text-generation pipeline from `transformers`, generate text
22
- from the given input text. The model used is `google/flan-t5-small`, which
23
- can be found [here](<https://huggingface.co/google/flan-t5-small>).
24
- """
25
- # Use the pipeline to generate text from the given input text
26
- output = pipe(text)
27
-
28
- # Return the generated text in a JSON response
29
- return {"output": output[0]["generated_text"]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import cv2
4
+ from encoded_video import EncodedVideo, write_video
5
+ import torch
6
+ import numpy as np
7
+ from torchvision.datasets import ImageFolder
8
+ from transformers import ViTFeatureExtractor, ViTForImageClassification, AutoFeatureExtractor, ViTMSNForImageClassification
9
+ from pathlib import Path
10
+ import pytorch_lightning as pl
11
+ from torch.utils.data import DataLoader
12
+ from torchmetrics import Accuracy
13
+ from torchvision import transforms
14
+ from PIL import Image
15
+ import PIL
16
 
17
+ HF_DATASETS_CACHE="./"
18
+
19
+
20
+ def video_identity(video,user_name,class_name,trainortest,ready):
21
+ if ready=='yes':
22
+
23
+ data_dir = Path(str(user_name)+'/train')
24
+ transform = transforms.Compose([
25
+ transforms.ToTensor(),
26
+ transforms.ConvertImageDtype(torch.float)
27
+ ])
28
+ train_ds = ImageFolder(data_dir)
29
+
30
+
31
+ test_dir = Path(str(user_name)+'/test')
32
+ test_ds = ImageFolder(test_dir)
33
+
34
+ label2id = {}
35
+ id2label = {}
36
+
37
+ for i, class_name in enumerate(train_ds.classes):
38
+ label2id[class_name] = str(i)
39
+ id2label[str(i)] = class_name
40
+
41
+ class ImageClassificationCollator:
42
+ def __init__(self, feature_extractor):
43
+ self.feature_extractor = feature_extractor
44
+
45
+ def __call__(self, batch):
46
+ encodings = self.feature_extractor([x[0] for x in batch], return_tensors='pt')
47
+ encodings['labels'] = torch.tensor([x[1] for x in batch], dtype=torch.long)
48
+ return encodings
49
+ feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
50
+ model = ViTForImageClassification.from_pretrained(
51
+ 'google/vit-base-patch16-224-in21k',
52
+ num_labels=len(label2id),
53
+ label2id=label2id,
54
+ id2label=id2label
55
+ )
56
+ collator = ImageClassificationCollator(feature_extractor)
57
+ class Classifier(pl.LightningModule):
58
+
59
+ def __init__(self, model, lr: float = 2e-5, **kwargs):
60
+ super().__init__()
61
+ self.save_hyperparameters('lr', *list(kwargs))
62
+ self.model = model
63
+ self.forward = self.model.forward
64
+ self.val_acc = Accuracy(
65
+ task='multiclass' if model.config.num_labels > 2 else 'binary',
66
+ num_classes=model.config.num_labels
67
+ )
68
+
69
+ def training_step(self, batch, batch_idx):
70
+ outputs = self(**batch)
71
+ self.log(f"train_loss", outputs.loss)
72
+ return outputs.loss
73
+
74
+ def validation_step(self, batch, batch_idx):
75
+ outputs = self(**batch)
76
+ self.log(f"val_loss", outputs.loss)
77
+ acc = self.val_acc(outputs.logits.argmax(1), batch['labels'])
78
+ self.log(f"val_acc", acc, prog_bar=True)
79
+ return outputs.loss
80
+
81
+ def configure_optimizers(self):
82
+ return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
83
+
84
+
85
+
86
+ train_loader = DataLoader(train_ds, batch_size=2, collate_fn=collator, num_workers=8, shuffle=True)
87
+ test_loader = DataLoader(test_ds, batch_size=2, collate_fn=collator, num_workers=8)
88
+
89
+
90
+ for name, param in model.named_parameters():
91
+ param.requires_grad = False
92
+ if name.startswith("classifier"): # choose whatever you like here
93
+ param.requires_grad = True
94
+
95
+ pl.seed_everything(42)
96
+ classifier = Classifier(model, lr=2e-5)
97
+ trainer = pl.Trainer(accelerator='cpu', devices=1, precision=16, max_epochs=3)
98
+
99
+ trainer.fit(classifier, train_loader, test_loader)
100
+
101
+ for batch_idx, data in enumerate(test_loader):
102
+ outputs = model(**data)
103
+ img=data['pixel_values'][0][0]
104
+ preds=str(outputs.logits.softmax(1).argmax(1))
105
+ labels=str(data['labels'])
106
+
107
+ return img, preds, labels
108
+
109
+ else:
110
+ capture = cv2.VideoCapture(video)
111
+ user_d=str(user_name)+'/'+str(trainortest)
112
+ class_d=str(user_name)+'/'+str(trainortest)+'/'+str(class_name)
113
+ if not os.path.exists(user_d):
114
+ os.makedirs(user_d)
115
+ if not os.path.exists(class_d):
116
+ os.makedirs(class_d)
117
+ frameNr = 0
118
+ while (True):
119
+
120
+ success, frame = capture.read()
121
+
122
+ if success:
123
+ cv2.imwrite(f'{class_d}/frame_{frameNr}.jpg', frame)
124
+
125
+ else:
126
+ break
127
+
128
+ frameNr = frameNr+10
129
+
130
+ img=cv2.imread(class_d+'/frame_0.jpg')
131
+
132
+ return img, trainortest, class_d
133
+ demo = gr.Interface(video_identity,
134
+ inputs=[gr.Video(source='upload'),
135
+ gr.Text(),
136
+ gr.Text(),
137
+ gr.Text(label='Which set is this? (type train or test)'),
138
+ gr.Text(label='Are you ready? (type yes or no)')],
139
+ outputs=[gr.Image(),
140
+ gr.Text(),
141
+ gr.Text()],
142
+ cache_examples=True)
143
+ demo.launch(debug=True)
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
- fastapi==0.74.*
2
- requests==2.27.*
3
- uvicorn[standard]==0.17.*
4
- sentencepiece==0.1.*
5
- torch==1.11.*
6
- transformers==4.*
 
 
 
1
+ opencv-python
2
+ encoded-video
3
+ torch
4
+ numpy
5
+ pytorch-lightning
6
+ torchvision
7
+ transformers
8
+ pathlib