ABAO77 commited on
Commit
0e0a0b2
·
1 Parent(s): 0d475b7

[base][abaoxomtieu]: upload folder src

Browse files
app.py CHANGED
@@ -18,8 +18,6 @@ import asyncio
18
  import os
19
  import functools
20
  import threading
21
- import sys
22
- sys.path.append("./src")
23
  load_dotenv()
24
  app = FastAPI(docs_url="/")
25
  app.add_middleware(
 
18
  import os
19
  import functools
20
  import threading
 
 
21
  load_dotenv()
22
  app = FastAPI(docs_url="/")
23
  app.add_middleware(
src/config/llm.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
import os

from langchain_google_genai import GoogleGenerativeAI

# Module-level LLM client shared by the application.
#
# The model name can be overridden with the GEMINI_MODEL environment
# variable; the default preserves the original hard-coded value, so
# existing deployments behave identically.
llm = GoogleGenerativeAI(
    model=os.environ.get("GEMINI_MODEL", "gemini-1.5-flash"),
    temperature=0,  # deterministic output for reproducible parsing results
)
src/data/Screenshot 2024-09-11 080459.png ADDED
src/data/page0.jpg ADDED
src/data/resume-for-fresher-template-281.jpg ADDED
src/data/teacher-resume-example.jpg ADDED
src/data/test.jpg ADDED
src/inference/segment_inference.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import onnxruntime as ort
from src.utils.utils_segment import preprocess, postprocess
import numpy as np


def inference(image: np.ndarray, model_path, threshold_confidence=0.5, threshold_iou=0.7):
    """Run the ONNX segmentation model on a single image.

    Args:
        image: Input image as a numpy array. (Annotation fixed from
            ``np.array`` — a function, not a type — to ``np.ndarray``.)
        model_path: Path to the ONNX model file.
        threshold_confidence: Minimum confidence for a detection to be kept.
        threshold_iou: IoU threshold applied during postprocessing.

    Returns:
        Whatever ``postprocess`` produces from the raw model outputs.

    Note:
        A new ``InferenceSession`` is created on every call, which is
        expensive; callers processing many images should cache the session.
    """
    session = ort.InferenceSession(model_path)
    # Renamed from `input` to avoid shadowing the builtin.
    input_tensor = preprocess(image)
    outputs = postprocess(
        session.run(None, {"images": input_tensor}),
        threshold_confidence=threshold_confidence,
        threshold_iou=threshold_iou,
    )
    return outputs


if __name__ == "__main__":
    model_path = "../model/segment.onnx"
    image_path = "../../test.jpg"
    # NOTE(review): `inference` is annotated to take an image array, but a
    # file-path string is passed here — this demo likely crashes unless
    # `preprocess` also accepts paths. Confirm and load the image (e.g. with
    # cv2/PIL) before calling.
    print(inference(image_path, model_path))
src/model/segment.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e38660d6cb501bc21d33249f1e1dffd9038d57a82f27bd089746e1aa8eca53a9
3
+ size 109150130
src/notebook/data.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ train: /cvparsing-2/train/images
2
+ val: /cvparsing-2/valid/images
3
+ test: /cvparsing-2/test/images
4
+
5
+ nc: 14
6
+ names: ['Achievement', 'Certifications', 'Community', 'Contact', 'Education', 'Experience', 'Interests', 'Languages', 'Name', 'Profil', 'Projects', 'image', 'resume', 'skills']
src/notebook/notebook.ipynb ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 12,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "loading Roboflow workspace...\n",
13
+ "loading Roboflow project...\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stderr",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "Downloading Dataset Version Zip in cvparsing-2 to yolov9:: 100%|██████████| 63864/63864 [00:04<00:00, 15236.33it/s]"
21
+ ]
22
+ },
23
+ {
24
+ "name": "stdout",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "\n"
28
+ ]
29
+ },
30
+ {
31
+ "name": "stderr",
32
+ "output_type": "stream",
33
+ "text": [
34
+ "\n",
35
+ "Extracting Dataset Version Zip to cvparsing-2 in yolov9:: 100%|██████████| 2344/2344 [00:00<00:00, 5118.00it/s]\n"
36
+ ]
37
+ }
38
+ ],
39
+ "source": [
40
+ "!pip install roboflow\n",
41
+ "\n",
42
+ "from roboflow import Roboflow\n",
43
+ "rf = Roboflow(api_key=\"YOUR_ROBOFLOW_API_KEY\")\n",
44
+ "project = rf.workspace(\"capitaletech-wrnth\").project(\"annotation-moxcs\")\n",
45
+ "version = project.version(2)\n",
46
+ "dataset = version.download(\"yolov8\")\n",
47
+ " "
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 1,
53
+ "metadata": {},
54
+ "outputs": [
55
+ {
56
+ "name": "stdout",
57
+ "output_type": "stream",
58
+ "text": [
59
+ "Requirement already satisfied: ultralytics in d:\\fu\\dat\\.venv\\lib\\site-packages (8.2.90)\n",
60
+ "Requirement already satisfied: numpy<2.0.0,>=1.23.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (1.26.4)\n",
61
+ "Requirement already satisfied: matplotlib>=3.3.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (3.9.2)\n",
62
+ "Requirement already satisfied: opencv-python>=4.6.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (4.10.0.84)\n",
63
+ "Requirement already satisfied: pillow>=7.1.2 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (10.4.0)\n",
64
+ "Requirement already satisfied: pyyaml>=5.3.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (6.0.2)\n",
65
+ "Requirement already satisfied: requests>=2.23.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (2.32.3)\n",
66
+ "Requirement already satisfied: scipy>=1.4.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (1.14.1)\n",
67
+ "Requirement already satisfied: torch>=1.8.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (2.4.1)\n",
68
+ "Requirement already satisfied: torchvision>=0.9.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (0.19.1)\n",
69
+ "Requirement already satisfied: tqdm>=4.64.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (4.66.5)\n",
70
+ "Requirement already satisfied: psutil in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (6.0.0)\n",
71
+ "Requirement already satisfied: py-cpuinfo in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (9.0.0)\n",
72
+ "Requirement already satisfied: pandas>=1.1.4 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (2.2.2)\n",
73
+ "Requirement already satisfied: seaborn>=0.11.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (0.13.2)\n",
74
+ "Requirement already satisfied: ultralytics-thop>=2.0.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (2.0.6)\n",
75
+ "Requirement already satisfied: contourpy>=1.0.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (1.3.0)\n",
76
+ "Requirement already satisfied: cycler>=0.10 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (0.12.1)\n",
77
+ "Requirement already satisfied: fonttools>=4.22.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (4.53.1)\n",
78
+ "Requirement already satisfied: kiwisolver>=1.3.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (1.4.7)\n",
79
+ "Requirement already satisfied: packaging>=20.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (24.1)\n",
80
+ "Requirement already satisfied: pyparsing>=2.3.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (3.1.4)\n",
81
+ "Requirement already satisfied: python-dateutil>=2.7 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (2.9.0.post0)\n",
82
+ "Requirement already satisfied: pytz>=2020.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from pandas>=1.1.4->ultralytics) (2024.1)\n",
83
+ "Requirement already satisfied: tzdata>=2022.7 in d:\\fu\\dat\\.venv\\lib\\site-packages (from pandas>=1.1.4->ultralytics) (2024.1)\n",
84
+ "Requirement already satisfied: charset-normalizer<4,>=2 in d:\\fu\\dat\\.venv\\lib\\site-packages (from requests>=2.23.0->ultralytics) (3.3.2)\n",
85
+ "Requirement already satisfied: idna<4,>=2.5 in d:\\fu\\dat\\.venv\\lib\\site-packages (from requests>=2.23.0->ultralytics) (3.7)\n",
86
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from requests>=2.23.0->ultralytics) (2.2.2)\n",
87
+ "Requirement already satisfied: certifi>=2017.4.17 in d:\\fu\\dat\\.venv\\lib\\site-packages (from requests>=2.23.0->ultralytics) (2024.8.30)\n",
88
+ "Requirement already satisfied: filelock in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.16.0)\n",
89
+ "Requirement already satisfied: typing-extensions>=4.8.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (4.12.2)\n",
90
+ "Requirement already satisfied: sympy in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (1.13.2)\n",
91
+ "Requirement already satisfied: networkx in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.3)\n",
92
+ "Requirement already satisfied: jinja2 in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.1.4)\n",
93
+ "Requirement already satisfied: fsspec in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (2024.9.0)\n",
94
+ "Requirement already satisfied: colorama in d:\\fu\\dat\\.venv\\lib\\site-packages (from tqdm>=4.64.0->ultralytics) (0.4.6)\n",
95
+ "Requirement already satisfied: six>=1.5 in d:\\fu\\dat\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=3.3.0->ultralytics) (1.16.0)\n",
96
+ "Requirement already satisfied: MarkupSafe>=2.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from jinja2->torch>=1.8.0->ultralytics) (2.1.5)\n",
97
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from sympy->torch>=1.8.0->ultralytics) (1.3.0)\n"
98
+ ]
99
+ }
100
+ ],
101
+ "source": [
102
+ "!pip install ultralytics"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 6,
108
+ "metadata": {},
109
+ "outputs": [],
110
+ "source": [
111
+ "yaml_text = \"\"\"train: /cvparsing-2/train/images\n",
112
+ "val: /cvparsing-2/valid/images\n",
113
+ "test: /cvparsing-2/test/images\n",
114
+ "\n",
115
+ "nc: 14\n",
116
+ "names: ['Achievement', 'Certifications', 'Community', 'Contact', 'Education', 'Experience', 'Interests', 'Languages', 'Name', 'Profil', 'Projects', 'image', 'resume', 'skills']\"\"\"\n",
117
+ "\n",
118
+ "with open(\"./data.yaml\", 'w') as file:\n",
119
+ "    file.write(yaml_text)\n",
120
+ "\n",
121
+ "# To display the content of the file, you can use the 'cat' command like this:\n",
122
+ "# %cat /kaggle/working/data.yaml\n"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 20,
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "!yolo train model=yolov9c.yaml data=D:/FU/DAT/src/notebook/datasets/data.yaml epochs=100 imgsz=640 device=0"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": 1,
137
+ "metadata": {},
138
+ "outputs": [
139
+ {
140
+ "name": "stdout",
141
+ "output_type": "stream",
142
+ "text": [
143
+ "Ultralytics YOLOv8.2.90 Python-3.11.9 torch-2.4.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4050 Laptop GPU, 6140MiB)\n",
144
+ "Setup complete (20 CPUs, 15.7 GB RAM, 33.9/97.7 GB disk)\n"
145
+ ]
146
+ }
147
+ ],
148
+ "source": [
149
+ "# %pip install ultralytics\n",
150
+ "import ultralytics\n",
151
+ "ultralytics.checks()"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": 1,
157
+ "metadata": {},
158
+ "outputs": [
159
+ {
160
+ "name": "stderr",
161
+ "output_type": "stream",
162
+ "text": [
163
+ "d:\\FU\\DAT\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
164
+ " from .autonotebook import tqdm as notebook_tqdm\n",
165
+ "d:\\FU\\DAT\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:159: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\htbqn\\.cache\\huggingface\\hub\\models--microsoft--trocr-base-handwritten. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
166
+ "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
167
+ " warnings.warn(message)\n",
168
+ "d:\\FU\\DAT\\.venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
169
+ " warnings.warn(\n",
170
+ "Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']\n",
171
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
172
+ "d:\\FU\\DAT\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:1258: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
173
+ " warnings.warn(\n"
174
+ ]
175
+ }
176
+ ],
177
+ "source": [
178
+ "from transformers import TrOCRProcessor, VisionEncoderDecoderModel\n",
179
+ "from PIL import Image\n",
180
+ "import requests\n",
181
+ "\n",
182
+ "# load image from the IAM database\n",
183
+ "# url = 'https://fki.tic.heia-fr.ch/static/img/a01-122-02-00.jpg'\n",
184
+ "image = Image.open(r'./images.png').convert(\"RGB\")\n",
185
+ "\n",
186
+ "processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')\n",
187
+ "model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')\n",
188
+ "pixel_values = processor(images=image, return_tensors=\"pt\").pixel_values\n",
189
+ "\n",
190
+ "generated_ids = model.generate(pixel_values)\n",
191
+ "generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 3,
197
+ "metadata": {},
198
+ "outputs": [
199
+ {
200
+ "data": {
201
+ "text/plain": [
202
+ "tensor([[ 2, 288, 321, 2]])"
203
+ ]
204
+ },
205
+ "execution_count": 3,
206
+ "metadata": {},
207
+ "output_type": "execute_result"
208
+ }
209
+ ],
210
+ "source": [
211
+ "generated_ids"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": 6,
217
+ "metadata": {},
218
+ "outputs": [],
219
+ "source": [
220
+ "import onnxruntime as ort\n",
221
+ "import numpy as np\n",
222
+ "from PIL import Image\n",
223
+ "\n",
224
+ "# Load the ONNX model\n",
225
+ "model_path = \"../model/section_detection.onnx\"\n",
226
+ "session = ort.InferenceSession(model_path)\n",
227
+ "\n",
228
+ "# Load and preprocess the image\n",
229
+ "image_path = 'D:/FU/DAT/src/notebook/datasets/train/images/1629756071561_jpg.rf.05f192117b5f0f8125474abdf3392f72.jpg'\n",
230
+ "image = Image.open(image_path)\n",
231
+ "image_data = np.array(image).astype('float32').transpose(2, 0, 1)\n"
232
+ ]
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": 7,
237
+ "metadata": {},
238
+ "outputs": [
239
+ {
240
+ "data": {
241
+ "text/plain": [
242
+ "(1, 3, 640, 640)"
243
+ ]
244
+ },
245
+ "execution_count": 7,
246
+ "metadata": {},
247
+ "output_type": "execute_result"
248
+ }
249
+ ],
250
+ "source": [
251
+ "image_data = np.expand_dims(image_data, axis=0)\n",
252
+ "image_data.shape"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": 8,
258
+ "metadata": {},
259
+ "outputs": [],
260
+ "source": [
261
+ "input_name = session.get_inputs()[0].name\n",
262
+ "output_name = session.get_outputs()[0].name"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": 9,
268
+ "metadata": {},
269
+ "outputs": [],
270
+ "source": [
271
+ "result = session.run([output_name], {input_name: image_data})[0]"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": 33,
277
+ "metadata": {},
278
+ "outputs": [
279
+ {
280
+ "data": {
281
+ "text/plain": [
282
+ "(18, 8400)"
283
+ ]
284
+ },
285
+ "execution_count": 33,
286
+ "metadata": {},
287
+ "output_type": "execute_result"
288
+ }
289
+ ],
290
+ "source": [
291
+ "result[0].shape"
292
+ ]
293
+ },
294
+ {
295
+ "cell_type": "code",
296
+ "execution_count": null,
297
+ "metadata": {},
298
+ "outputs": [],
299
+ "source": []
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": 10,
304
+ "metadata": {},
305
+ "outputs": [
306
+ {
307
+ "name": "stdout",
308
+ "output_type": "stream",
309
+ "text": [
310
+ "WARNING Unable to automatically guess model task, assuming 'task=detect'. Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify','pose' or 'obb'.\n",
311
+ "Loading ..\\model\\section_detection.onnx for ONNX Runtime inference...\n",
312
+ "\n",
313
+ "image 1/1 D:\\FU\\DAT\\src\\notebook\\datasets\\train\\images\\1629756071561_jpg.rf.05f192117b5f0f8125474abdf3392f72.jpg: 640x640 2 Achievements, 147.6ms\n",
314
+ "Speed: 2.5ms preprocess, 147.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n"
315
+ ]
316
+ }
317
+ ],
318
+ "source": [
319
+ "from ultralytics import YOLO\n",
320
+ "\n",
321
+ "# Load the YOLOv8 model'\n",
322
+ "\n",
323
+ "# Load the exported ONNX model\n",
324
+ "onnx_model = YOLO(\"../model/section_detection.onnx\")\n",
325
+ "\n",
326
+ "# Run inference\n",
327
+ "results = onnx_model(\"D:/FU/DAT/src/notebook/datasets/train/images/1629756071561_jpg.rf.05f192117b5f0f8125474abdf3392f72.jpg\")"
328
+ ]
329
+ },
330
+ {
331
+ "cell_type": "code",
332
+ "execution_count": 11,
333
+ "metadata": {},
334
+ "outputs": [],
335
+ "source": [
336
+ "for result in results:\n",
337
+ " boxes = result.boxes # Boxes object for bounding box outputs\n",
338
+ " masks = result.masks # Masks object for segmentation masks outputs\n",
339
+ " keypoints = result.keypoints # Keypoints object for pose outputs\n",
340
+ " probs = result.probs # Probs object for classification outputs\n",
341
+ " obb = result.obb # Oriented boxes object for OBB outputs\n",
342
+ " result.show() # display to screen\n",
343
+ " result.save(filename=\"result.jpg\") # save to disk"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": 2,
349
+ "metadata": {},
350
+ "outputs": [
351
+ {
352
+ "name": "stdout",
353
+ "output_type": "stream",
354
+ "text": [
355
+ "\n",
356
+ "You are already logged into Roboflow. To make a different login,run roboflow.login(force=True).\n"
357
+ ]
358
+ }
359
+ ],
360
+ "source": [
361
+ "!roboflow login"
362
+ ]
363
+ },
364
+ {
365
+ "cell_type": "code",
366
+ "execution_count": 4,
367
+ "metadata": {},
368
+ "outputs": [
369
+ {
370
+ "ename": "RoboflowAPINotAuthorizedError",
371
+ "evalue": "Unauthorized access to roboflow API - check API key. Visit https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key to learn how to retrieve one.",
372
+ "output_type": "error",
373
+ "traceback": [
374
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
375
+ "\u001b[1;31mHTTPError\u001b[0m Traceback (most recent call last)",
376
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:80\u001b[0m, in \u001b[0;36mwrap_roboflow_api_errors.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 79\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 80\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 81\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mConnectionError, \u001b[38;5;167;01mConnectionError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m error:\n",
377
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:227\u001b[0m, in \u001b[0;36mget_roboflow_model_data\u001b[1;34m(api_key, model_id, endpoint_type, device_id)\u001b[0m\n\u001b[0;32m 223\u001b[0m api_url \u001b[38;5;241m=\u001b[39m _add_params_to_url(\n\u001b[0;32m 224\u001b[0m url\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAPI_BASE_URL\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mendpoint_type\u001b[38;5;241m.\u001b[39mvalue\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodel_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 225\u001b[0m params\u001b[38;5;241m=\u001b[39mparams,\n\u001b[0;32m 226\u001b[0m )\n\u001b[1;32m--> 227\u001b[0m api_data \u001b[38;5;241m=\u001b[39m \u001b[43m_get_from_url\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mapi_url\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 228\u001b[0m cache\u001b[38;5;241m.\u001b[39mset(\n\u001b[0;32m 229\u001b[0m api_data_cache_key,\n\u001b[0;32m 230\u001b[0m api_data,\n\u001b[0;32m 231\u001b[0m expire\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m,\n\u001b[0;32m 232\u001b[0m )\n",
378
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:473\u001b[0m, in \u001b[0;36m_get_from_url\u001b[1;34m(url, json_response)\u001b[0m\n\u001b[0;32m 472\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(wrap_url(url))\n\u001b[1;32m--> 473\u001b[0m \u001b[43mapi_key_safe_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 474\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m json_response:\n",
379
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\utils\\requests.py:15\u001b[0m, in \u001b[0;36mapi_key_safe_raise_for_status\u001b[1;34m(response)\u001b[0m\n\u001b[0;32m 14\u001b[0m response\u001b[38;5;241m.\u001b[39murl \u001b[38;5;241m=\u001b[39m API_KEY_PATTERN\u001b[38;5;241m.\u001b[39msub(deduct_api_key, response\u001b[38;5;241m.\u001b[39murl)\n\u001b[1;32m---> 15\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
380
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\requests\\models.py:1021\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1020\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[1;32m-> 1021\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
381
+ "\u001b[1;31mHTTPError\u001b[0m: 401 Client Error: Unauthorized for url: https://api.roboflow.com/ort/annotation-moxcs/2?nocache=true&device=ABAOXOMTIEU&dynamic=true",
382
+ "\nThe above exception was the direct cause of the following exception:\n",
383
+ "\u001b[1;31mRoboflowAPINotAuthorizedError\u001b[0m Traceback (most recent call last)",
384
+ "Cell \u001b[1;32mIn[4], line 10\u001b[0m\n\u001b[0;32m 7\u001b[0m image \u001b[38;5;241m=\u001b[39m cv2\u001b[38;5;241m.\u001b[39mimread(image_file)\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# load a pre-trained yolov8n model\u001b[39;00m\n\u001b[1;32m---> 10\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mget_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mannotation-moxcs/2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# run inference on our chosen image, image can be a url, a numpy array, a PIL image, etc.\u001b[39;00m\n\u001b[0;32m 13\u001b[0m results \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39minfer(image)[\u001b[38;5;241m0\u001b[39m]\n",
385
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\models\\utils.py:275\u001b[0m, in \u001b[0;36mget_model\u001b[1;34m(model_id, api_key, **kwargs)\u001b[0m\n\u001b[0;32m 274\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_model\u001b[39m(model_id, api_key\u001b[38;5;241m=\u001b[39mAPI_KEY, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Model:\n\u001b[1;32m--> 275\u001b[0m task, model \u001b[38;5;241m=\u001b[39m \u001b[43mget_model_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mapi_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 276\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ROBOFLOW_MODEL_TYPES[(task, model)](model_id, api_key\u001b[38;5;241m=\u001b[39mapi_key, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
386
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\registries\\roboflow.py:115\u001b[0m, in \u001b[0;36mget_model_type\u001b[1;34m(model_id, api_key)\u001b[0m\n\u001b[0;32m 108\u001b[0m save_model_metadata_in_cache(\n\u001b[0;32m 109\u001b[0m dataset_id\u001b[38;5;241m=\u001b[39mdataset_id,\n\u001b[0;32m 110\u001b[0m version_id\u001b[38;5;241m=\u001b[39mversion_id,\n\u001b[0;32m 111\u001b[0m project_task_type\u001b[38;5;241m=\u001b[39mproject_task_type,\n\u001b[0;32m 112\u001b[0m model_type\u001b[38;5;241m=\u001b[39mmodel_type,\n\u001b[0;32m 113\u001b[0m )\n\u001b[0;32m 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m project_task_type, model_type\n\u001b[1;32m--> 115\u001b[0m api_data \u001b[38;5;241m=\u001b[39m \u001b[43mget_roboflow_model_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 117\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mModelEndpointType\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mORT\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mGLOBAL_DEVICE_ID\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 120\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mort\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m api_data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
ModelArtefactError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError loading model artifacts from Roboflow API.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
387
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:93\u001b[0m, in \u001b[0;36mwrap_roboflow_api_errors.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 91\u001b[0m error_handler \u001b[38;5;241m=\u001b[39m user_handler_override\u001b[38;5;241m.\u001b[39mget(status_code, default_handler)\n\u001b[0;32m 92\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m error_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m---> 93\u001b[0m \u001b[43merror_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43merror\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 94\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RoboflowAPIUnsuccessfulRequestError(\n\u001b[0;32m 95\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsuccessful request to Roboflow API with response code: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 96\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merror\u001b[39;00m\n\u001b[0;32m 97\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mInvalidJSONError \u001b[38;5;28;01mas\u001b[39;00m error:\n",
388
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:60\u001b[0m, in \u001b[0;36m<lambda>\u001b[1;34m(e)\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_from_lambda\u001b[39m(\n\u001b[0;32m 54\u001b[0m inner_error: \u001b[38;5;167;01mException\u001b[39;00m, exception_type: Type[\u001b[38;5;167;01mException\u001b[39;00m], message: \u001b[38;5;28mstr\u001b[39m\n\u001b[0;32m 55\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 56\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception_type(message) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01minner_error\u001b[39;00m\n\u001b[0;32m 59\u001b[0m DEFAULT_ERROR_HANDLERS \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m---> 60\u001b[0m \u001b[38;5;241m401\u001b[39m: \u001b[38;5;28;01mlambda\u001b[39;00m e: \u001b[43mraise_from_lambda\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 61\u001b[0m \u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 62\u001b[0m \u001b[43m \u001b[49m\u001b[43mRoboflowAPINotAuthorizedError\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUnauthorized access to roboflow API - check API key. 
Visit \u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[0;32m 64\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhttps://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key to learn how to retrieve one.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 65\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m 66\u001b[0m \u001b[38;5;241m404\u001b[39m: \u001b[38;5;28;01mlambda\u001b[39;00m e: raise_from_lambda(\n\u001b[0;32m 67\u001b[0m e, RoboflowAPINotNotFoundError, NOT_FOUND_ERROR_MESSAGE\n\u001b[0;32m 68\u001b[0m ),\n\u001b[0;32m 69\u001b[0m }\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap_roboflow_api_errors\u001b[39m(\n\u001b[0;32m 73\u001b[0m http_errors_handlers: Optional[\n\u001b[0;32m 74\u001b[0m Dict[\u001b[38;5;28mint\u001b[39m, Callable[[Union[requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mHTTPError]], \u001b[38;5;28;01mNone\u001b[39;00m]]\n\u001b[0;32m 75\u001b[0m ] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 76\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mcallable\u001b[39m:\n\u001b[0;32m 77\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorator\u001b[39m(function: \u001b[38;5;28mcallable\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mcallable\u001b[39m:\n",
389
+ "File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:56\u001b[0m, in \u001b[0;36mraise_from_lambda\u001b[1;34m(inner_error, exception_type, message)\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_from_lambda\u001b[39m(\n\u001b[0;32m 54\u001b[0m inner_error: \u001b[38;5;167;01mException\u001b[39;00m, exception_type: Type[\u001b[38;5;167;01mException\u001b[39;00m], message: \u001b[38;5;28mstr\u001b[39m\n\u001b[0;32m 55\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m---> 56\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception_type(message) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01minner_error\u001b[39;00m\n",
390
+ "\u001b[1;31mRoboflowAPINotAuthorizedError\u001b[0m: Unauthorized access to roboflow API - check API key. Visit https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key to learn how to retrieve one."
391
+ ]
392
+ }
393
+ ],
394
+ "source": [
395
+ "from inference import get_model\n",
396
+ "import supervision as sv\n",
397
+ "import cv2\n",
398
+ "\n",
399
+ "# define the image url to use for inference\n",
400
+ "image_file = \"taylor-swift-album-1989.jpeg\"\n",
401
+ "image = cv2.imread(image_file)\n",
402
+ "\n",
403
+ "# load a pre-trained yolov8n model\n",
404
+ "model = get_model(model_id=\"annotation-moxcs/2\")\n",
405
+ "\n",
406
+ "# run inference on our chosen image, image can be a url, a numpy array, a PIL image, etc.\n",
407
+ "results = model.infer(image)[0]\n",
408
+ "\n",
409
+ "# load the results into the supervision Detections api\n",
410
+ "detections = sv.Detections.from_inference(results)\n",
411
+ "\n",
412
+ "# create supervision annotators\n",
413
+ "bounding_box_annotator = sv.BoundingBoxAnnotator()\n",
414
+ "label_annotator = sv.LabelAnnotator()\n",
415
+ "\n",
416
+ "# annotate the image with our inference results\n",
417
+ "annotated_image = bounding_box_annotator.annotate(\n",
418
+ " scene=image, detections=detections)\n",
419
+ "annotated_image = label_annotator.annotate(\n",
420
+ " scene=annotated_image, detections=detections)\n",
421
+ "\n",
422
+ "# display the image\n",
423
+ "sv.plot_image(annotated_image)"
424
+ ]
425
+ },
426
+ {
427
+ "cell_type": "code",
428
+ "execution_count": null,
429
+ "metadata": {},
430
+ "outputs": [],
431
+ "source": []
432
+ }
433
+ ],
434
+ "metadata": {
435
+ "kernelspec": {
436
+ "display_name": ".venv",
437
+ "language": "python",
438
+ "name": "python3"
439
+ },
440
+ "language_info": {
441
+ "codemirror_mode": {
442
+ "name": "ipython",
443
+ "version": 3
444
+ },
445
+ "file_extension": ".py",
446
+ "mimetype": "text/x-python",
447
+ "name": "python",
448
+ "nbconvert_exporter": "python",
449
+ "pygments_lexer": "ipython3",
450
+ "version": "3.11.9"
451
+ }
452
+ },
453
+ "nbformat": 4,
454
+ "nbformat_minor": 2
455
+ }
src/notebook/result.jpg ADDED
src/notebook/seg.ipynb ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from PIL import Image \n",
10
+ "from pytesseract import pytesseract "
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 6,
16
+ "metadata": {},
17
+ "outputs": [
18
+ {
19
+ "name": "stdout",
20
+ "output_type": "stream",
21
+ "text": [
22
+ "It was the best of\n",
23
+ "times, it was the worst\n",
24
+ "of times, it was the age\n",
25
+ "of wisdom, it was the\n",
26
+ "age of foolishness...\n"
27
+ ]
28
+ }
29
+ ],
30
+ "source": [
31
+ "from PIL import Image \n",
32
+ "from pytesseract import pytesseract \n",
33
+ " \n",
34
+ "path_to_tesseract = r\"C:\\Program Files\\Tesseract-OCR\\tesseract.exe\"\n",
35
+ "image_path = r\"./images.png\"\n",
36
+ "\n",
37
+ "img = Image.open(image_path) \n",
38
+ "\n",
39
+ "\n",
40
+ "pytesseract.tesseract_cmd = path_to_tesseract \n",
41
+ "\n",
42
+ "\n",
43
+ "text = pytesseract.image_to_string(img) \n",
44
+ "\n",
45
+ "print(text[:-1])\n"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "markdown",
50
+ "metadata": {},
51
+ "source": [
52
+ "# test"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "from "
62
+ ]
63
+ }
64
+ ],
65
+ "metadata": {
66
+ "kernelspec": {
67
+ "display_name": "Python 3",
68
+ "language": "python",
69
+ "name": "python3"
70
+ },
71
+ "language_info": {
72
+ "codemirror_mode": {
73
+ "name": "ipython",
74
+ "version": 3
75
+ },
76
+ "file_extension": ".py",
77
+ "mimetype": "text/x-python",
78
+ "name": "python",
79
+ "nbconvert_exporter": "python",
80
+ "pygments_lexer": "ipython3",
81
+ "version": "3.11.9"
82
+ }
83
+ },
84
+ "nbformat": 4,
85
+ "nbformat_minor": 2
86
+ }
src/prompt/promt.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the LLM post-OCR cleanup step: the OCR'd resume dict is
# rendered into {input}; the model is asked to fix spelling, move misplaced
# values to the right fields, and return JSON with the same structure.
format_prompt = """
#Role: You are an expert at correcting spelling errors from interviewee's resume information.
#Instruction:
You are provided with a dictionary containing information from the user's resume by an OCR model. It may have misspellings or wrong entries.
Please correct the spelling of each field.
Move the content of the fields to more appropriate fields if necessary.
You must not fabricate information and create new information.

You must return JSON containing the same format as the original format:

#Input:
My resume is as follows: {input}
"""
src/training/segment_training.ipynb.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cells":[{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2024-09-08T06:52:15.629296Z","iopub.status.busy":"2024-09-08T06:52:15.628873Z","iopub.status.idle":"2024-09-08T06:52:35.266264Z","shell.execute_reply":"2024-09-08T06:52:35.265350Z","shell.execute_reply.started":"2024-09-08T06:52:15.629258Z"},"trusted":true},"outputs":[],"source":["!pip install roboflow\n","\n","from roboflow import Roboflow\n","rf = Roboflow(api_key=\"ZvM6LUyWI7hiVw6K64bt\")\n","project = rf.workspace(\"capitaletech-wrnth\").project(\"annotation-moxcs\")\n","version = project.version(2)\n","dataset = version.download(\"yolov8\")\n"," "]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:52:38.656706Z","iopub.status.busy":"2024-09-08T06:52:38.655712Z","iopub.status.idle":"2024-09-08T06:52:38.661352Z","shell.execute_reply":"2024-09-08T06:52:38.660330Z","shell.execute_reply.started":"2024-09-08T06:52:38.656651Z"},"trusted":true},"outputs":[],"source":["import os\n","os.environ[\"WANDB_DISABLED\"] = \"true\""]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:52:39.882023Z","iopub.status.busy":"2024-09-08T06:52:39.881309Z","iopub.status.idle":"2024-09-08T06:52:54.091799Z","shell.execute_reply":"2024-09-08T06:52:54.090833Z","shell.execute_reply.started":"2024-09-08T06:52:39.881985Z"},"trusted":true},"outputs":[],"source":["!pip install ultralytics==8.0.28"]},{"cell_type":"code","execution_count":13,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:56:25.315009Z","iopub.status.busy":"2024-09-08T06:56:25.314467Z","iopub.status.idle":"2024-09-08T06:56:25.322391Z","shell.execute_reply":"2024-09-08T06:56:25.321282Z","shell.execute_reply.started":"2024-09-08T06:56:25.314961Z"},"trusted":true},"outputs":[],"source":["yaml_text = 
\"\"\"train: /kaggle/working/annotation-2/train\n","val: /kaggle/working/annotation-2/valid\n","test: /kaggle/working/annotation-2/test\n","\n","names:\n","- Certifications\n","- Community\n","- Contact\n","- Education\n","- Experience\n","- Interests\n","- Languages\n","- Name\n","- Profil\n","- Projects\n","- skills\n","nc: 11\n","roboflow:\n"," license: CC BY 4.0\n"," project: annotation-moxcs\n"," url: https://universe.roboflow.com/capitaletech-wrnth/annotation-moxcs/dataset/2\n"," version: 2\n"," workspace: capitaletech-wrnth\n","\"\"\"\n","with open(\"./data.yaml\", 'w') as file:\n"," file.write(yaml_text),\n"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:56:26.844555Z","iopub.status.busy":"2024-09-08T06:56:26.844168Z","iopub.status.idle":"2024-09-08T09:22:07.888695Z","shell.execute_reply":"2024-09-08T09:22:07.887404Z","shell.execute_reply.started":"2024-09-08T06:56:26.844520Z"},"trusted":true},"outputs":[],"source":["!yolo task=segment mode=train model=yolov8m-seg.pt data=/kaggle/working/data.yaml epochs=100 imgsz=640\n","# !ls {HOME}/runs/segment/train/\n"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T09:22:07.892085Z","iopub.status.busy":"2024-09-08T09:22:07.891230Z","iopub.status.idle":"2024-09-08T09:22:25.963401Z","shell.execute_reply":"2024-09-08T09:22:25.962222Z","shell.execute_reply.started":"2024-09-08T09:22:07.892036Z"},"trusted":true},"outputs":[],"source":["!yolo export model=/kaggle/working/runs/segment/train4/weights/best.pt format=onnx "]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:53:10.988929Z","iopub.status.busy":"2024-09-08T06:53:10.988489Z","iopub.status.idle":"2024-09-08T06:53:11.045752Z","shell.execute_reply":"2024-09-08T06:53:11.044376Z","shell.execute_reply.started":"2024-09-08T06:53:10.988883Z"},"trusted":true},"outputs":[],"source":["from PIL import 
Image\n","Image(filename=f'/kaggle/working/runs/segment/train/train_batch0.jpg', width=600)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}],"metadata":{"kaggle":{"accelerator":"gpu","dataSources":[],"dockerImageVersionId":30762,"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.14"}},"nbformat":4,"nbformat_minor":4}
src/utils/utils_segment.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from PIL import Image
import numpy as np
import cv2
from typing import Tuple
from pytesseract import pytesseract

# Windows-only tesseract binary path; enable when the executable is not on PATH.
# path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# pytesseract.tesseract_cmd = path_to_tesseract

# Resume-section labels predicted by the segmentation model; list order must
# match the class-index order the model was trained with.
class_names = [
    "Certifications",
    "Community",
    "Contact",
    "Education",
    "Experience",
    "Interests",
    "Languages",
    "Name",
    "Profile",
    "Projects",
    "Skills",
]
# Per-detection row layout: 4 box coords + one score per class; columns after
# this offset are mask coefficients (see extract_box).
number_class_custom = int(len(class_names) + 4)
# Letterbox geometry written by preprocess() and read by postprocess():
# padded input size, left/top padding offsets, and the resize ratio.
# NOTE(review): module-level mutable state makes this pipeline non-reentrant
# and not thread-safe — consider passing these values explicitly instead.
img_width, img_height = None, None
left = None
top = None
ratio = None
27
+
28
+
29
def preprocess(img: np.ndarray, shape=(640, 640)) -> np.ndarray:
    """Letterbox-resize a BGR image and convert it to a normalized NCHW tensor.

    Args:
        img: H x W x 3 BGR image (OpenCV convention).
        shape: target model-input size as (width, height).

    Returns:
        float32 array of shape (1, 3, shape[1], shape[0]) with values in [0, 1].

    Side effects:
        Stores the letterbox geometry (img_width/img_height, left/top padding,
        resize ratio) in module globals consumed later by postprocess().
    """
    global img_width, img_height, left, top, ratio
    img, ratio, (left, top) = resize_and_pad(img, new_shape=shape)
    img_height, img_width, _ = img.shape
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # model expects RGB order
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    # Use the requested shape rather than a hard-coded 640 so non-default
    # input sizes work; shape is (width, height), reshape is (N, C, H, W).
    img = img.reshape(1, 3, shape[1], shape[0]).astype("float32")
    img = img / 255.0
    return img
38
+
39
+
40
def extract_box(outputs):
    """Combine the two YOLOv8-seg ONNX outputs into per-detection rows.

    Args:
        outputs: [detections, prototypes] from the ONNX session — detections is
            (1, number_class_custom + 32, N); prototypes is (1, 32, 160, 160).

    Returns:
        (N, number_class_custom + 160*160) array: box coords + class scores,
        followed by each detection's flattened 160x160 mask (the per-detection
        coefficients multiplied into the shared prototype masks).
    """
    detections = outputs[0][0].transpose()  # -> (N, number_class_custom + 32)
    prototypes = outputs[1][0]
    boxes = detections[:, 0:number_class_custom]
    mask_coeffs = detections[:, number_class_custom:]
    # Flatten the 32 prototype masks so a single matmul yields all detection
    # masks. (The original called this reshape twice; the duplicate is removed.)
    prototypes = prototypes.reshape(32, 160 * 160)
    masks = mask_coeffs @ prototypes
    return np.hstack([boxes, masks])
52
+
53
+
54
def intersection(box1, box2):
    """Return the overlap area of two (x1, y1, x2, y2) boxes.

    Clamps each overlap extent at zero: the original multiplied two negative
    extents for disjoint boxes, reporting a positive "intersection" and
    corrupting the IoU values used by non-maximum suppression.
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    return max(0, x2 - x1) * max(0, y2 - y1)
62
+
63
+
64
def union(box1, box2):
    """Area of the union of two (x1, y1, x2, y2) boxes via inclusion-exclusion."""
    ax1, ay1, ax2, ay2 = box1[:4]
    bx1, by1, bx2, by2 = box2[:4]
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    overlap = intersection(box1, box2)
    return area_a + area_b - overlap
70
+
71
+
72
def iou(box1, box2):
    """Intersection-over-union of two axis-aligned boxes."""
    overlap = intersection(box1, box2)
    total = union(box1, box2)
    return overlap / total
74
+
75
+
76
def sigmoid(z):
    """Element-wise logistic function, mapping any real input into (0, 1)."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
78
+
79
+
80
def get_mask(row, box, img_width, img_height, threshold):
    """Build a box-sized binary mask for one detection.

    Args:
        row: flattened 160*160 mask logits for this detection (from extract_box).
        box: (x1, y1, x2, y2) in letterboxed-input pixel coordinates.
        img_width: width of the letterboxed model input.
        img_height: height of the letterboxed model input.
        threshold: sigmoid-probability cutoff for a pixel to count as mask.

    Returns:
        uint8 array (values 0/255) resized to the box's pixel dimensions.
    """
    mask = row.reshape(160, 160)
    mask = sigmoid(mask)
    # Binarize: probability above threshold -> 255, else 0.
    mask = (mask > threshold).astype("uint8") * 255
    x1, y1, x2, y2 = box
    # Map the box from input-image pixels into the 160x160 mask grid.
    mask_x1 = round(x1 / img_width * 160)
    mask_y1 = round(y1 / img_height * 160)
    mask_x2 = round(x2 / img_width * 160)
    mask_y2 = round(y2 / img_height * 160)
    mask = mask[mask_y1:mask_y2, mask_x1:mask_x2]
    # Upscale the cropped low-res mask back to the box's size in pixels.
    img_mask = Image.fromarray(mask, "L")
    img_mask = img_mask.resize((round(x2 - x1), round(y2 - y1)))
    mask = np.array(img_mask)
    return mask
94
+
95
+
96
def get_polygon(mask):
    """Extract a contour of a binary mask as a list of [x, y] points.

    NOTE(review): only contours[0][0] — the first contour returned — is used;
    presumably each mask is a single connected region. Confirm behavior for
    multi-part masks, and note that cv2.findContours' return shape differs
    between OpenCV versions.
    """
    contours = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    polygon = [[contour[0][0], contour[0][1]] for contour in contours[0][0]]
    return polygon
100
+
101
+
102
def postprocess(outputs, threshold_confidence, threshold_iou):
    """Turn raw ONNX outputs into de-duplicated, labelled boxes in original-image coords.

    Args:
        outputs: raw session outputs (see extract_box).
        threshold_confidence: minimum class score for a detection to be kept.
        threshold_iou: IoU above which a lower-scoring overlapping box is
            suppressed.

    Returns:
        list of {"box": [x1, y1, x2, y2] ints in original-image coordinates,
                 "label": class name, "prob": confidence score} dicts.

    Relies on the module globals (img_width, img_height, ratio, left, top)
    written by preprocess() for the same image.
    """
    candidates = []
    for row in extract_box(outputs):
        xc, yc, w, h = row[:4]
        # Convert center/size in model 640-space to corner coordinates in the
        # letterboxed input image.
        x1 = (xc - w / 2) / 640 * img_width
        y1 = (yc - h / 2) / 640 * img_height
        x2 = (xc + w / 2) / 640 * img_width
        y2 = (yc + h / 2) / 640 * img_height
        prob = row[4:number_class_custom].max()
        if prob < threshold_confidence:
            continue
        class_id = row[4:number_class_custom].argmax()
        label = class_names[class_id]
        candidates.append([x1, y1, x2, y2, label, prob])

    # Greedy non-maximum suppression: keep the highest-scoring detection,
    # discard everything overlapping it beyond threshold_iou, repeat.
    candidates.sort(key=lambda det: det[5], reverse=True)
    kept = []
    while candidates:
        best = candidates.pop(0)
        kept.append(best)
        candidates = [
            det for det in candidates if iou(det, best) < threshold_iou
        ]

    return [
        {
            "box": list(map(int, unpad_and_resize_boxes(det[:4], ratio, left, top))),
            "label": det[4],
            # BUG FIX: the original used int(det[5]), truncating every
            # confidence in [0, 1) to 0; report the real score instead.
            "prob": float(det[5]),
        }
        for det in kept
    ]
146
+
147
+
148
def extract_text_dict(outputs):
    """Merge per-box OCR results into a single {label: text} dict.

    Labels are lower-cased; texts from boxes sharing a label are joined with
    a single space, preserving detection order.
    """
    merged = {}
    for item in outputs:
        key = item.get("label").lower()
        value = item.get("text")
        merged[key] = value if key not in merged else merged[key] + " " + value
    return merged
159
+
160
+
161
def extract_text(outputs, image_origin):
    """OCR every detected box and return the merged {label: text} dict.

    Args:
        outputs: postprocess() results (dicts with "box" and "label"); each
            entry is mutated in place to gain a "text" key.
        image_origin: the original, un-letterboxed image the boxes index into.

    Returns:
        {label: concatenated OCR text} via extract_text_dict().

    BUG FIX: the original set "text" with update() and then — because the
    subsequent `if "text" in outputs[i]` check was always true after that
    update — appended the same string a second time, duplicating every box's
    OCR output. The text is now stored exactly once.
    """
    for entry in outputs:
        crop = crop_image(image_origin, entry.get("box"))
        entry["text"] = pytesseract.image_to_string(crop)
    return extract_text_dict(outputs)
171
+
172
+
173
def crop_image(image, box):
    """Return the sub-image covered by box = (x1, y1, x2, y2); coords truncated to int."""
    x1, y1, x2, y2 = (int(coord) for coord in box)
    return image[y1:y2, x1:x2]
178
+
179
+
180
def resize_and_pad(
    image: np.ndarray,
    new_shape: Tuple[int, int],
    padding_color: Tuple[int, int, int] = (144, 144, 144),
) -> Tuple[np.ndarray, float, Tuple[int, int]]:
    """Letterbox `image` to `new_shape`, preserving aspect ratio.

    The image is scaled so its longer side fits, then padded symmetrically
    with `padding_color` on the remaining axis.

    Args:
        image: H x W x C source image.
        new_shape: target size as (width, height).
        padding_color: BGR fill for the letterbox borders. (The original
            annotation Tuple[int] mis-declared this 3-tuple.)

    Returns:
        (padded image, resize ratio, (left padding, top padding)) — ratio and
        offsets let callers map box coordinates back to the source image.
    """
    h_org, w_org = image.shape[:2]
    w_new, h_new = new_shape
    padd_left, padd_right, padd_top, padd_bottom = 0, 0, 0, 0

    if h_org >= w_org:
        # Tall (or square) image: fit the height, pad left/right.
        img_resize = cv2.resize(image, (int(w_org * h_new / h_org), h_new))
        h, w = img_resize.shape[:2]
        padd_left = (w_new - w) // 2
        padd_right = w_new - w - padd_left
        ratio = h_new / h_org
    else:
        # Wide image: fit the width, pad top/bottom. (if/else instead of two
        # independent ifs guarantees ratio is always bound.)
        img_resize = cv2.resize(image, (w_new, int(h_org * w_new / w_org)))
        h, w = img_resize.shape[:2]
        padd_top = (h_new - h) // 2
        padd_bottom = h_new - h - padd_top
        ratio = w_new / w_org

    image = cv2.copyMakeBorder(
        img_resize,
        padd_top,
        padd_bottom,
        padd_left,
        padd_right,
        cv2.BORDER_CONSTANT,
        None,
        value=padding_color,
    )

    return image, ratio, (padd_left, padd_top)
217
+
218
+
219
def unpad_and_resize_boxes(boxes, ratio, left, top):
    """Map letterboxed-image box coordinates back to the original image.

    Reverses resize_and_pad(): subtracts the padding offsets, then divides by
    the resize ratio.

    Args:
        boxes: one box [x1, y1, x2, y2] or a sequence of such boxes.
        ratio: resize ratio returned by resize_and_pad().
        left: left padding in pixels.
        top: top padding in pixels.

    Returns:
        A flat list for a single box, a list of lists for several, or the
        input unchanged when empty.
    """
    if len(boxes) == 0:
        return boxes
    # BUG FIX: force a float array. With integer box input, the in-place
    # division below raised a numpy casting error (float result cannot be
    # stored back into an int array). np.array also guarantees a copy, so the
    # caller's data is never mutated.
    boxes = np.array(boxes, dtype=float)
    if boxes.ndim == 1:
        boxes = boxes.reshape(-1, 4)
    boxes[:, [0, 2]] -= left  # undo horizontal padding
    boxes[:, [1, 3]] -= top   # undo vertical padding
    boxes[:, :4] /= ratio     # undo resize
    if len(boxes) == 1:
        return boxes.flatten().tolist()
    return boxes.tolist()
233
+
234
+
235
def draw_bounding_boxes(image, outputs):
    """Draw labelled detection boxes on a copy of `image`.

    Args:
        image: BGR image (OpenCV convention).
        outputs: dicts with "box" ([x1, y1, x2, y2] ints) and "label".

    Returns:
        PIL.Image in RGB with one colored rectangle plus label text per
        detection; the input image is not modified.
    """
    image_with_boxes = image.copy()

    # Fixed BGR color per known resume-section label.
    label_colors = {
        "Certifications": (255, 0, 0),
        "Community": (0, 255, 0),
        "Contact": (0, 0, 255),
        "Education": (255, 128, 0),
        "Experience": (255, 0, 255),
        "Interests": (128, 128, 128),
        "Languages": (128, 0, 0),
        "Name": (0, 128, 0),
        "Profile": (0, 0, 128),
        "Projects": (128, 128, 0),
        "Skills": (128, 0, 128),
    }

    for output in outputs:
        box = output["box"]
        label = output["label"]
        # (Removed an unused local that fetched output["text"]; the text is
        # never drawn.) Default to white for labels outside the known set.
        color = label_colors.get(label, (255, 255, 255))

        x1, y1, x2, y2 = box
        cv2.rectangle(image_with_boxes, (x1, y1), (x2, y2), color, 2)
        # Label text just above the box's top-left corner.
        cv2.putText(
            image_with_boxes,
            f"{label}",
            (x1, y1 - 10),
            cv2.FONT_ITALIC,
            2,
            color,
            2,
        )

    # OpenCV works in BGR; convert so the PIL image displays correctly.
    image_with_boxes_rgb = cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB)
    return Image.fromarray(image_with_boxes_rgb)