Spaces:
Sleeping
Sleeping
[base][abaoxomtieu]: upload folder src
Browse files- app.py +0 -2
- src/config/llm.py +5 -0
- src/data/Screenshot 2024-09-11 080459.png +0 -0
- src/data/page0.jpg +0 -0
- src/data/resume-for-fresher-template-281.jpg +0 -0
- src/data/teacher-resume-example.jpg +0 -0
- src/data/test.jpg +0 -0
- src/inference/segment_inference.py +23 -0
- src/model/segment.onnx +3 -0
- src/notebook/data.yaml +6 -0
- src/notebook/notebook.ipynb +455 -0
- src/notebook/result.jpg +0 -0
- src/notebook/seg.ipynb +86 -0
- src/prompt/promt.py +13 -0
- src/training/segment_training.ipynb.ipynb +1 -0
- src/utils/utils_segment.py +283 -0
app.py
CHANGED
|
@@ -18,8 +18,6 @@ import asyncio
|
|
| 18 |
import os
|
| 19 |
import functools
|
| 20 |
import threading
|
| 21 |
-
import sys
|
| 22 |
-
sys.path.append("./src")
|
| 23 |
load_dotenv()
|
| 24 |
app = FastAPI(docs_url="/")
|
| 25 |
app.add_middleware(
|
|
|
|
| 18 |
import os
|
| 19 |
import functools
|
| 20 |
import threading
|
|
|
|
|
|
|
| 21 |
load_dotenv()
|
| 22 |
app = FastAPI(docs_url="/")
|
| 23 |
app.add_middleware(
|
src/config/llm.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_google_genai import GoogleGenerativeAI
|
| 2 |
+
llm = GoogleGenerativeAI(
|
| 3 |
+
model="gemini-1.5-flash",
|
| 4 |
+
temperature=0,
|
| 5 |
+
)
|
src/data/Screenshot 2024-09-11 080459.png
ADDED
|
src/data/page0.jpg
ADDED
|
src/data/resume-for-fresher-template-281.jpg
ADDED
|
src/data/teacher-resume-example.jpg
ADDED
|
src/data/test.jpg
ADDED
|
src/inference/segment_inference.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import onnxruntime as ort
|
| 2 |
+
from src.utils.utils_segment import preprocess, postprocess
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def inference(image: np.array, model_path, threshold_confidence=0.5, threshold_iou=0.7):
|
| 7 |
+
model = ort.InferenceSession(
|
| 8 |
+
model_path,
|
| 9 |
+
)
|
| 10 |
+
input = preprocess(image)
|
| 11 |
+
outputs = postprocess(
|
| 12 |
+
model.run(None, {"images": input}),
|
| 13 |
+
threshold_confidence=threshold_confidence,
|
| 14 |
+
threshold_iou=threshold_iou,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
return outputs
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
if __name__ == "__main__":
|
| 21 |
+
model_path = "../model/segment.onnx"
|
| 22 |
+
image_path = "../../test.jpg"
|
| 23 |
+
print(inference(image_path, model_path))
|
src/model/segment.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e38660d6cb501bc21d33249f1e1dffd9038d57a82f27bd089746e1aa8eca53a9
|
| 3 |
+
size 109150130
|
src/notebook/data.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train: /cvparsing-2/train/images
|
| 2 |
+
val: /cvparsing-2/valid/images
|
| 3 |
+
test: /cvparsing-2/test/images
|
| 4 |
+
|
| 5 |
+
nc: 14
|
| 6 |
+
names: ['Achievement', 'Certifications', 'Community', 'Contact', 'Education', 'Experience', 'Interests', 'Languages', 'Name', 'Profil', 'Projects', 'image', 'resume', 'skills']
|
src/notebook/notebook.ipynb
ADDED
|
@@ -0,0 +1,455 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 12,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [
|
| 8 |
+
{
|
| 9 |
+
"name": "stdout",
|
| 10 |
+
"output_type": "stream",
|
| 11 |
+
"text": [
|
| 12 |
+
"loading Roboflow workspace...\n",
|
| 13 |
+
"loading Roboflow project...\n"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"name": "stderr",
|
| 18 |
+
"output_type": "stream",
|
| 19 |
+
"text": [
|
| 20 |
+
"Downloading Dataset Version Zip in cvparsing-2 to yolov9:: 100%|██████████| 63864/63864 [00:04<00:00, 15236.33it/s]"
|
| 21 |
+
]
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"name": "stdout",
|
| 25 |
+
"output_type": "stream",
|
| 26 |
+
"text": [
|
| 27 |
+
"\n"
|
| 28 |
+
]
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"name": "stderr",
|
| 32 |
+
"output_type": "stream",
|
| 33 |
+
"text": [
|
| 34 |
+
"\n",
|
| 35 |
+
"Extracting Dataset Version Zip to cvparsing-2 in yolov9:: 100%|██████████| 2344/2344 [00:00<00:00, 5118.00it/s]\n"
|
| 36 |
+
]
|
| 37 |
+
}
|
| 38 |
+
],
|
| 39 |
+
"source": [
|
| 40 |
+
"!pip install roboflow\n",
|
| 41 |
+
"\n",
|
| 42 |
+
"from roboflow import Roboflow\n",
|
| 43 |
+
"rf = Roboflow(api_key=\"ZvM6LUyWI7hiVw6K64bt\")\n",
|
| 44 |
+
"project = rf.workspace(\"capitaletech-wrnth\").project(\"annotation-moxcs\")\n",
|
| 45 |
+
"version = project.version(2)\n",
|
| 46 |
+
"dataset = version.download(\"yolov8\")\n",
|
| 47 |
+
" "
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"cell_type": "code",
|
| 52 |
+
"execution_count": 1,
|
| 53 |
+
"metadata": {},
|
| 54 |
+
"outputs": [
|
| 55 |
+
{
|
| 56 |
+
"name": "stdout",
|
| 57 |
+
"output_type": "stream",
|
| 58 |
+
"text": [
|
| 59 |
+
"Requirement already satisfied: ultralytics in d:\\fu\\dat\\.venv\\lib\\site-packages (8.2.90)\n",
|
| 60 |
+
"Requirement already satisfied: numpy<2.0.0,>=1.23.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (1.26.4)\n",
|
| 61 |
+
"Requirement already satisfied: matplotlib>=3.3.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (3.9.2)\n",
|
| 62 |
+
"Requirement already satisfied: opencv-python>=4.6.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (4.10.0.84)\n",
|
| 63 |
+
"Requirement already satisfied: pillow>=7.1.2 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (10.4.0)\n",
|
| 64 |
+
"Requirement already satisfied: pyyaml>=5.3.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (6.0.2)\n",
|
| 65 |
+
"Requirement already satisfied: requests>=2.23.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (2.32.3)\n",
|
| 66 |
+
"Requirement already satisfied: scipy>=1.4.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (1.14.1)\n",
|
| 67 |
+
"Requirement already satisfied: torch>=1.8.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (2.4.1)\n",
|
| 68 |
+
"Requirement already satisfied: torchvision>=0.9.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (0.19.1)\n",
|
| 69 |
+
"Requirement already satisfied: tqdm>=4.64.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (4.66.5)\n",
|
| 70 |
+
"Requirement already satisfied: psutil in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (6.0.0)\n",
|
| 71 |
+
"Requirement already satisfied: py-cpuinfo in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (9.0.0)\n",
|
| 72 |
+
"Requirement already satisfied: pandas>=1.1.4 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (2.2.2)\n",
|
| 73 |
+
"Requirement already satisfied: seaborn>=0.11.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (0.13.2)\n",
|
| 74 |
+
"Requirement already satisfied: ultralytics-thop>=2.0.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from ultralytics) (2.0.6)\n",
|
| 75 |
+
"Requirement already satisfied: contourpy>=1.0.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (1.3.0)\n",
|
| 76 |
+
"Requirement already satisfied: cycler>=0.10 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (0.12.1)\n",
|
| 77 |
+
"Requirement already satisfied: fonttools>=4.22.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (4.53.1)\n",
|
| 78 |
+
"Requirement already satisfied: kiwisolver>=1.3.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (1.4.7)\n",
|
| 79 |
+
"Requirement already satisfied: packaging>=20.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (24.1)\n",
|
| 80 |
+
"Requirement already satisfied: pyparsing>=2.3.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (3.1.4)\n",
|
| 81 |
+
"Requirement already satisfied: python-dateutil>=2.7 in d:\\fu\\dat\\.venv\\lib\\site-packages (from matplotlib>=3.3.0->ultralytics) (2.9.0.post0)\n",
|
| 82 |
+
"Requirement already satisfied: pytz>=2020.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from pandas>=1.1.4->ultralytics) (2024.1)\n",
|
| 83 |
+
"Requirement already satisfied: tzdata>=2022.7 in d:\\fu\\dat\\.venv\\lib\\site-packages (from pandas>=1.1.4->ultralytics) (2024.1)\n",
|
| 84 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in d:\\fu\\dat\\.venv\\lib\\site-packages (from requests>=2.23.0->ultralytics) (3.3.2)\n",
|
| 85 |
+
"Requirement already satisfied: idna<4,>=2.5 in d:\\fu\\dat\\.venv\\lib\\site-packages (from requests>=2.23.0->ultralytics) (3.7)\n",
|
| 86 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in d:\\fu\\dat\\.venv\\lib\\site-packages (from requests>=2.23.0->ultralytics) (2.2.2)\n",
|
| 87 |
+
"Requirement already satisfied: certifi>=2017.4.17 in d:\\fu\\dat\\.venv\\lib\\site-packages (from requests>=2.23.0->ultralytics) (2024.8.30)\n",
|
| 88 |
+
"Requirement already satisfied: filelock in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.16.0)\n",
|
| 89 |
+
"Requirement already satisfied: typing-extensions>=4.8.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (4.12.2)\n",
|
| 90 |
+
"Requirement already satisfied: sympy in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (1.13.2)\n",
|
| 91 |
+
"Requirement already satisfied: networkx in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.3)\n",
|
| 92 |
+
"Requirement already satisfied: jinja2 in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (3.1.4)\n",
|
| 93 |
+
"Requirement already satisfied: fsspec in d:\\fu\\dat\\.venv\\lib\\site-packages (from torch>=1.8.0->ultralytics) (2024.9.0)\n",
|
| 94 |
+
"Requirement already satisfied: colorama in d:\\fu\\dat\\.venv\\lib\\site-packages (from tqdm>=4.64.0->ultralytics) (0.4.6)\n",
|
| 95 |
+
"Requirement already satisfied: six>=1.5 in d:\\fu\\dat\\.venv\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=3.3.0->ultralytics) (1.16.0)\n",
|
| 96 |
+
"Requirement already satisfied: MarkupSafe>=2.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from jinja2->torch>=1.8.0->ultralytics) (2.1.5)\n",
|
| 97 |
+
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in d:\\fu\\dat\\.venv\\lib\\site-packages (from sympy->torch>=1.8.0->ultralytics) (1.3.0)\n"
|
| 98 |
+
]
|
| 99 |
+
}
|
| 100 |
+
],
|
| 101 |
+
"source": [
|
| 102 |
+
"!pip install ultralytics"
|
| 103 |
+
]
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"cell_type": "code",
|
| 107 |
+
"execution_count": 6,
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"outputs": [],
|
| 110 |
+
"source": [
|
| 111 |
+
"yaml_text = \"\"\"train: /cvparsing-2/train/images\n",
|
| 112 |
+
"val: /cvparsing-2/valid/images\n",
|
| 113 |
+
"test: /cvparsing-2/test/images\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"nc: 14\n",
|
| 116 |
+
"names: ['Achievement', 'Certifications', 'Community', 'Contact', 'Education', 'Experience', 'Interests', 'Languages', 'Name', 'Profil', 'Projects', 'image', 'resume', 'skills']\"\"\"\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"with open(\"./data.yaml\", 'w') as file:\n",
|
| 119 |
+
" file.write(yaml_text),\n",
|
| 120 |
+
"\n",
|
| 121 |
+
"# To display the content of the file, you can use the 'cat' command like this:\n",
|
| 122 |
+
"# %cat /kaggle/working/data.yaml\n"
|
| 123 |
+
]
|
| 124 |
+
},
|
| 125 |
+
{
|
| 126 |
+
"cell_type": "code",
|
| 127 |
+
"execution_count": 20,
|
| 128 |
+
"metadata": {},
|
| 129 |
+
"outputs": [],
|
| 130 |
+
"source": [
|
| 131 |
+
"!yolo train model=yolov9c.yaml data=D:/FU/DAT/src/notebook/datasets/data.yaml epochs=100 imgsz=640 device=0"
|
| 132 |
+
]
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"cell_type": "code",
|
| 136 |
+
"execution_count": 1,
|
| 137 |
+
"metadata": {},
|
| 138 |
+
"outputs": [
|
| 139 |
+
{
|
| 140 |
+
"name": "stdout",
|
| 141 |
+
"output_type": "stream",
|
| 142 |
+
"text": [
|
| 143 |
+
"Ultralytics YOLOv8.2.90 Python-3.11.9 torch-2.4.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4050 Laptop GPU, 6140MiB)\n",
|
| 144 |
+
"Setup complete (20 CPUs, 15.7 GB RAM, 33.9/97.7 GB disk)\n"
|
| 145 |
+
]
|
| 146 |
+
}
|
| 147 |
+
],
|
| 148 |
+
"source": [
|
| 149 |
+
"# %pip install ultralytics\n",
|
| 150 |
+
"import ultralytics\n",
|
| 151 |
+
"ultralytics.checks()"
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"cell_type": "code",
|
| 156 |
+
"execution_count": 1,
|
| 157 |
+
"metadata": {},
|
| 158 |
+
"outputs": [
|
| 159 |
+
{
|
| 160 |
+
"name": "stderr",
|
| 161 |
+
"output_type": "stream",
|
| 162 |
+
"text": [
|
| 163 |
+
"d:\\FU\\DAT\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 164 |
+
" from .autonotebook import tqdm as notebook_tqdm\n",
|
| 165 |
+
"d:\\FU\\DAT\\.venv\\Lib\\site-packages\\huggingface_hub\\file_download.py:159: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\htbqn\\.cache\\huggingface\\hub\\models--microsoft--trocr-base-handwritten. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
|
| 166 |
+
"To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
|
| 167 |
+
" warnings.warn(message)\n",
|
| 168 |
+
"d:\\FU\\DAT\\.venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
|
| 169 |
+
" warnings.warn(\n",
|
| 170 |
+
"Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']\n",
|
| 171 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
| 172 |
+
"d:\\FU\\DAT\\.venv\\Lib\\site-packages\\transformers\\generation\\utils.py:1258: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
|
| 173 |
+
" warnings.warn(\n"
|
| 174 |
+
]
|
| 175 |
+
}
|
| 176 |
+
],
|
| 177 |
+
"source": [
|
| 178 |
+
"from transformers import TrOCRProcessor, VisionEncoderDecoderModel\n",
|
| 179 |
+
"from PIL import Image\n",
|
| 180 |
+
"import requests\n",
|
| 181 |
+
"\n",
|
| 182 |
+
"# load image from the IAM database\n",
|
| 183 |
+
"# url = 'https://fki.tic.heia-fr.ch/static/img/a01-122-02-00.jpg'\n",
|
| 184 |
+
"image = Image.open(r'./images.png').convert(\"RGB\")\n",
|
| 185 |
+
"\n",
|
| 186 |
+
"processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')\n",
|
| 187 |
+
"model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')\n",
|
| 188 |
+
"pixel_values = processor(images=image, return_tensors=\"pt\").pixel_values\n",
|
| 189 |
+
"\n",
|
| 190 |
+
"generated_ids = model.generate(pixel_values)\n",
|
| 191 |
+
"generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n"
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"cell_type": "code",
|
| 196 |
+
"execution_count": 3,
|
| 197 |
+
"metadata": {},
|
| 198 |
+
"outputs": [
|
| 199 |
+
{
|
| 200 |
+
"data": {
|
| 201 |
+
"text/plain": [
|
| 202 |
+
"tensor([[ 2, 288, 321, 2]])"
|
| 203 |
+
]
|
| 204 |
+
},
|
| 205 |
+
"execution_count": 3,
|
| 206 |
+
"metadata": {},
|
| 207 |
+
"output_type": "execute_result"
|
| 208 |
+
}
|
| 209 |
+
],
|
| 210 |
+
"source": [
|
| 211 |
+
"generated_ids"
|
| 212 |
+
]
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
"cell_type": "code",
|
| 216 |
+
"execution_count": 6,
|
| 217 |
+
"metadata": {},
|
| 218 |
+
"outputs": [],
|
| 219 |
+
"source": [
|
| 220 |
+
"import onnxruntime as ort\n",
|
| 221 |
+
"import numpy as np\n",
|
| 222 |
+
"from PIL import Image\n",
|
| 223 |
+
"\n",
|
| 224 |
+
"# Load the ONNX model\n",
|
| 225 |
+
"model_path = \"../model/section_detection.onnx\"\n",
|
| 226 |
+
"session = ort.InferenceSession(model_path)\n",
|
| 227 |
+
"\n",
|
| 228 |
+
"# Load and preprocess the image\n",
|
| 229 |
+
"image_path = 'D:/FU/DAT/src/notebook/datasets/train/images/1629756071561_jpg.rf.05f192117b5f0f8125474abdf3392f72.jpg'\n",
|
| 230 |
+
"image = Image.open(image_path)\n",
|
| 231 |
+
"image_data = np.array(image).astype('float32').transpose(2, 0, 1)\n"
|
| 232 |
+
]
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"cell_type": "code",
|
| 236 |
+
"execution_count": 7,
|
| 237 |
+
"metadata": {},
|
| 238 |
+
"outputs": [
|
| 239 |
+
{
|
| 240 |
+
"data": {
|
| 241 |
+
"text/plain": [
|
| 242 |
+
"(1, 3, 640, 640)"
|
| 243 |
+
]
|
| 244 |
+
},
|
| 245 |
+
"execution_count": 7,
|
| 246 |
+
"metadata": {},
|
| 247 |
+
"output_type": "execute_result"
|
| 248 |
+
}
|
| 249 |
+
],
|
| 250 |
+
"source": [
|
| 251 |
+
"image_data = np.expand_dims(image_data, axis=0)\n",
|
| 252 |
+
"image_data.shape"
|
| 253 |
+
]
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"cell_type": "code",
|
| 257 |
+
"execution_count": 8,
|
| 258 |
+
"metadata": {},
|
| 259 |
+
"outputs": [],
|
| 260 |
+
"source": [
|
| 261 |
+
"input_name = session.get_inputs()[0].name\n",
|
| 262 |
+
"output_name = session.get_outputs()[0].name"
|
| 263 |
+
]
|
| 264 |
+
},
|
| 265 |
+
{
|
| 266 |
+
"cell_type": "code",
|
| 267 |
+
"execution_count": 9,
|
| 268 |
+
"metadata": {},
|
| 269 |
+
"outputs": [],
|
| 270 |
+
"source": [
|
| 271 |
+
"result = session.run([output_name], {input_name: image_data})[0]"
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"cell_type": "code",
|
| 276 |
+
"execution_count": 33,
|
| 277 |
+
"metadata": {},
|
| 278 |
+
"outputs": [
|
| 279 |
+
{
|
| 280 |
+
"data": {
|
| 281 |
+
"text/plain": [
|
| 282 |
+
"(18, 8400)"
|
| 283 |
+
]
|
| 284 |
+
},
|
| 285 |
+
"execution_count": 33,
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"output_type": "execute_result"
|
| 288 |
+
}
|
| 289 |
+
],
|
| 290 |
+
"source": [
|
| 291 |
+
"result[0].shape"
|
| 292 |
+
]
|
| 293 |
+
},
|
| 294 |
+
{
|
| 295 |
+
"cell_type": "code",
|
| 296 |
+
"execution_count": null,
|
| 297 |
+
"metadata": {},
|
| 298 |
+
"outputs": [],
|
| 299 |
+
"source": []
|
| 300 |
+
},
|
| 301 |
+
{
|
| 302 |
+
"cell_type": "code",
|
| 303 |
+
"execution_count": 10,
|
| 304 |
+
"metadata": {},
|
| 305 |
+
"outputs": [
|
| 306 |
+
{
|
| 307 |
+
"name": "stdout",
|
| 308 |
+
"output_type": "stream",
|
| 309 |
+
"text": [
|
| 310 |
+
"WARNING Unable to automatically guess model task, assuming 'task=detect'. Explicitly define task for your model, i.e. 'task=detect', 'segment', 'classify','pose' or 'obb'.\n",
|
| 311 |
+
"Loading ..\\model\\section_detection.onnx for ONNX Runtime inference...\n",
|
| 312 |
+
"\n",
|
| 313 |
+
"image 1/1 D:\\FU\\DAT\\src\\notebook\\datasets\\train\\images\\1629756071561_jpg.rf.05f192117b5f0f8125474abdf3392f72.jpg: 640x640 2 Achievements, 147.6ms\n",
|
| 314 |
+
"Speed: 2.5ms preprocess, 147.6ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 640)\n"
|
| 315 |
+
]
|
| 316 |
+
}
|
| 317 |
+
],
|
| 318 |
+
"source": [
|
| 319 |
+
"from ultralytics import YOLO\n",
|
| 320 |
+
"\n",
|
| 321 |
+
"# Load the YOLOv8 model'\n",
|
| 322 |
+
"\n",
|
| 323 |
+
"# Load the exported ONNX model\n",
|
| 324 |
+
"onnx_model = YOLO(\"../model/section_detection.onnx\")\n",
|
| 325 |
+
"\n",
|
| 326 |
+
"# Run inference\n",
|
| 327 |
+
"results = onnx_model(\"D:/FU/DAT/src/notebook/datasets/train/images/1629756071561_jpg.rf.05f192117b5f0f8125474abdf3392f72.jpg\")"
|
| 328 |
+
]
|
| 329 |
+
},
|
| 330 |
+
{
|
| 331 |
+
"cell_type": "code",
|
| 332 |
+
"execution_count": 11,
|
| 333 |
+
"metadata": {},
|
| 334 |
+
"outputs": [],
|
| 335 |
+
"source": [
|
| 336 |
+
"for result in results:\n",
|
| 337 |
+
" boxes = result.boxes # Boxes object for bounding box outputs\n",
|
| 338 |
+
" masks = result.masks # Masks object for segmentation masks outputs\n",
|
| 339 |
+
" keypoints = result.keypoints # Keypoints object for pose outputs\n",
|
| 340 |
+
" probs = result.probs # Probs object for classification outputs\n",
|
| 341 |
+
" obb = result.obb # Oriented boxes object for OBB outputs\n",
|
| 342 |
+
" result.show() # display to screen\n",
|
| 343 |
+
" result.save(filename=\"result.jpg\") # save to disk"
|
| 344 |
+
]
|
| 345 |
+
},
|
| 346 |
+
{
|
| 347 |
+
"cell_type": "code",
|
| 348 |
+
"execution_count": 2,
|
| 349 |
+
"metadata": {},
|
| 350 |
+
"outputs": [
|
| 351 |
+
{
|
| 352 |
+
"name": "stdout",
|
| 353 |
+
"output_type": "stream",
|
| 354 |
+
"text": [
|
| 355 |
+
"\n",
|
| 356 |
+
"You are already logged into Roboflow. To make a different login,run roboflow.login(force=True).\n"
|
| 357 |
+
]
|
| 358 |
+
}
|
| 359 |
+
],
|
| 360 |
+
"source": [
|
| 361 |
+
"!roboflow login"
|
| 362 |
+
]
|
| 363 |
+
},
|
| 364 |
+
{
|
| 365 |
+
"cell_type": "code",
|
| 366 |
+
"execution_count": 4,
|
| 367 |
+
"metadata": {},
|
| 368 |
+
"outputs": [
|
| 369 |
+
{
|
| 370 |
+
"ename": "RoboflowAPINotAuthorizedError",
|
| 371 |
+
"evalue": "Unauthorized access to roboflow API - check API key. Visit https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key to learn how to retrieve one.",
|
| 372 |
+
"output_type": "error",
|
| 373 |
+
"traceback": [
|
| 374 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
| 375 |
+
"\u001b[1;31mHTTPError\u001b[0m Traceback (most recent call last)",
|
| 376 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:80\u001b[0m, in \u001b[0;36mwrap_roboflow_api_errors.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 79\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 80\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 81\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mConnectionError, \u001b[38;5;167;01mConnectionError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m error:\n",
|
| 377 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:227\u001b[0m, in \u001b[0;36mget_roboflow_model_data\u001b[1;34m(api_key, model_id, endpoint_type, device_id)\u001b[0m\n\u001b[0;32m 223\u001b[0m api_url \u001b[38;5;241m=\u001b[39m _add_params_to_url(\n\u001b[0;32m 224\u001b[0m url\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAPI_BASE_URL\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mendpoint_type\u001b[38;5;241m.\u001b[39mvalue\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodel_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 225\u001b[0m params\u001b[38;5;241m=\u001b[39mparams,\n\u001b[0;32m 226\u001b[0m )\n\u001b[1;32m--> 227\u001b[0m api_data \u001b[38;5;241m=\u001b[39m \u001b[43m_get_from_url\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mapi_url\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 228\u001b[0m cache\u001b[38;5;241m.\u001b[39mset(\n\u001b[0;32m 229\u001b[0m api_data_cache_key,\n\u001b[0;32m 230\u001b[0m api_data,\n\u001b[0;32m 231\u001b[0m expire\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m,\n\u001b[0;32m 232\u001b[0m )\n",
|
| 378 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:473\u001b[0m, in \u001b[0;36m_get_from_url\u001b[1;34m(url, json_response)\u001b[0m\n\u001b[0;32m 472\u001b[0m response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(wrap_url(url))\n\u001b[1;32m--> 473\u001b[0m \u001b[43mapi_key_safe_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 474\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m json_response:\n",
|
| 379 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\utils\\requests.py:15\u001b[0m, in \u001b[0;36mapi_key_safe_raise_for_status\u001b[1;34m(response)\u001b[0m\n\u001b[0;32m 14\u001b[0m response\u001b[38;5;241m.\u001b[39murl \u001b[38;5;241m=\u001b[39m API_KEY_PATTERN\u001b[38;5;241m.\u001b[39msub(deduct_api_key, response\u001b[38;5;241m.\u001b[39murl)\n\u001b[1;32m---> 15\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
| 380 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\requests\\models.py:1021\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1020\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[1;32m-> 1021\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
|
| 381 |
+
"\u001b[1;31mHTTPError\u001b[0m: 401 Client Error: Unauthorized for url: https://api.roboflow.com/ort/annotation-moxcs/2?nocache=true&device=ABAOXOMTIEU&dynamic=true",
|
| 382 |
+
"\nThe above exception was the direct cause of the following exception:\n",
|
| 383 |
+
"\u001b[1;31mRoboflowAPINotAuthorizedError\u001b[0m Traceback (most recent call last)",
|
| 384 |
+
"Cell \u001b[1;32mIn[4], line 10\u001b[0m\n\u001b[0;32m 7\u001b[0m image \u001b[38;5;241m=\u001b[39m cv2\u001b[38;5;241m.\u001b[39mimread(image_file)\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# load a pre-trained yolov8n model\u001b[39;00m\n\u001b[1;32m---> 10\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mget_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mannotation-moxcs/2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 12\u001b[0m \u001b[38;5;66;03m# run inference on our chosen image, image can be a url, a numpy array, a PIL image, etc.\u001b[39;00m\n\u001b[0;32m 13\u001b[0m results \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39minfer(image)[\u001b[38;5;241m0\u001b[39m]\n",
|
| 385 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\models\\utils.py:275\u001b[0m, in \u001b[0;36mget_model\u001b[1;34m(model_id, api_key, **kwargs)\u001b[0m\n\u001b[0;32m 274\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_model\u001b[39m(model_id, api_key\u001b[38;5;241m=\u001b[39mAPI_KEY, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Model:\n\u001b[1;32m--> 275\u001b[0m task, model \u001b[38;5;241m=\u001b[39m \u001b[43mget_model_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mapi_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 276\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ROBOFLOW_MODEL_TYPES[(task, model)](model_id, api_key\u001b[38;5;241m=\u001b[39mapi_key, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
|
| 386 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\registries\\roboflow.py:115\u001b[0m, in \u001b[0;36mget_model_type\u001b[1;34m(model_id, api_key)\u001b[0m\n\u001b[0;32m 108\u001b[0m save_model_metadata_in_cache(\n\u001b[0;32m 109\u001b[0m dataset_id\u001b[38;5;241m=\u001b[39mdataset_id,\n\u001b[0;32m 110\u001b[0m version_id\u001b[38;5;241m=\u001b[39mversion_id,\n\u001b[0;32m 111\u001b[0m project_task_type\u001b[38;5;241m=\u001b[39mproject_task_type,\n\u001b[0;32m 112\u001b[0m model_type\u001b[38;5;241m=\u001b[39mmodel_type,\n\u001b[0;32m 113\u001b[0m )\n\u001b[0;32m 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m project_task_type, model_type\n\u001b[1;32m--> 115\u001b[0m api_data \u001b[38;5;241m=\u001b[39m \u001b[43mget_roboflow_model_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 117\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mModelEndpointType\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mORT\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mGLOBAL_DEVICE_ID\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 120\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mort\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 121\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m api_data \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 122\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
ModelArtefactError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError loading model artifacts from Roboflow API.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
| 387 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:93\u001b[0m, in \u001b[0;36mwrap_roboflow_api_errors.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 91\u001b[0m error_handler \u001b[38;5;241m=\u001b[39m user_handler_override\u001b[38;5;241m.\u001b[39mget(status_code, default_handler)\n\u001b[0;32m 92\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m error_handler \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m---> 93\u001b[0m \u001b[43merror_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[43merror\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 94\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RoboflowAPIUnsuccessfulRequestError(\n\u001b[0;32m 95\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsuccessful request to Roboflow API with response code: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 96\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merror\u001b[39;00m\n\u001b[0;32m 97\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mInvalidJSONError \u001b[38;5;28;01mas\u001b[39;00m error:\n",
|
| 388 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:60\u001b[0m, in \u001b[0;36m<lambda>\u001b[1;34m(e)\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_from_lambda\u001b[39m(\n\u001b[0;32m 54\u001b[0m inner_error: \u001b[38;5;167;01mException\u001b[39;00m, exception_type: Type[\u001b[38;5;167;01mException\u001b[39;00m], message: \u001b[38;5;28mstr\u001b[39m\n\u001b[0;32m 55\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 56\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception_type(message) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01minner_error\u001b[39;00m\n\u001b[0;32m 59\u001b[0m DEFAULT_ERROR_HANDLERS \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m---> 60\u001b[0m \u001b[38;5;241m401\u001b[39m: \u001b[38;5;28;01mlambda\u001b[39;00m e: \u001b[43mraise_from_lambda\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 61\u001b[0m \u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 62\u001b[0m \u001b[43m \u001b[49m\u001b[43mRoboflowAPINotAuthorizedError\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUnauthorized access to roboflow API - check API key. 
Visit \u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[0;32m 64\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhttps://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key to learn how to retrieve one.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 65\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[0;32m 66\u001b[0m \u001b[38;5;241m404\u001b[39m: \u001b[38;5;28;01mlambda\u001b[39;00m e: raise_from_lambda(\n\u001b[0;32m 67\u001b[0m e, RoboflowAPINotNotFoundError, NOT_FOUND_ERROR_MESSAGE\n\u001b[0;32m 68\u001b[0m ),\n\u001b[0;32m 69\u001b[0m }\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap_roboflow_api_errors\u001b[39m(\n\u001b[0;32m 73\u001b[0m http_errors_handlers: Optional[\n\u001b[0;32m 74\u001b[0m Dict[\u001b[38;5;28mint\u001b[39m, Callable[[Union[requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mHTTPError]], \u001b[38;5;28;01mNone\u001b[39;00m]]\n\u001b[0;32m 75\u001b[0m ] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m 76\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mcallable\u001b[39m:\n\u001b[0;32m 77\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorator\u001b[39m(function: \u001b[38;5;28mcallable\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mcallable\u001b[39m:\n",
|
| 389 |
+
"File \u001b[1;32mc:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\inference\\core\\roboflow_api.py:56\u001b[0m, in \u001b[0;36mraise_from_lambda\u001b[1;34m(inner_error, exception_type, message)\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mraise_from_lambda\u001b[39m(\n\u001b[0;32m 54\u001b[0m inner_error: \u001b[38;5;167;01mException\u001b[39;00m, exception_type: Type[\u001b[38;5;167;01mException\u001b[39;00m], message: \u001b[38;5;28mstr\u001b[39m\n\u001b[0;32m 55\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m---> 56\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exception_type(message) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01minner_error\u001b[39;00m\n",
|
| 390 |
+
"\u001b[1;31mRoboflowAPINotAuthorizedError\u001b[0m: Unauthorized access to roboflow API - check API key. Visit https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key to learn how to retrieve one."
|
| 391 |
+
]
|
| 392 |
+
}
|
| 393 |
+
],
|
| 394 |
+
"source": [
|
| 395 |
+
"from inference import get_model\n",
|
| 396 |
+
"import supervision as sv\n",
|
| 397 |
+
"import cv2\n",
|
| 398 |
+
"\n",
|
| 399 |
+
"# define the image url to use for inference\n",
|
| 400 |
+
"image_file = \"taylor-swift-album-1989.jpeg\"\n",
|
| 401 |
+
"image = cv2.imread(image_file)\n",
|
| 402 |
+
"\n",
|
| 403 |
+
"# load a pre-trained yolov8n model\n",
|
| 404 |
+
"model = get_model(model_id=\"annotation-moxcs/2\")\n",
|
| 405 |
+
"\n",
|
| 406 |
+
"# run inference on our chosen image, image can be a url, a numpy array, a PIL image, etc.\n",
|
| 407 |
+
"results = model.infer(image)[0]\n",
|
| 408 |
+
"\n",
|
| 409 |
+
"# load the results into the supervision Detections api\n",
|
| 410 |
+
"detections = sv.Detections.from_inference(results)\n",
|
| 411 |
+
"\n",
|
| 412 |
+
"# create supervision annotators\n",
|
| 413 |
+
"bounding_box_annotator = sv.BoundingBoxAnnotator()\n",
|
| 414 |
+
"label_annotator = sv.LabelAnnotator()\n",
|
| 415 |
+
"\n",
|
| 416 |
+
"# annotate the image with our inference results\n",
|
| 417 |
+
"annotated_image = bounding_box_annotator.annotate(\n",
|
| 418 |
+
" scene=image, detections=detections)\n",
|
| 419 |
+
"annotated_image = label_annotator.annotate(\n",
|
| 420 |
+
" scene=annotated_image, detections=detections)\n",
|
| 421 |
+
"\n",
|
| 422 |
+
"# display the image\n",
|
| 423 |
+
"sv.plot_image(annotated_image)"
|
| 424 |
+
]
|
| 425 |
+
},
|
| 426 |
+
{
|
| 427 |
+
"cell_type": "code",
|
| 428 |
+
"execution_count": null,
|
| 429 |
+
"metadata": {},
|
| 430 |
+
"outputs": [],
|
| 431 |
+
"source": []
|
| 432 |
+
}
|
| 433 |
+
],
|
| 434 |
+
"metadata": {
|
| 435 |
+
"kernelspec": {
|
| 436 |
+
"display_name": ".venv",
|
| 437 |
+
"language": "python",
|
| 438 |
+
"name": "python3"
|
| 439 |
+
},
|
| 440 |
+
"language_info": {
|
| 441 |
+
"codemirror_mode": {
|
| 442 |
+
"name": "ipython",
|
| 443 |
+
"version": 3
|
| 444 |
+
},
|
| 445 |
+
"file_extension": ".py",
|
| 446 |
+
"mimetype": "text/x-python",
|
| 447 |
+
"name": "python",
|
| 448 |
+
"nbconvert_exporter": "python",
|
| 449 |
+
"pygments_lexer": "ipython3",
|
| 450 |
+
"version": "3.11.9"
|
| 451 |
+
}
|
| 452 |
+
},
|
| 453 |
+
"nbformat": 4,
|
| 454 |
+
"nbformat_minor": 2
|
| 455 |
+
}
|
src/notebook/result.jpg
ADDED
|
src/notebook/seg.ipynb
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"from PIL import Image \n",
|
| 10 |
+
"from pytesseract import pytesseract "
|
| 11 |
+
]
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"execution_count": 6,
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"outputs": [
|
| 18 |
+
{
|
| 19 |
+
"name": "stdout",
|
| 20 |
+
"output_type": "stream",
|
| 21 |
+
"text": [
|
| 22 |
+
"It was the best of\n",
|
| 23 |
+
"times, it was the worst\n",
|
| 24 |
+
"of times, it was the age\n",
|
| 25 |
+
"of wisdom, it was the\n",
|
| 26 |
+
"age of foolishness...\n"
|
| 27 |
+
]
|
| 28 |
+
}
|
| 29 |
+
],
|
| 30 |
+
"source": [
|
| 31 |
+
"from PIL import Image \n",
|
| 32 |
+
"from pytesseract import pytesseract \n",
|
| 33 |
+
" \n",
|
| 34 |
+
"path_to_tesseract = r\"C:\\Program Files\\Tesseract-OCR\\tesseract.exe\"\n",
|
| 35 |
+
"image_path = r\"./images.png\"\n",
|
| 36 |
+
"\n",
|
| 37 |
+
"img = Image.open(image_path) \n",
|
| 38 |
+
"\n",
|
| 39 |
+
"\n",
|
| 40 |
+
"pytesseract.tesseract_cmd = path_to_tesseract \n",
|
| 41 |
+
"\n",
|
| 42 |
+
"\n",
|
| 43 |
+
"text = pytesseract.image_to_string(img) \n",
|
| 44 |
+
"\n",
|
| 45 |
+
"print(text[:-1])\n"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "markdown",
|
| 50 |
+
"metadata": {},
|
| 51 |
+
"source": [
|
| 52 |
+
"# test"
|
| 53 |
+
]
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"cell_type": "code",
|
| 57 |
+
"execution_count": null,
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [],
|
| 60 |
+
"source": [
|
| 61 |
+
"from "
|
| 62 |
+
]
|
| 63 |
+
}
|
| 64 |
+
],
|
| 65 |
+
"metadata": {
|
| 66 |
+
"kernelspec": {
|
| 67 |
+
"display_name": "Python 3",
|
| 68 |
+
"language": "python",
|
| 69 |
+
"name": "python3"
|
| 70 |
+
},
|
| 71 |
+
"language_info": {
|
| 72 |
+
"codemirror_mode": {
|
| 73 |
+
"name": "ipython",
|
| 74 |
+
"version": 3
|
| 75 |
+
},
|
| 76 |
+
"file_extension": ".py",
|
| 77 |
+
"mimetype": "text/x-python",
|
| 78 |
+
"name": "python",
|
| 79 |
+
"nbconvert_exporter": "python",
|
| 80 |
+
"pygments_lexer": "ipython3",
|
| 81 |
+
"version": "3.11.9"
|
| 82 |
+
}
|
| 83 |
+
},
|
| 84 |
+
"nbformat": 4,
|
| 85 |
+
"nbformat_minor": 2
|
| 86 |
+
}
|
src/prompt/promt.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prompt template used to ask the LLM to clean up OCR-extracted resume fields.
# The single placeholder {input} receives the raw OCR dictionary.
format_prompt = """
#Role: You are an expert at correcting spelling errors from interviewee's resume information.
#Instruction:
You are provided with a dictionary containing information from the user's resume by an OCR model. It may have misspellings or wrong entries.
Please correct the spelling of each field.
Move the content of the fields to more appropriate fields if necessary.
You must not fabricate information or create new information.

You must return JSON containing the same format as the original format:

#Input:
My resume is as follows: {input}
"""
|
src/training/segment_training.ipynb.ipynb
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2024-09-08T06:52:15.629296Z","iopub.status.busy":"2024-09-08T06:52:15.628873Z","iopub.status.idle":"2024-09-08T06:52:35.266264Z","shell.execute_reply":"2024-09-08T06:52:35.265350Z","shell.execute_reply.started":"2024-09-08T06:52:15.629258Z"},"trusted":true},"outputs":[],"source":["!pip install roboflow\n","\n","from roboflow import Roboflow\n","rf = Roboflow(api_key=\"ZvM6LUyWI7hiVw6K64bt\")\n","project = rf.workspace(\"capitaletech-wrnth\").project(\"annotation-moxcs\")\n","version = project.version(2)\n","dataset = version.download(\"yolov8\")\n"," "]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:52:38.656706Z","iopub.status.busy":"2024-09-08T06:52:38.655712Z","iopub.status.idle":"2024-09-08T06:52:38.661352Z","shell.execute_reply":"2024-09-08T06:52:38.660330Z","shell.execute_reply.started":"2024-09-08T06:52:38.656651Z"},"trusted":true},"outputs":[],"source":["import os\n","os.environ[\"WANDB_DISABLED\"] = \"true\""]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:52:39.882023Z","iopub.status.busy":"2024-09-08T06:52:39.881309Z","iopub.status.idle":"2024-09-08T06:52:54.091799Z","shell.execute_reply":"2024-09-08T06:52:54.090833Z","shell.execute_reply.started":"2024-09-08T06:52:39.881985Z"},"trusted":true},"outputs":[],"source":["!pip install ultralytics==8.0.28"]},{"cell_type":"code","execution_count":13,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:56:25.315009Z","iopub.status.busy":"2024-09-08T06:56:25.314467Z","iopub.status.idle":"2024-09-08T06:56:25.322391Z","shell.execute_reply":"2024-09-08T06:56:25.321282Z","shell.execute_reply.started":"2024-09-08T06:56:25.314961Z"},"trusted":true},"outputs":[],"source":["yaml_text = \"\"\"train: 
/kaggle/working/annotation-2/train\n","val: /kaggle/working/annotation-2/valid\n","test: /kaggle/working/annotation-2/test\n","\n","names:\n","- Certifications\n","- Community\n","- Contact\n","- Education\n","- Experience\n","- Interests\n","- Languages\n","- Name\n","- Profil\n","- Projects\n","- skills\n","nc: 11\n","roboflow:\n"," license: CC BY 4.0\n"," project: annotation-moxcs\n"," url: https://universe.roboflow.com/capitaletech-wrnth/annotation-moxcs/dataset/2\n"," version: 2\n"," workspace: capitaletech-wrnth\n","\"\"\"\n","with open(\"./data.yaml\", 'w') as file:\n"," file.write(yaml_text),\n"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:56:26.844555Z","iopub.status.busy":"2024-09-08T06:56:26.844168Z","iopub.status.idle":"2024-09-08T09:22:07.888695Z","shell.execute_reply":"2024-09-08T09:22:07.887404Z","shell.execute_reply.started":"2024-09-08T06:56:26.844520Z"},"trusted":true},"outputs":[],"source":["!yolo task=segment mode=train model=yolov8m-seg.pt data=/kaggle/working/data.yaml epochs=100 imgsz=640\n","# !ls {HOME}/runs/segment/train/\n"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T09:22:07.892085Z","iopub.status.busy":"2024-09-08T09:22:07.891230Z","iopub.status.idle":"2024-09-08T09:22:25.963401Z","shell.execute_reply":"2024-09-08T09:22:25.962222Z","shell.execute_reply.started":"2024-09-08T09:22:07.892036Z"},"trusted":true},"outputs":[],"source":["!yolo export model=/kaggle/working/runs/segment/train4/weights/best.pt format=onnx "]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-09-08T06:53:10.988929Z","iopub.status.busy":"2024-09-08T06:53:10.988489Z","iopub.status.idle":"2024-09-08T06:53:11.045752Z","shell.execute_reply":"2024-09-08T06:53:11.044376Z","shell.execute_reply.started":"2024-09-08T06:53:10.988883Z"},"trusted":true},"outputs":[],"source":["from PIL import 
Image\n","Image(filename=f'/kaggle/working/runs/segment/train/train_batch0.jpg', width=600)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}],"metadata":{"kaggle":{"accelerator":"gpu","dataSources":[],"dockerImageVersionId":30762,"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.14"}},"nbformat":4,"nbformat_minor":4}
|
src/utils/utils_segment.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image
|
| 2 |
+
import numpy as np
|
| 3 |
+
import cv2
|
| 4 |
+
from typing import Tuple
|
| 5 |
+
from pytesseract import pytesseract
|
| 6 |
+
|
| 7 |
+
# path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
|
| 8 |
+
# pytesseract.tesseract_cmd = path_to_tesseract
|
| 9 |
+
# Class labels predicted by the resume-segmentation model. Order matters:
# the index in this list is the class id the ONNX model emits.
class_names = [
    "Certifications",
    "Community",
    "Contact",
    "Education",
    "Experience",
    "Interests",
    "Languages",
    "Name",
    "Profile",
    "Projects",
    "Skills",
]
# Column offset where mask coefficients start in each detection row:
# 4 box coordinates (xc, yc, w, h) followed by one score per class.
# (The original wrapped this in a redundant int() cast.)
number_class_custom = len(class_names) + 4

# Globals populated by preprocess() and consumed by postprocess():
# size of the padded model input plus the letterbox offsets/ratio needed
# to map detections back onto the original image.
img_width, img_height = None, None
left = None
top = None
ratio = None
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def preprocess(img: np.array, shape=(640, 640)) -> np.array:
    """Letterbox *img* to *shape* and convert it to an NCHW float32 tensor.

    Side effects: stores the padded input size and the letterbox
    parameters (ratio / left / top) in module globals so postprocess()
    can map detections back to the original image.
    """
    global img_width, img_height, left, top, ratio
    img, ratio, (left, top) = resize_and_pad(img, new_shape=shape)
    img_height, img_width, _ = img.shape
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # HWC -> CHW, then add batch dim. BUG FIX: use the actual padded size
    # instead of hard-coding 640, so non-default `shape` values work.
    img = img.transpose(2, 0, 1)
    img = img.reshape(1, 3, img_height, img_width).astype("float32")
    img = img / 255.0
    return img
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def extract_box(outputs):
    """Merge the two YOLOv8-seg ONNX outputs into one row-per-detection array.

    outputs[0]: detections of shape (1, 4 + n_classes + 32, n_boxes).
    outputs[1]: mask prototypes of shape (1, 32, 160, 160).
    Returns an array whose rows are [box + class scores | 160*160 mask logits].
    """
    detections = outputs[0][0].transpose()
    prototypes = outputs[1][0]
    boxes = detections[:, 0:number_class_custom]
    mask_coeffs = detections[:, number_class_custom:]
    # Flatten the prototype masks once (the original reshaped twice).
    prototypes = prototypes.reshape(32, 160 * 160)
    masks = mask_coeffs @ prototypes
    return np.hstack([boxes, masks])
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def intersection(box1, box2):
    """Return the intersection area of two (x1, y1, x2, y2) boxes.

    BUG FIX: clamp each extent at zero — for disjoint boxes the original
    multiplied two negative extents, yielding a spurious positive area.
    """
    box1_x1, box1_y1, box1_x2, box1_y2 = box1[:4]
    box2_x1, box2_y1, box2_x2, box2_y2 = box2[:4]
    x1 = max(box1_x1, box2_x1)
    y1 = max(box1_y1, box2_y1)
    x2 = min(box1_x2, box2_x2)
    y2 = min(box1_y2, box2_y2)
    return max(0, x2 - x1) * max(0, y2 - y1)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def union(box1, box2):
    """Area covered by either of two (x1, y1, x2, y2) boxes."""
    ax1, ay1, ax2, ay2 = box1[:4]
    bx1, by1, bx2, by2 = box2[:4]
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    overlap = intersection(box1, box2)
    return area_a + area_b - overlap
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def iou(box1, box2):
    """Intersection-over-union of two boxes (first four elements used)."""
    overlap = intersection(box1, box2)
    return overlap / union(box1, box2)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def sigmoid(z):
    """Logistic function: squash logits into the open interval (0, 1)."""
    neg_exp = np.exp(-z)
    return 1.0 / (1.0 + neg_exp)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def get_mask(row, box, img_width, img_height, threshold):
    """Turn one detection's 160x160 mask logits into a binary crop.

    The logits are passed through a sigmoid, thresholded to a 0/255 mask,
    cropped to the detection box (given in model-input pixels), and
    resized to the box's pixel size.
    """
    logits = row.reshape(160, 160)
    probs = sigmoid(logits)
    binary = (probs > threshold).astype("uint8") * 255
    x1, y1, x2, y2 = box
    # Box coordinates scaled down to the 160x160 prototype grid.
    mx1 = round(x1 / img_width * 160)
    my1 = round(y1 / img_height * 160)
    mx2 = round(x2 / img_width * 160)
    my2 = round(y2 / img_height * 160)
    cropped = binary[my1:my2, mx1:mx2]
    pil_mask = Image.fromarray(cropped, "L")
    pil_mask = pil_mask.resize((round(x2 - x1), round(y2 - y1)))
    return np.array(pil_mask)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def get_polygon(mask):
    """Extract the first contour of a binary mask as a [[x, y], ...] list."""
    found = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    first_contour = found[0][0]
    return [[point[0][0], point[0][1]] for point in first_contour]
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def postprocess(outputs, threshold_confidence, threshold_iou):
    """Convert raw ONNX outputs into de-duplicated, original-image boxes.

    Returns a list of {"box": [x1, y1, x2, y2], "label": str, "prob": float}
    dicts, filtered by confidence and pruned with non-maximum suppression.
    Relies on the module globals set by preprocess() (img_width, img_height,
    ratio, left, top) to undo the letterboxing.
    """
    candidates = []
    for row in extract_box(outputs):
        xc, yc, w, h = row[:4]
        # Centre/size -> corner coordinates in padded-input pixels.
        x1 = (xc - w / 2) / 640 * img_width
        y1 = (yc - h / 2) / 640 * img_height
        x2 = (xc + w / 2) / 640 * img_width
        y2 = (yc + h / 2) / 640 * img_height
        prob = row[4:number_class_custom].max()
        if prob < threshold_confidence:
            continue
        class_id = row[4:number_class_custom].argmax()
        label = class_names[class_id]
        candidates.append([x1, y1, x2, y2, label, prob])

    # Non-maximum suppression: keep the highest-scoring box, drop any
    # remaining candidate that overlaps it too much, repeat.
    candidates.sort(key=lambda c: c[5], reverse=True)
    kept = []
    while candidates:
        best = candidates.pop(0)
        kept.append(best)
        candidates = [c for c in candidates if iou(c, best) < threshold_iou]

    return [
        {
            "box": list(map(int, unpad_and_resize_boxes(obj[:4], ratio, left, top))),
            "label": obj[4],
            # BUG FIX: int() truncated every confidence (< 1.0) to 0.
            "prob": float(obj[5]),
        }
        for obj in kept
    ]
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def extract_text_dict(outputs):
    """Merge OCR results into one dict keyed by lower-cased section label.

    Text from multiple regions sharing a label is joined with spaces.
    """
    merged = {}
    for item in outputs:
        key = item.get("label").lower()
        value = item.get("text")
        merged[key] = value if key not in merged else merged[key] + " " + value
    return merged
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def extract_text(outputs, image_origin):
    """OCR each detected region of *image_origin* and merge the results.

    Mutates each entry of *outputs* by adding a "text" key, then returns
    the label -> text dictionary built by extract_text_dict().
    """
    for item in outputs:
        region = crop_image(image_origin, item.get("box"))
        text = pytesseract.image_to_string(region)
        # BUG FIX: the original first did item.update({"text": text}) and
        # then, because "text" was now always present, appended the same
        # text again — every OCR result came out duplicated.
        item["text"] = text
    return extract_text_dict(outputs)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def crop_image(image, box):
    """Return the sub-image of *image* delimited by box = (x1, y1, x2, y2)."""
    x1, y1, x2, y2 = (int(coord) for coord in box)
    return image[y1:y2, x1:x2]
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def resize_and_pad(
    image: np.array,
    new_shape: Tuple[int, int],
    padding_color: Tuple[int] = (144, 144, 144),
) -> np.array:
    """Letterbox *image* to *new_shape* while preserving aspect ratio.

    Returns (padded_image, scale_ratio, (pad_left, pad_top)) so callers
    can later map coordinates back onto the original image.
    """
    h_org, w_org = image.shape[:2]
    w_new, h_new = new_shape
    pad_left = pad_right = pad_top = pad_bottom = 0

    if h_org >= w_org:
        # Portrait (or square): fit the height, pad left/right.
        resized = cv2.resize(image, (int(w_org * h_new / h_org), h_new))
        h, w = resized.shape[:2]
        pad_left = (w_new - w) // 2
        pad_right = w_new - w - pad_left
        ratio = h_new / h_org
    else:
        # Landscape: fit the width, pad top/bottom.
        resized = cv2.resize(image, (w_new, int(h_org * w_new / w_org)))
        h, w = resized.shape[:2]
        pad_top = (h_new - h) // 2
        pad_bottom = h_new - h - pad_top
        ratio = w_new / w_org

    padded = cv2.copyMakeBorder(
        resized,
        pad_top,
        pad_bottom,
        pad_left,
        pad_right,
        cv2.BORDER_CONSTANT,
        None,
        value=padding_color,
    )
    return padded, ratio, (pad_left, pad_top)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
def unpad_and_resize_boxes(boxes, ratio, left, top):
    """Map letterboxed (x1, y1, x2, y2) boxes back to original-image pixels.

    Subtracts the padding offsets and divides by the resize ratio. Accepts
    a single flat box (4 numbers) or a list of boxes; a single box comes
    back as a flat list, multiple boxes as a list of lists.
    """
    if len(boxes) == 0:
        return boxes
    # BUG FIX: force a float copy — integer input boxes made the in-place
    # true division below raise a TypeError on an int array. np.array (not
    # asarray) keeps the original copy semantics for array inputs.
    boxes = np.array(boxes, dtype=float)
    if boxes.ndim == 1:
        boxes = boxes.reshape(-1, 4)
    boxes[:, [0, 2]] -= left
    boxes[:, [1, 3]] -= top
    boxes[:, :4] /= ratio
    if len(boxes) == 1:
        return boxes.flatten().tolist()
    return boxes.tolist()
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def draw_bounding_boxes(image, outputs):
    """Draw labelled detection boxes on a BGR image and return a PIL image.

    Each entry of *outputs* must carry "box" ([x1, y1, x2, y2] ints) and
    "label"; labels without an assigned colour fall back to white.
    (Removed the unused local `text` the original pulled from each entry.)
    """
    image_with_boxes = image.copy()

    # One fixed BGR colour per resume section label.
    label_colors = {
        "Certifications": (255, 0, 0),
        "Community": (0, 255, 0),
        "Contact": (0, 0, 255),
        "Education": (255, 128, 0),
        "Experience": (255, 0, 255),
        "Interests": (128, 128, 128),
        "Languages": (128, 0, 0),
        "Name": (0, 128, 0),
        "Profile": (0, 0, 128),
        "Projects": (128, 128, 0),
        "Skills": (128, 0, 128),
    }

    for output in outputs:
        box = output["box"]
        label = output["label"]
        # Default to white when the label has no assigned colour.
        color = label_colors.get(label, (255, 255, 255))

        x1, y1, x2, y2 = box
        cv2.rectangle(image_with_boxes, (x1, y1), (x2, y2), color, 2)
        # Label text just above the top-left corner of the box.
        cv2.putText(
            image_with_boxes,
            f"{label}",
            (x1, y1 - 10),
            cv2.FONT_ITALIC,
            2,
            color,
            2,
        )
    # OpenCV works in BGR; PIL expects RGB.
    image_with_boxes_rgb = cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB)
    return Image.fromarray(image_with_boxes_rgb)
|