ABAO77 committed on
Commit
930ec8d
·
1 Parent(s): 186ec5f

feat: good code

Browse files
src/notebook/notebook.ipynb CHANGED
@@ -2,65 +2,22 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [
8
- {
9
- "name": "stdout",
10
- "output_type": "stream",
11
- "text": [
12
- "Requirement already satisfied: roboflow in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.1.44)\n",
13
- "Requirement already satisfied: certifi in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (2024.7.4)\n",
14
- "Requirement already satisfied: idna==3.7 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (3.7)\n",
15
- "Requirement already satisfied: cycler in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (0.12.1)\n",
16
- "Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (1.4.5)\n",
17
- "Requirement already satisfied: matplotlib in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (3.9.2)\n",
18
- "Requirement already satisfied: numpy>=1.18.5 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (1.26.4)\n",
19
- "Requirement already satisfied: opencv-python-headless==4.10.0.84 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (4.10.0.84)\n",
20
- "Requirement already satisfied: Pillow>=7.1.2 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (10.4.0)\n",
21
- "Requirement already satisfied: python-dateutil in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (2.9.0.post0)\n",
22
- "Requirement already satisfied: python-dotenv in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (1.0.1)\n",
23
- "Requirement already satisfied: requests in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (2.31.0)\n",
24
- "Requirement already satisfied: six in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (1.16.0)\n",
25
- "Requirement already satisfied: urllib3>=1.26.6 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (2.2.3)\n",
26
- "Requirement already satisfied: tqdm>=4.41.0 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (4.66.4)\n",
27
- "Requirement already satisfied: PyYAML>=5.3.1 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (6.0.1)\n",
28
- "Requirement already satisfied: requests-toolbelt in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (1.0.0)\n",
29
- "Requirement already satisfied: filetype in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from roboflow) (1.2.0)\n",
30
- "Requirement already satisfied: colorama in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tqdm>=4.41.0->roboflow) (0.4.4)\n",
31
- "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib->roboflow) (1.2.1)\n",
32
- "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib->roboflow) (4.53.1)\n",
33
- "Requirement already satisfied: packaging>=20.0 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib->roboflow) (24.1)\n",
34
- "Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from matplotlib->roboflow) (3.1.2)\n",
35
- "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\htbqn\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests->roboflow) (3.3.2)\n"
36
- ]
37
- },
38
- {
39
- "name": "stderr",
40
- "output_type": "stream",
41
- "text": [
42
- "WARNING: Ignoring invalid distribution ~angchain (C:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages)\n",
43
- "WARNING: Ignoring invalid distribution ~ip (C:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages)\n",
44
- "WARNING: Ignoring invalid distribution ~angchain (C:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages)\n",
45
- "WARNING: Ignoring invalid distribution ~ip (C:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages)\n",
46
- "WARNING: Ignoring invalid distribution ~angchain (C:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages)\n",
47
- "WARNING: Ignoring invalid distribution ~ip (C:\\Users\\htbqn\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages)\n"
48
- ]
49
- },
50
  {
51
  "name": "stdout",
52
  "output_type": "stream",
53
  "text": [
54
  "loading Roboflow workspace...\n",
55
- "loading Roboflow project...\n",
56
- "Dependency ultralytics==8.0.196 is required but found version=8.2.90, to fix: `pip install ultralytics==8.0.196`\n"
57
  ]
58
  },
59
  {
60
  "name": "stderr",
61
  "output_type": "stream",
62
  "text": [
63
- "Downloading Dataset Version Zip in annotation-2 to yolov8:: 100%|██████████| 92659/92659 [00:06<00:00, 14429.98it/s]"
64
  ]
65
  },
66
  {
@@ -75,7 +32,7 @@
75
  "output_type": "stream",
76
  "text": [
77
  "\n",
78
- "Extracting Dataset Version Zip to annotation-2 in yolov8:: 100%|██████████| 3376/3376 [00:00<00:00, 3563.34it/s]\n"
79
  ]
80
  }
81
  ],
@@ -147,7 +104,7 @@
147
  },
148
  {
149
  "cell_type": "code",
150
- "execution_count": null,
151
  "metadata": {},
152
  "outputs": [],
153
  "source": [
@@ -158,10 +115,6 @@
158
  "nc: 14\n",
159
  "names: ['Achievement', 'Certifications', 'Community', 'Contact', 'Education', 'Experience', 'Interests', 'Languages', 'Name', 'Profil', 'Projects', 'image', 'resume', 'skills']\"\"\"\n",
160
  "\n",
161
- "# 3 + 8 -> 3\n",
162
- "# 5 + 10 + 1 -> 5\n",
163
- "# 13 + 7 -> 13\n",
164
- "\n",
165
  "with open(\"./data.yaml\", 'w') as file:\n",
166
  " file.write(yaml_text),\n",
167
  "\n",
@@ -169,77 +122,6 @@
169
  "# %cat /kaggle/working/data.yaml\n"
170
  ]
171
  },
172
- {
173
- "cell_type": "code",
174
- "execution_count": 7,
175
- "metadata": {},
176
- "outputs": [
177
- {
178
- "name": "stdout",
179
- "output_type": "stream",
180
- "text": [
181
- "Labels mapped and saved to ./test.txt\n"
182
- ]
183
- }
184
- ],
185
- "source": [
186
- "import os\n",
187
- "\n",
188
- "def map_yolo_labels(file_path, label_mapping, output_file=None):\n",
189
- " \"\"\"\n",
190
- " Remaps the class labels in a YOLOv8 label file and saves the updated labels.\n",
191
- " \n",
192
- " Parameters:\n",
193
- " - file_path: Path to the YOLOv8 label file.\n",
194
- " - label_mapping: Dictionary for remapping labels (e.g., {3: 3, 8: 3, 5: 5, 10: 5, 1: 5, 13: 13, 7: 13}).\n",
195
- " - output_file: Path to save the remapped labels (optional). If not provided, it overwrites the original file.\n",
196
- " \n",
197
- " Returns:\n",
198
- " - None\n",
199
- " \"\"\"\n",
200
- " with open(file_path, 'r') as file:\n",
201
- " lines = file.readlines()\n",
202
- "\n",
203
- " # Process each line and remap the class label\n",
204
- " updated_lines = []\n",
205
- " for line in lines:\n",
206
- " parts = line.strip().split()\n",
207
- " class_id = int(parts[0]) # The first part is the class label\n",
208
- " \n",
209
- " # Remap the class label using the mapping\n",
210
- " new_class_id = label_mapping.get(class_id, class_id)\n",
211
- " \n",
212
- " # Reconstruct the line with the new class label\n",
213
- " updated_line = f\"{new_class_id} {' '.join(parts[1:])}\\n\"\n",
214
- " updated_lines.append(updated_line)\n",
215
- " \n",
216
- " # Write to the output file (overwrite original file if output_file is not provided)\n",
217
- " if output_file is None:\n",
218
- " output_file = file_path\n",
219
- "\n",
220
- " with open(output_file, 'w') as file:\n",
221
- " file.writelines(updated_lines)\n",
222
- " \n",
223
- " print(f\"Labels mapped and saved to {output_file}\")\n",
224
- "\n",
225
- "# Example usage:\n",
226
- "label_mapping = {\n",
227
- " 3: 3, # Map 3 -> 3\n",
228
- " 8: 3, # Map 8 -> 3\n",
229
- " 5: 5, # Map 5 -> 5\n",
230
- " 10: 5, # Map 10 -> 5\n",
231
- " 1: 5, # Map 1 -> 5\n",
232
- " 13: 13, # Map 13 -> 13\n",
233
- " 7: 13 # Map 7 -> 13\n",
234
- "}\n",
235
- "\n",
236
- "# Path to your YOLOv8 label file\n",
237
- "label_file = \"./test.txt\"\n",
238
- "\n",
239
- "# Call the function to remap labels\n",
240
- "map_yolo_labels(label_file, label_mapping)\n"
241
- ]
242
- },
243
  {
244
  "cell_type": "code",
245
  "execution_count": 20,
@@ -551,7 +433,7 @@
551
  ],
552
  "metadata": {
553
  "kernelspec": {
554
- "display_name": "Python 3",
555
  "language": "python",
556
  "name": "python3"
557
  },
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 12,
6
  "metadata": {},
7
  "outputs": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
  "loading Roboflow workspace...\n",
13
+ "loading Roboflow project...\n"
 
14
  ]
15
  },
16
  {
17
  "name": "stderr",
18
  "output_type": "stream",
19
  "text": [
20
+ "Downloading Dataset Version Zip in cvparsing-2 to yolov9:: 100%|██████████| 63864/63864 [00:04<00:00, 15236.33it/s]"
21
  ]
22
  },
23
  {
 
32
  "output_type": "stream",
33
  "text": [
34
  "\n",
35
+ "Extracting Dataset Version Zip to cvparsing-2 in yolov9:: 100%|██████████| 2344/2344 [00:00<00:00, 5118.00it/s]\n"
36
  ]
37
  }
38
  ],
 
104
  },
105
  {
106
  "cell_type": "code",
107
+ "execution_count": 6,
108
  "metadata": {},
109
  "outputs": [],
110
  "source": [
 
115
  "nc: 14\n",
116
  "names: ['Achievement', 'Certifications', 'Community', 'Contact', 'Education', 'Experience', 'Interests', 'Languages', 'Name', 'Profil', 'Projects', 'image', 'resume', 'skills']\"\"\"\n",
117
  "\n",
 
 
 
 
118
  "with open(\"./data.yaml\", 'w') as file:\n",
119
  " file.write(yaml_text),\n",
120
  "\n",
 
122
  "# %cat /kaggle/working/data.yaml\n"
123
  ]
124
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  {
126
  "cell_type": "code",
127
  "execution_count": 20,
 
433
  ],
434
  "metadata": {
435
  "kernelspec": {
436
+ "display_name": ".venv",
437
  "language": "python",
438
  "name": "python3"
439
  },
src/prompt/promt.py CHANGED
@@ -19,38 +19,31 @@ Output must be in JSON format following the same structure as the input.
19
  ("human", "{user_input}"),
20
  ]
21
  )
22
-
23
-
24
- prompt_experience = ChatPromptTemplate.from_messages(
25
  [
26
  (
27
  "system",
28
- """
29
- # Role: You are an expert at extracting key information about projects from the "experience" section of an OCR'd resume.
30
-
31
- # Instruction:
32
- You are given a JSON object containing extracted resume data from an OCR model. This data likely contains errors like misspellings, merged words, and extracted noise. Your task is to:
33
-
34
- 1. **Pre-process the "experience" field:**
35
- * Correct misspellings using your knowledge of common resume terms and English vocabulary.
36
- * Separate merged words and remove any obvious OCR noise.
37
-
38
- 2. **Identify the "experience" field:** Locate the field labeled "experience" (or a similar label) within the JSON object.
39
-
40
- 3. **Extract project information:**
41
- * Identify project mentions: Look for keywords and phrases that indicate a project, such as "project," "developed," "implemented," "designed," "contributed to," etc.
42
- * Extract project details: For each project mentioned:
43
- * Project name or description
44
- * Role and contributions
45
- * Technologies used
46
- * Outcomes and achievements
47
-
48
- 4. **Structure the output:** Return a JSON object with a "projects" field containing an array of extracted project details. The exact format can be flexible to accommodate variations in the input data.
49
-
50
-
51
-
52
  """,
53
  ),
54
- ("human", "{user_input}"),
55
  ]
56
  )
 
19
  ("human", "{user_input}"),
20
  ]
21
  )
22
+ matching_jd_prompt = ChatPromptTemplate.from_messages(
 
 
23
  [
24
  (
25
  "system",
26
+ """
27
+ Role: Expert resume analyzer matching qualifications with the following job requirements:
28
+ {job_description}
29
+
30
+ Tasks:
31
+ 1. Analyze resume experience and skills against job requirements:
32
+ - Match skills/experience to job requirements
33
+ - Score relevance (0-100%)
34
+ - Identify key matching qualifications
35
+ 2. Provide concise reasoning explaining the score based on:
36
+ - Key matching skills and experiences
37
+ - Notable gaps
38
+ - Level of experience alignment
39
+ Output JSON format:
40
+ Output JSON:
41
+ {{
42
+ "score": float, # Overall match percentage
43
+ "reasoning": str # Clear 2-3 sentence explanation of the score
44
+ }}
 
 
 
 
 
45
  """,
46
  ),
47
+ ("human", "{resume_input}"),
48
  ]
49
  )
src/utils/utils_segment.py CHANGED
@@ -4,8 +4,6 @@ import cv2
4
  from typing import Tuple
5
  from pytesseract import pytesseract
6
 
7
- # path_to_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
8
- # pytesseract.tesseract_cmd = path_to_tesseract
9
  class_names = [
10
  "Community",
11
  "Contact",
 
4
  from typing import Tuple
5
  from pytesseract import pytesseract
6
 
 
 
7
  class_names = [
8
  "Community",
9
  "Contact",