File size: 4,412 Bytes
5a067ab
 
 
 
4873a90
5a067ab
 
 
 
 
2ca386f
5a067ab
 
c8fab27
 
5a067ab
 
 
 
 
c8fab27
 
5a067ab
 
7d3d2eb
5a067ab
 
 
 
 
 
a0e8e60
5a067ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0e8e60
 
 
 
5a067ab
 
 
c8fab27
5a067ab
 
 
 
 
 
c8fab27
 
5a067ab
 
 
 
 
c8fab27
 
5a067ab
 
 
 
 
c8fab27
 
5a067ab
 
 
 
 
 
 
c8fab27
 
5a067ab
 
7d3d2eb
5a067ab
 
 
 
a0e8e60
5a067ab
4873a90
16e44ed
3e61f1a
 
 
 
 
4873a90
3e61f1a
 
9187cc8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from openai import OpenAI
from models import CVExtracted
# Module-level OpenAI client, created once at import time and used by predict().
# NOTE(review): no arguments are passed, so credentials are presumably taken
# from the environment (e.g. OPENAI_API_KEY) — confirm for your deployment.
client = OpenAI()

# System prompt for CV parsing: a schema sketch followed by one worked example.
# The example is kept as strictly valid JSON (straight double quotes, no stray
# characters) so the model is never shown malformed output to imitate.
CV_PARSING_PROMPT = """
You are tasked to parse Curriculum Vitae files into JSON in the format below:
{
    "name": string,
    "skills": []string,
    "links": []string,
    "achievements": []string,
    "experiences": []{
        "start": string,
        "end": string,
        "designation": string,
        "company": string,
        "experience_description": string
    },
    "educations": []{
        "start": string,
        "end": string,
        "major": string,
        "campus": string,
        "gpa": integer
    }
}

Below is an example:

{
    "name": "Faiq Bil Haq Izzuddin",
    "skills": [
        "Python (Matplotlib, Pandas)",
        "Seaborn",
        "Sklearn",
        "TensorFlow",
        "Keras",
        "NetworkX",
        "Java",
        "Excel (Linkedin Certification)",
        "Advanced SQL by Hackerrank (Certificate)",
        "PowerBI",
        "Tableau",
        "Metabase",
        "Problem Solving",
        "Basic by Hackerrank (Certificate)",
        "SpreadSheet (Google Sheet)",
        "NoSQL",
        "Athena",
        "GCP BigQuery",
        "Deep Learning",
        "ETL",
        "Computer Vision",
        "NLP",
        "OCR",
        "MLOps",
        "Hadoop",
        "PySpark"
    ],
    "links": [
        "linkedin.com/in/faiz-b-h/",
        "github.com/mfaizbh22",
        "faizzz.vercel.app",
        "kaggle.com/mfaizb"
    ],
    "experiences": [
        {
            "start": "2024-08-28T00:00:00.000Z",
            "end": null,
            "designation": "Data Analyst Engineer",
            "company": "Professional Huawei Technology Co. Ltd.",
            "experience_description": "Successfully managed deployment of inhouse data warehouse and analytics automation POC using Hadoop and Spark, potential to improve ROI up to millions dollars quarterly. Developed an anti - fraud retention ML model, saved up to hundred millions rupiah monthly and was recognized by CTO. Produce numerous data pipelines and reports using Hadoop and PowerBi with automatic issue resolution."
        },
        {
            "start": "2023-12-01T00:00:00.000Z",
            "end": "2024-03-01T00:00:00.000Z",
            "designation": "Data Analyst Intern",
            "company": "Kitalulus",
            "experience_description": "Analyzed user behavior, successfully prevented over 10. 000 applicants from fraud and improved recommendation system. Evaluated and revamped ETL layer, 20 dashboards, 100 charts query and successfully achieved 90 percent cost reduction. Scraped and analyzed data, completed over 60 requests, included investor's report along with others"
        },
        {
            "start": "2023-08-01T00:00:00.000Z",
            "end": "2023-12-31T00:00:00.000Z",
            "designation": "Course Assistant, IF3240",
            "company": "STEI ITB",
            "experience_description": "Assisted 4 teams with up to 20 students from batch 2021 to develop an information system Helped up to 20 students to analyze problem and implement system information solution from real companies problem"
        },
        {
            "start": "2022-06-01T00:00:00.000Z",
            "end": "2023-12-01T00:00:00.000Z",
            "designation": "Backend Engineer Intern",
            "company": "Kitalulus",
            "experience_description": "Decreased manpower for warehouse management by 10 percent per month. Decreased cost for supplier and production operational cost up to Rp. 2. 5 M per week. Responsible for overwriting and optimizing the queries and databases table in postgresql by critical times."
        }
    ],
    "educations": [
        {
            "start": "2020-08-20T00:00:00.000Z",
            "end": "2025-08-25T00:00:00.000Z",
            "major": "B. Eng, Informatic Engineering",
            "campus": "Institut Teknologi Bandung",
            "gpa": 341
        }
    ]
}

If the document OCR read or extraction is null, please return an empty structure.
"""


def predict(input):
    """Parse CV text into a structured ``CVExtracted`` result via the OpenAI API.

    Sends ``CV_PARSING_PROMPT`` as the system message and ``input`` as the user
    message, requesting structured output shaped by ``CVExtracted``.

    Args:
        input: Content of the CV to parse, forwarded unchanged as the user
            message (presumably OCR/extracted text — confirm with the caller).
            The name shadows the builtin but is kept so existing keyword
            callers keep working.

    Returns:
        The ``parsed`` attribute of the first choice's message. With
        ``response_format=CVExtracted`` this is expected to be a
        ``CVExtracted`` instance.
        NOTE(review): per the OpenAI structured-output docs, ``parsed`` can be
        ``None`` when the model refuses — callers may want to check for that.

    Raises:
        Whatever the OpenAI client raises for network, authentication or
        parsing failures; nothing is caught here.
    """
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": CV_PARSING_PROMPT},
            {"role": "user", "content": input},
        ],
        response_format=CVExtracted,
    )

    return completion.choices[0].message.parsed