Rustamshry committed on
Commit
7128cce
·
verified ·
1 Parent(s): 597075b

Upload resume_parsing.py

Browse files
Files changed (1) hide show
  1. src/resume_parsing.py +72 -0
src/resume_parsing.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from embedding_utils import generate_embedding
4
+ from chroma_utils import add_to_resume_chroma
5
+ from llama_cloud_services import LlamaExtract
6
+ from pydantic import BaseModel, Field
7
+
8
# SECURITY: API keys must never be committed to source control. The Llama
# Cloud key has to be supplied through the LLAMA_CLOUD_API_KEY environment
# variable (for example as a deployment secret). Any key that was previously
# hard-coded at this spot must be treated as leaked and revoked.
if not os.environ.get("LLAMA_CLOUD_API_KEY"):
    # Fail fast with an actionable message instead of a later, less obvious
    # authentication error from the extraction client.
    raise RuntimeError(
        "LLAMA_CLOUD_API_KEY is not set; export it before importing this module."
    )
9
+
10
class ResumeSchema(BaseModel):
    # Structured fields expected from a parsed resume. Presumably the data
    # schema of the resume extraction agent — confirm against the agent's
    # configuration.
    # NOTE: documented with `#` comments rather than a docstring on purpose;
    # pydantic copies a model's docstring into its generated JSON schema.

    # Free-text summary of the candidate's work history.
    experience: str = Field(
        description="Professional work experience",
    )
    # Free-text summary of the candidate's education.
    education: str = Field(
        description="Educational background",
    )
    # One list entry per skill.
    skills: list[str] = Field(
        description="Technical and soft skills",
    )
14
+
15
# Extraction client. It is built without arguments, so credentials are
# presumably read from the LLAMA_CLOUD_API_KEY environment variable — confirm.
llama_extract = LlamaExtract()

# The "resume_parser" agent is looked up by name at import time, so it has to
# exist already. One-time setup, kept here for reference:
#   llama_extract.create_agent(name="resume_parser", data_schema=ResumeSchema)
agent = llama_extract.get_agent(name="resume_parser")
18
+
19
+
20
def _extract_resume_data(resume_content, file_type):
    """Write the resume to a temporary file and run the extraction agent on it.

    The temporary file is removed in every case, including when writing
    ``resume_content`` fails before the extraction call is reached.

    Args:
        resume_content: Raw file content; written to a binary-mode temp file.
        file_type: File extension without the dot, used as the temp-file suffix.

    Returns:
        The ``data`` attribute of the run returned by ``agent.extract``.

    Raises:
        RuntimeError: If the extraction call fails; the original exception is
            chained as ``__cause__``.
    """
    # delete=False: the file is closed before its path is handed to the agent
    # and is removed explicitly in the ``finally`` below.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_type}")
    temp_file_path = temp_file.name
    try:
        with temp_file:
            temp_file.write(resume_content)

        try:
            extracted_run = agent.extract(temp_file_path)
            return extracted_run.data
        except Exception as e:
            raise RuntimeError(f"LlamaExtract failed: {str(e)}") from e
    finally:
        # Covers write failures as well as extraction failures, so the
        # temporary file never outlives this call.
        os.remove(temp_file_path)


def parse_resume_with_llm(resume_content, name, location, file_type):
    """Extract structured fields from a resume, embed them, and store them.

    The resume is run through the extraction agent. The ``experience``,
    ``education`` and ``skills`` values are combined into one text that is
    passed to ``generate_embedding``. The embedding and a metadata dict are
    then handed to ``add_to_resume_chroma``.

    Args:
        resume_content: Raw file content (bytes-like; it is written to a
            binary-mode temporary file).
        name: Stored as-is under the ``"name"`` metadata key.
        location: Stored as-is under the ``"location"`` metadata key.
        file_type: Must be exactly ``"pdf"`` or ``"docx"``.

    Returns:
        A 2-tuple. On success:
        ``({"message": "Resume parsed successfully", "unique_id": ...}, embedding)``.
        On any failure: ``({"error": "Failed to parse resume: ..."}, None)``.
        This function does not raise; every exception is converted into the
        error form.
    """
    try:
        if file_type not in ("pdf", "docx"):
            raise ValueError("Unsupported file type. Only PDF and DOCX files are allowed.")

        extracted_data = _extract_resume_data(resume_content, file_type)
        if not extracted_data:
            raise ValueError("No data extracted from the resume.")

        experience = extracted_data.get("experience", "")
        education = extracted_data.get("education", "")
        skills = extracted_data.get("skills", [])

        combined_text_for_embedding = (
            f"Experience: {experience} "
            f"Education: {education} "
            f"Skills: {skills}"
        )

        embedding = generate_embedding(combined_text_for_embedding)

        metadata = {
            "name": name,
            "location": location,
            "experience": experience,
            "education": education,
            "skills": skills,
        }

        unique_id = add_to_resume_chroma(embedding, metadata)

        return {"message": "Resume parsed successfully", "unique_id": unique_id}, embedding
    except Exception as e:
        # Boundary handler: callers get an error payload, never an exception.
        return {"error": f"Failed to parse resume: {str(e)}"}, None
68
+
69
+
70
+
71
+
72
+