Spaces:
Sleeping
Sleeping
Update utils/mistral.py
Browse files- utils/mistral.py +12 -8
utils/mistral.py
CHANGED
|
@@ -39,24 +39,28 @@ def Data_Cleaner(text):
|
|
| 39 |
return text # Return the original text if cleaning goes wrong
|
| 40 |
'''
|
| 41 |
def Data_Cleaner(text):
|
| 42 |
-
# Use a regex pattern to extract
|
| 43 |
pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
|
| 44 |
match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
|
| 45 |
|
| 46 |
if match:
|
| 47 |
json_str = match.group(1).strip() # Extract JSON block
|
| 48 |
else:
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Validate and return the cleaned JSON if it's valid
|
| 53 |
try:
|
| 54 |
json_obj = json.loads(json_str) # Validate JSON
|
| 55 |
-
return json_str # Return the parsed JSON as a
|
| 56 |
except json.JSONDecodeError:
|
| 57 |
logging.error("Extracted text is not valid JSON")
|
| 58 |
-
return text # Return the original text if JSON decoding
|
| 59 |
-
|
| 60 |
|
| 61 |
# Function to call Mistral and process output
|
| 62 |
def Model_ProfessionalDetails_Output(resume, client):
|
|
@@ -425,7 +429,7 @@ def process_resume_data(file_path):
|
|
| 425 |
"location": normalize_data(per_data.get('personal', {}).get('address', None)),
|
| 426 |
"linkedin": normalize_data(linkedin_links),
|
| 427 |
"github": normalize_data(github_links),
|
| 428 |
-
"other_links":
|
| 429 |
},
|
| 430 |
"professional": {
|
| 431 |
"technical_skills": normalize_data(pro_data.get('professional', {}).get('technical_skills', None)),
|
|
@@ -455,7 +459,7 @@ def process_resume_data(file_path):
|
|
| 455 |
|
| 456 |
#Appending the list if any available as a text
|
| 457 |
if result['personal']['other_links'] is not None:
|
| 458 |
-
result['personal']['other_links'] += links
|
| 459 |
|
| 460 |
#Added the validator for details, Validate contact and email
|
| 461 |
#valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|
|
|
|
| 39 |
return text # Return the original text if cleaning goes wrong
|
| 40 |
'''
|
| 41 |
def Data_Cleaner(text):
    """Extract a JSON object from raw model output and return it as a string.

    Two input shapes are recognised:

    1. A JSON object wrapped in a Markdown code fence tagged ``json``
       (```json ... ```), possibly surrounded by other text. The fenced
       object is returned with surrounding whitespace stripped.
    2. Text that is itself valid JSON once stripped. The text is returned
       exactly as it was passed in.

    Args:
        text: Raw output to clean; expected to be a string.

    Returns:
        A JSON *string* (never a parsed dictionary). If no valid JSON can be
        found, or ``text`` is not a string, the original ``text`` is returned
        unchanged and an error is logged.
    """
    # A non-string value (e.g. None) cannot contain JSON; fall back to
    # returning the input instead of raising inside re.search.
    if not isinstance(text, str):
        logging.error("No valid JSON found in the text")
        return text

    # Non-greedy match inside the braces; DOTALL lets the object span lines.
    pattern = r"```json\s*(\{.*?\})\s*```"
    match = re.search(pattern, text, re.DOTALL)

    if match is None:
        # No fenced block: check whether the text itself is valid JSON.
        try:
            json.loads(text.strip())
        except json.JSONDecodeError:
            logging.error("No valid JSON found in the text")
        # Valid or not, the caller gets the original text back.
        return text

    json_str = match.group(1).strip()

    # Only hand back the extracted block if it really is valid JSON.
    try:
        json.loads(json_str)
    except json.JSONDecodeError:
        logging.error("Extracted text is not valid JSON")
        return text
    return json_str
|
|
|
|
| 64 |
|
| 65 |
# Function to call Mistral and process output
|
| 66 |
def Model_ProfessionalDetails_Output(resume, client):
|
|
|
|
| 429 |
"location": normalize_data(per_data.get('personal', {}).get('address', None)),
|
| 430 |
"linkedin": normalize_data(linkedin_links),
|
| 431 |
"github": normalize_data(github_links),
|
| 432 |
+
"other_links": hyperlinks
|
| 433 |
},
|
| 434 |
"professional": {
|
| 435 |
"technical_skills": normalize_data(pro_data.get('professional', {}).get('technical_skills', None)),
|
|
|
|
| 459 |
|
| 460 |
#Appending the list if any available as a text
|
| 461 |
if result['personal']['other_links'] is not None:
|
| 462 |
+
result['personal']['other_links'] += normalize_data(links)
|
| 463 |
|
| 464 |
#Added the validator for details, Validate contact and email
|
| 465 |
#valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|