# job-posting / utils.py
# johnometalman
# implement html labels on linkedin response
# fbf049d
# raw
# history blame
# 6.01 kB
import re
import requests
import json
import pandas as pd
from notion_client import Client
import logging
# Compiled once at import time instead of on every call.
_URL_PATTERN = re.compile(
    r'^https?://'  # scheme
    # Dotted host name. The TLD may be up to 63 letters (the DNS label limit)
    # so that long TLDs such as ".careers" or ".technology" are accepted.
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'
    r'localhost|'
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # dotted-quad IPv4 (octet range not checked)
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$',  # optional path / query without whitespace
    re.IGNORECASE,
)


def is_valid_url(url):
    """Return True if *url* looks like a well-formed http(s) URL.

    The check is purely syntactic: a lowercase ``http://`` or ``https://``
    scheme followed by a domain name, ``localhost`` or an IPv4 address, an
    optional port and an optional path/query containing no whitespace.

    Args:
        url: Candidate URL. Anything that is not a ``str`` is rejected.

    Returns:
        bool: True when the URL matches the expected format, False otherwise.
    """
    # Guard against None / non-string input instead of raising AttributeError.
    if not isinstance(url, str):
        return False
    # The explicit prefix test keeps the scheme case-sensitive even though the
    # pattern itself is matched case-insensitively.
    return url.startswith(('http://', 'https://')) and bool(_URL_PATTERN.match(url))
def process_file(file_content, file_name, api_endpoint, timeout=120):
    """Upload a file to the parsing API and return the normalized result.

    Args:
        file_content: File payload; passed to ``requests`` unchanged.
        file_name: File name reported in the multipart form field ``file``.
        api_endpoint: URL that receives the POST request.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely (the behavior before this
            parameter existed).

    Returns:
        The value produced by ``handle_api_response`` for the response.

    Raises:
        Exception: If sending the request or handling the response fails.
            The original error is chained as ``__cause__``.
    """
    try:
        response = requests.post(
            api_endpoint,
            files={"file": (file_name, file_content)},
            # Without a timeout a stalled server would block the caller forever.
            timeout=timeout,
        )
        return handle_api_response(response)
    except Exception as e:
        # Keep the user-facing message, but preserve the traceback of the cause.
        raise Exception(f"File processing failed: {str(e)}") from e
def process_url(url, api_endpoint, timeout=120):
    """Send a job-posting URL to the parsing API and return the result.

    Args:
        url: URL to process; it must pass ``is_valid_url``.
        api_endpoint: URL that receives the POST request.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely (the behavior before this
            parameter existed).

    Returns:
        The value produced by ``handle_api_response`` for the response.

    Raises:
        ValueError: If *url* is not a valid URL. This is raised before any
            network traffic and is not wrapped.
        Exception: If sending the request or handling the response fails.
            The original error is chained as ``__cause__``.
    """
    if not is_valid_url(url):
        raise ValueError("Invalid URL format")
    try:
        response = requests.post(
            api_endpoint,
            json={"websiteUrl": url},
            headers={"Content-Type": "application/json"},
            # Without a timeout a stalled server would block the caller forever.
            timeout=timeout,
        )
        return handle_api_response(response)
    except Exception as e:
        # Keep the user-facing message, but preserve the traceback of the cause.
        raise Exception(f"URL processing failed: {str(e)}") from e
def handle_api_response(response):
    """Turn an API response into parsed JSON data or plain text.

    Args:
        response: Object exposing ``status_code`` and ``text``.

    Returns:
        For a 200 response:
        * the stripped body text when it does not start with ``{`` or ``[``
          or cannot be decoded as JSON;
        * the decoded value when the body is JSON but not an object;
        * for a JSON object, its ``value`` entry (or the object itself when
          there is no such key). A string ``value`` that itself looks like
          JSON is decoded once more; if that fails the string is returned.

    Raises:
        Exception: For status 500 (reported as multiple job postings) and
            for any other non-200 status.
    """
    status = response.status_code
    if status == 500:
        raise Exception("Multiple job postings detected. Please use the specific job posting URL.")
    if status != 200:
        raise Exception(f"API Error: {response.status_code}")

    json_openers = ('{', '[')
    body = response.text.strip()

    # Plain-text answers are passed through untouched (apart from stripping).
    if not body.startswith(json_openers):
        return body

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError:
        # Looked like JSON but is not; fall back to the raw text.
        return body

    if not isinstance(parsed, dict):
        return parsed

    # Responses may wrap the payload in a 'value' field or be the payload itself.
    payload = parsed.get('value', parsed)
    if not isinstance(payload, str):
        return payload
    if not payload.strip().startswith(json_openers):
        return payload

    # The payload is a string that may contain JSON encoded a second time.
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        return payload
def display_job_data(job_data):
    """Convert job data into a form ready for display.

    Args:
        job_data: A dict, a string, or any other object.

    Returns:
        * a one-row ``pandas.DataFrame`` for a dict;
        * ``('html', text)`` for a string containing both ``<`` and ``>``;
        * the string itself for any other string;
        * ``str(job_data)`` for every other type.
    """
    if isinstance(job_data, dict):
        # One row whose columns are the dict keys.
        return pd.DataFrame([job_data])

    if not isinstance(job_data, str):
        # Fallback: show whatever we got as text.
        return str(job_data)

    # Simple heuristic: both angle brackets present means "treat as HTML".
    has_markup = '<' in job_data and '>' in job_data
    return ('html', job_data) if has_markup else job_data
def process_text(text, api_endpoint, timeout=120):
    """Send raw text to the parsing API and return the normalized result.

    Args:
        text: Text to process; sent as ``{"text": text}`` in a JSON body.
        api_endpoint: URL that receives the POST request.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely (the behavior before this
            parameter existed).

    Returns:
        The value produced by ``handle_api_response`` for the response.

    Raises:
        Exception: If sending the request or handling the response fails.
            The original error is chained as ``__cause__``.
    """
    try:
        response = requests.post(
            api_endpoint,
            json={"text": text},  # Using the expected payload format
            headers={"Content-Type": "application/json"},
            # Without a timeout a stalled server would block the caller forever.
            timeout=timeout,
        )
        return handle_api_response(response)
    except Exception as e:
        # Keep the user-facing message, but preserve the traceback of the cause.
        raise Exception(f"Text processing failed: {str(e)}") from e
# Validation of the job fields that must be present before publishing.
def validate_job_fields(data):
    """Check that every required job field is present and non-blank.

    Args:
        data: Mapping-like object with a ``get`` method that holds the job
            fields.

    Returns:
        tuple: ``(True, None)`` when all required fields are filled in,
        otherwise ``(False, message)`` where *message* lists the display
        names of the missing fields.
    """
    required_fields = {
        'Company': 'Empresa',
        'Job Title': 'Título del puesto',
        'apply_Url': 'URL de aplicación',
        'Remote': 'Modalidad'
    }

    def is_blank(value):
        # Scalars: None/NaN/NA or whitespace-only text count as missing.
        if pd.api.types.is_scalar(value):
            return bool(pd.isna(value)) or not str(value).strip()
        # List-like values: pd.isna() would return an array whose truth value
        # is ambiguous, so only an empty container is treated as missing.
        try:
            return len(value) == 0
        except TypeError:
            return False

    missing = [
        display_name
        for field, display_name in required_fields.items()
        if is_blank(data.get(field))
    ]
    if missing:
        return (False, f"Campos obligatorios faltantes: {', '.join(missing)}")
    return (True, None)
# Notion export; the job data is validated before any API call is made.
def send_to_notion(data, database_id, token):
    """Validate the job data and create a page for it in a Notion database.

    Args:
        data: Mapping-like object with the job fields. ``Company``,
            ``Job Title``, ``apply_Url`` and ``Remote`` are required;
            ``Description``, ``Location`` and ``file_Url`` are optional.
        database_id: Identifier of the target Notion database.
        token: Notion integration token used to authenticate the client.

    Returns:
        The ``url`` entry of the page object returned by Notion.

    Raises:
        ValueError: If a required field is missing (raised before any API
            call and not wrapped).
        Exception: If building or sending the page fails. The original error
            is logged and chained as ``__cause__``.
    """
    # Validate first
    is_valid, error_msg = validate_job_fields(data)
    if not is_valid:
        raise ValueError(error_msg)

    def rich_text(value, chunk_size=2000):
        # Missing optional values become empty text instead of the literal
        # strings "None" / "nan".
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            text = ""
        else:
            text = str(value)
        # Notion limits the content of a single text object to 2000
        # characters, so long text is spread over several objects.
        pieces = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
        return [{"text": {"content": piece}} for piece in pieces or [""]]

    try:
        notion = Client(auth=token)
        new_page = {
            "Role": {"title": rich_text(data["Job Title"])},
            "Startup": {"rich_text": rich_text(data["Company"])},
            "Apply URL": {"url": data["apply_Url"]},
            "Summary": {"rich_text": rich_text(data.get("Description", ""))},
            "Location": {"rich_text": rich_text(data.get("Location", ""))},
            "Remote": {"select": {"name": str(data["Remote"])}},
        }
        # The link to the original file is only set when one was provided.
        if data.get("file_Url"):
            new_page["Original file"] = {"url": data["file_Url"]}
        created_page = notion.pages.create(
            parent={"database_id": database_id},
            properties=new_page
        )
        return created_page["url"]
    except Exception as e:
        logging.error("Notion API error: %s", e)
        # Keep the user-facing message, but preserve the traceback of the cause.
        raise Exception(f"Error al enviar a Notion: {str(e)}") from e