Spaces:
Sleeping
Sleeping
| import re | |
| import requests | |
| import json | |
| import pandas as pd | |
| from notion_client import Client | |
| import logging | |
# Compiled once at import time; matched case-insensitively.
_URL_PATTERN = re.compile(
    r'^https?://'
    # Dotted hostname: one or more labels followed by a TLD. The TLD may be
    # up to 63 letters (DNS label limit) or a punycode "xn--" label, so that
    # newer gTLDs such as ".technology" are not rejected.
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+'
    r'(?:[A-Z]{2,63}|XN--[A-Z0-9-]{1,59})\.?|'
    r'localhost|'
    # Dotted-quad IPv4 literal (octet ranges are not checked).
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
    r'(?::\d+)?'
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)


def is_valid_url(url):
    """Return True when *url* looks like a well-formed http(s) URL.

    The URL must start with a lowercase ``http://`` or ``https://`` scheme and
    have a dotted hostname, ``localhost`` or an IPv4 literal as host, with an
    optional port and an optional path/query free of whitespace.

    Args:
        url: Candidate URL. Anything that is not a ``str`` is reported as
            invalid instead of raising.

    Returns:
        bool: True if the URL matches the expected format, False otherwise.
    """
    if not isinstance(url, str):
        return False
    # The regex is case-insensitive, so the explicit prefix test is what
    # keeps the scheme restricted to its lowercase spelling.
    if not url.startswith(('http://', 'https://')):
        return False
    return _URL_PATTERN.match(url) is not None
def process_file(file_content, file_name, api_endpoint, timeout=120):
    """Upload a file to the processing API and return its parsed reply.

    Args:
        file_content: File payload sent as the ``file`` multipart field.
        file_name: File name reported to the API alongside the payload.
        api_endpoint: URL that receives the POST request.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        Whatever ``handle_api_response`` extracts from the HTTP response.

    Raises:
        Exception: On any failure (network error, timeout, API error); the
            message is prefixed with "File processing failed:" and the
            original exception is kept as ``__cause__``.
    """
    try:
        response = requests.post(
            api_endpoint,
            files={"file": (file_name, file_content)},
            timeout=timeout,
        )
        return handle_api_response(response)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"File processing failed: {str(e)}") from e
def process_url(url, api_endpoint, timeout=120):
    """Send a website URL to the processing API and return its parsed reply.

    Args:
        url: Address of the page to process; checked with ``is_valid_url``.
        api_endpoint: URL that receives the POST request.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        Whatever ``handle_api_response`` extracts from the HTTP response.

    Raises:
        ValueError: If *url* fails validation (raised before any request).
        Exception: On any request/API failure; the message is prefixed with
            "URL processing failed:" and the original exception is kept as
            ``__cause__``.
    """
    if not is_valid_url(url):
        raise ValueError("Invalid URL format")
    try:
        # ``json=`` serialises the payload and sets the
        # ``Content-Type: application/json`` header on its own.
        response = requests.post(
            api_endpoint,
            json={"websiteUrl": url},
            timeout=timeout,
        )
        return handle_api_response(response)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"URL processing failed: {str(e)}") from e
def handle_api_response(response):
    """Turn an API HTTP response into plain Python data.

    Args:
        response: Object exposing ``status_code`` and ``text``.

    Returns:
        For a 200 response: the decoded JSON when the body looks like JSON
        (for an object with a ``value`` key, that value, itself decoded when
        it is a JSON-looking string); otherwise the stripped body text.

    Raises:
        Exception: For status 500 (reported as multiple job postings) and for
            every other non-200 status.
    """
    status = response.status_code
    if status == 500:
        raise Exception("Multiple job postings detected. Please use the specific job posting URL.")
    if status != 200:
        raise Exception(f"API Error: {status}")

    json_openers = ('{', '[')
    body = response.text.strip()

    # Bodies that do not start like JSON are passed through as text.
    if not body.startswith(json_openers):
        return body

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError:
        # Looked like JSON but is not: fall back to the raw text.
        return body

    if not isinstance(parsed, dict):
        return parsed

    # Objects may wrap the real payload in a 'value' field.
    payload = parsed.get('value', parsed)
    if not (isinstance(payload, str) and payload.strip().startswith(json_openers)):
        return payload

    # The wrapped payload is a string that itself looks like JSON.
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        return payload
def display_job_data(job_data):
    """Convert job data into a form ready for display.

    Args:
        job_data: Parsed API result of any type.

    Returns:
        A one-row ``DataFrame`` for a dict; the tuple ``('html', text)`` for
        a string containing both ``<`` and ``>``; the string itself for any
        other string; ``str(job_data)`` for every other type.
    """
    if isinstance(job_data, dict):
        # A single record becomes a one-row table.
        return pd.DataFrame([job_data])

    if not isinstance(job_data, str):
        # Anything that is neither dict nor text is shown as its string form.
        return str(job_data)

    # Angle brackets are taken as a sign of HTML markup; the tuple tags it.
    has_markup = '<' in job_data and '>' in job_data
    return ('html', job_data) if has_markup else job_data
def process_text(text, api_endpoint, timeout=120):
    """Send raw text to the processing API and return its parsed reply.

    Args:
        text: Text sent under the ``text`` key of the JSON payload.
        api_endpoint: URL that receives the POST request.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        Whatever ``handle_api_response`` extracts from the HTTP response.

    Raises:
        Exception: On any failure (network error, timeout, API error); the
            message is prefixed with "Text processing failed:" and the
            original exception is kept as ``__cause__``.
    """
    try:
        # ``json=`` serialises the payload and sets the
        # ``Content-Type: application/json`` header on its own.
        response = requests.post(
            api_endpoint,
            json={"text": text},
            timeout=timeout,
        )
        return handle_api_response(response)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"Text processing failed: {str(e)}") from e
def _is_blank(value):
    """Return True when *value* should count as a missing field."""
    na = pd.isna(value)
    # For lists/arrays/Series ``pd.isna`` answers element-wise; using that
    # result in a boolean context raises, so containers are judged by
    # emptiness instead.
    if getattr(na, "ndim", 0) > 0:
        return len(value) == 0
    return bool(na) or not str(value).strip()


def validate_job_fields(data):
    """Validate required fields in job data.

    Args:
        data: Mapping-like object with a ``get`` method holding the job
            fields.

    Returns:
        tuple: ``(True, None)`` when every required field is present, else
        ``(False, message)`` where *message* lists the display names of the
        missing fields.
    """
    # Data key -> label used in the user-facing error message.
    required_fields = {
        'Company': 'Empresa',
        'Job Title': 'Título del puesto',
        'apply_Url': 'URL de aplicación',
        'Remote': 'Modalidad'
    }
    missing = [
        display_name
        for field, display_name in required_fields.items()
        if _is_blank(data.get(field))
    ]
    if missing:
        return (False, f"Campos obligatorios faltantes: {', '.join(missing)}")
    return (True, None)
# Notion rejects a rich-text object whose content exceeds this many characters.
_NOTION_TEXT_LIMIT = 2000


def _notion_text(value):
    """Build a Notion rich-text array for *value*, split to fit the size limit."""
    # None / NaN mean "no value"; do not store the literal "None" or "nan".
    if value is None or (isinstance(value, float) and value != value):
        text = ""
    else:
        text = str(value)
    chunks = [
        text[start:start + _NOTION_TEXT_LIMIT]
        for start in range(0, len(text), _NOTION_TEXT_LIMIT)
    ]
    # An empty text still yields one (empty) text object.
    return [{"text": {"content": chunk}} for chunk in chunks or [""]]


def send_to_notion(data, database_id, token):
    """Create a Notion page for a job after validating its required fields.

    Args:
        data: Mapping-like job record. Reads "Job Title", "Company",
            "apply_Url" and "Remote" (required) plus the optional
            "Description", "Location" and "file_Url".
        database_id: ID of the Notion database that receives the page.
        token: Notion integration token used to authenticate the client.

    Returns:
        The ``url`` field of the page object returned by Notion.

    Raises:
        ValueError: If ``validate_job_fields`` reports missing fields.
        Exception: If building or sending the page fails; the message is
            prefixed with "Error al enviar a Notion:" and the original
            exception is kept as ``__cause__``.
    """
    is_valid, error_msg = validate_job_fields(data)
    if not is_valid:
        raise ValueError(error_msg)

    try:
        notion = Client(auth=token)
        new_page = {
            "Role": {"title": _notion_text(data["Job Title"])},
            "Startup": {"rich_text": _notion_text(data["Company"])},
            "Apply URL": {"url": str(data["apply_Url"]).strip()},
            "Summary": {"rich_text": _notion_text(data.get("Description", ""))},
            "Location": {"rich_text": _notion_text(data.get("Location", ""))},
            "Remote": {"select": {"name": str(data["Remote"])}}
        }
        # The source-file link is optional and only set when provided.
        if data.get("file_Url"):
            new_page["Original file"] = {"url": data["file_Url"]}

        created_page = notion.pages.create(
            parent={"database_id": database_id},
            properties=new_page
        )
        return created_page["url"]
    except Exception as e:
        logging.error("Notion API error: %s", e)
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"Error al enviar a Notion: {str(e)}") from e