Spaces:
Running
Running
| import json | |
# Text fields of a company record, in output order. The flag marks fields
# that are always emitted as a string ("" when absent); the remaining fields
# are emitted as None when absent or falsy.
_COMPANY_TEXT_FIELDS = (
    ("company_name", True),
    ("industry_type", True),
    ("location", True),
    ("company_size", False),
    ("street", False),
    ("city", False),
    ("state", False),
    ("country", False),
    ("phone", False),
    ("email", False),
    ("approx_revenue", False),
    ("business_type", True),
    ("website_url", True),
)


def _clean_text(value, required):
    """Return *value* as a stripped string, or the field's empty marker."""
    if required:
        if value is None:
            return ""
    elif not value:
        return None
    # Non-string values (e.g. an integer company size) are stringified
    # rather than crashing on a missing ``.strip`` method.
    text = value if isinstance(value, str) else str(value)
    return text.strip()


def clean_json_f(input_json: object) -> str:
    """Clean company records and return them as a formatted JSON string.

    Args:
        input_json: Either an already-decoded mapping with a "companies"
            list, or a JSON document (``str``/``bytes``/``bytearray``)
            that decodes to one.

    Returns:
        A JSON string (2-space indent) of the form ``{"companies": [...]}``.
        Every company carries the same keys in a fixed order: required text
        fields are stripped strings ("" when missing or null), optional text
        fields are stripped strings or ``null`` when missing/falsy, and
        "score" is passed through unchanged. Entries in the list that are
        not dicts are skipped. If the input cannot be decoded, or has no
        "companies" list, ``{"companies": []}`` is returned.
    """
    empty_result = json.dumps({"companies": []}, indent=2)

    data = input_json
    if isinstance(data, (str, bytes, bytearray)):
        try:
            data = json.loads(data)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError, so malformed text and badly encoded bytes end here.
            return empty_result

    try:
        companies = data["companies"]
    except (KeyError, IndexError, TypeError):
        # Missing key, or an input that is not subscriptable by string
        # (None, a list, a number, ...).
        return empty_result
    if not isinstance(companies, list):
        return empty_result

    cleaned_companies = []
    for company in companies:
        if not isinstance(company, dict):
            continue
        cleaned_company = {
            field: _clean_text(company.get(field), required)
            for field, required in _COMPANY_TEXT_FIELDS
        }
        cleaned_company["score"] = company.get("score")
        cleaned_companies.append(cleaned_company)

    return json.dumps({"companies": cleaned_companies}, indent=2)