Subhajit Chakraborty
update(12)
7607c3d
import json
def clean_json_f(input_json: object) -> str:
    """Clean a companies payload and return it as a formatted JSON string.

    The payload may be an already-decoded mapping or a raw JSON document
    (``str``, ``bytes`` or ``bytearray``), which is decoded first. Every
    company entry is rebuilt with a fixed set of keys: text values are
    stripped of surrounding whitespace and ``score`` is passed through
    unchanged.

    Args:
        input_json: The decoded payload, or raw JSON text to decode.

    Returns:
        A JSON string (2-space indent) of the form ``{"companies": [...]}``.
        The list is empty when the payload cannot be decoded, is not a
        mapping, or its ``"companies"`` value is missing or not a list.
        Entries of ``"companies"`` that are not mappings are skipped.
    """
    empty_result = json.dumps({"companies": []}, indent=2)

    data = input_json
    if isinstance(data, (str, bytes, bytearray)):
        try:
            data = json.loads(data)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError; undecodable input yields the empty result.
            return empty_result

    if not isinstance(data, dict):
        return empty_result

    companies = data.get("companies")
    if not isinstance(companies, list):
        return empty_result

    cleaned_companies = [
        _clean_company(company)
        for company in companies
        if isinstance(company, dict)
    ]
    return json.dumps({"companies": cleaned_companies}, indent=2)


def _clean_company(company: dict) -> dict:
    """Build the normalized record for one company entry."""
    return {
        "company_name": _required_text(company, "company_name"),
        "industry_type": _required_text(company, "industry_type"),
        "location": _required_text(company, "location"),
        "company_size": _optional_text(company, "company_size"),
        "street": _optional_text(company, "street"),
        "city": _optional_text(company, "city"),
        "state": _optional_text(company, "state"),
        "country": _optional_text(company, "country"),
        "phone": _optional_text(company, "phone"),
        "email": _optional_text(company, "email"),
        "approx_revenue": _optional_text(company, "approx_revenue"),
        "business_type": _required_text(company, "business_type"),
        "website_url": _required_text(company, "website_url"),
        "score": company.get("score"),
    }


def _required_text(company: dict, key: str) -> str:
    """Return the stripped text for *key*, or "" when missing or None."""
    value = company.get(key)
    if value is None:
        return ""
    # str() keeps non-string values (e.g. numbers) from breaking .strip().
    return str(value).strip()


def _optional_text(company: dict, key: str):
    """Return the stripped text for *key*, or None when missing or falsy."""
    value = company.get(key)
    if not value:
        return None
    return str(value).strip()