Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| # import openai | |
| import xmltodict | |
| from difflib import get_close_matches | |
| from langchain_openai import ChatOpenAI | |
| from langchain.chains import LLMChain | |
| from langchain.prompts import PromptTemplate | |
| # Import the config module for logging and API key | |
| import config | |
# Load the reference data once, at import time, so every validator in this
# module shares the same in-memory lists. The paths are relative to the
# process's current working directory.
try:
    # Pin the encoding: without it open() falls back to the platform locale,
    # which can mis-decode non-ASCII location names in the JSON files.
    with open("data_source/reference_locations.json", "r", encoding="utf-8") as file:
        reference_locations = json.load(file)
    with open("data_source/reference_suppliers.json", "r", encoding="utf-8") as file:
        reference_suppliers = json.load(file)
    config.logger.info("Reference data loaded successfully.")
except Exception as e:
    config.logger.error(f"Failed to load reference data: {str(e)}")
    # Chain the original exception so the traceback still shows the root cause
    # (missing file, malformed JSON, ...).
    raise RuntimeError(f"Failed to load reference data: {str(e)}") from e
def validate_incoterms_xml(file):
    """
    Parses the XML file and validates the Incoterms location against the reference list.

    Args:
        file: The uploaded XML file; a file-like object supporting ``seek`` and ``read``.

    Returns:
        tuple: ``("valid", location_name)`` when the extracted location is in
        ``reference_locations``, ``("invalid", location_name)`` when it is not,
        or ``(None, None)`` when the file cannot be read or parsed or the
        expected element path is missing.
    """
    try:
        # The same upload may already have been read elsewhere; rewind first.
        file.seek(0)
        xml_data = xmltodict.parse(file.read())
        # Walk the fixed path down to TransferLocationName.
        request = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]
        location_name = request["DeliveryTerms"]["Incoterms"]["TransferLocationName"]
        status = "valid" if location_name in reference_locations else "invalid"
        return status, location_name
    except Exception as e:
        # Keep the (None, None) contract for callers, but leave a trace of why
        # validation could not be performed instead of failing silently.
        config.logger.error(f"Failed to validate Incoterms location: {str(e)}")
        return None, None
def suggest_correct_incoterm(location_name):
    """
    Generates a correction suggestion using Langchain and finds the closest match in the reference list.

    Args:
        location_name: The incorrect location name extracted from the XML.

    Returns:
        tuple: ``(suggestion_sentence, best_match)`` where ``suggestion_sentence``
        is the model's answer and ``best_match`` is the entry of
        ``reference_locations`` that ``find_best_match`` picks out of that
        answer (``None`` if nothing matches). Returns ``(None, None)`` if the
        model call or any other step fails.
    """
    try:
        # Prompt that hands the model both the bad value and the allowed values.
        prompt = PromptTemplate(
            input_variables=["location_name", "reference_list"],
            template="The location name is '{location_name}'. The reference list is: {reference_list}. Suggest the closest correct location.",
        )
        # NOTE(review): this function uses "gpt-4o" while the supplier
        # counterpart in this module uses "gpt-4" - confirm that is intended.
        llm = ChatOpenAI(
            model="gpt-4o", temperature=0.5, openai_api_key=config.openai_api_key
        )
        chain = LLMChain(llm=llm, prompt=prompt)
        suggestion_sentence = chain.run(
            location_name=location_name, reference_list=", ".join(reference_locations)
        )
        # The model answers in free text; map it back onto a real reference entry.
        best_match = find_best_match(suggestion_sentence, reference_locations)
        return suggestion_sentence, best_match
    except Exception as e:
        # Keep the (None, None) contract, but record the failure (bad API key,
        # network error, ...) instead of hiding it.
        config.logger.error(f"Failed to suggest Incoterms correction: {str(e)}")
        return None, None
def validate_po_supplier_xml(file):
    """
    Parses the XML file and validates the Supplier IDs for 4 specific tags against the reference list.

    The tags are checked in a fixed order and the first ID that is not in the
    reference data ends the check.

    Args:
        file: The uploaded XML file; a file-like object supporting ``seek`` and ``read``.

    Returns:
        tuple: ``("invalid", supplier_id, "Parent/Child")`` for the first
        unknown ID; ``("valid", supplier_id, None)`` when every present tag is
        known, where ``supplier_id`` is the last ID checked or ``None`` if none
        of the four tags is present; ``(None, None, None)`` when the file
        cannot be read or parsed or the request root is missing.
    """
    tags_to_check = [
        ("ShipperParty", "InternalID"),
        ("ShipperParty", "AdditionalInternalID"),
        ("ShipFromLocation", "InternalID"),
        ("ShipFromLocation", "PartyInternalID"),
    ]
    try:
        # The same upload may already have been read elsewhere; rewind first.
        file.seek(0)
        xml_data = xmltodict.parse(file.read())
        # Only membership is tested, so a set is sufficient.
        valid_supplier_ids = {str(supplier["id"]) for supplier in reference_suppliers}
        request = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]
        # Bound up front so the "valid" return below also works when none of
        # the tags is present (previously this raised UnboundLocalError and was
        # reported as a parse failure).
        supplier_id = None
        for parent_tag, child_tag in tags_to_check:
            try:
                parent = request[parent_tag]
                if parent is None:
                    # An empty parent element parses to None: nothing to check.
                    continue
                supplier_id = str(parent[child_tag])
            except KeyError:
                # Skip if the tag is missing
                continue
            if supplier_id not in valid_supplier_ids:
                return "invalid", supplier_id, f"{parent_tag}/{child_tag}"
        # All present tags are valid (or none were present).
        return "valid", supplier_id, None
    except Exception as e:
        # Keep the (None, None, None) contract, but record why validation
        # could not be performed.
        config.logger.error(f"Failed to validate supplier IDs: {str(e)}")
        return None, None, None
def suggest_correct_supplier(supplier_id):
    """
    Generates a correction suggestion using Langchain and finds the closest match in the reference list.

    Args:
        supplier_id: The incorrect Supplier ID extracted from the XML.

    Returns:
        tuple: ``(suggestion_sentence, best_match)`` where ``suggestion_sentence``
        is the model's answer and ``best_match`` is the valid Supplier ID that
        ``find_best_match`` picks out of that answer (``None`` if nothing
        matches). Returns ``(None, None)`` if the model call or any other step
        fails.
    """
    try:
        # IDs are normalised to strings so they can be joined into the prompt
        # and compared against the model's text.
        valid_supplier_ids = [str(supplier["id"]) for supplier in reference_suppliers]
        prompt = PromptTemplate(
            input_variables=["supplier_id", "valid_supplier_ids"],
            template="The Supplier ID is '{supplier_id}'. The valid Supplier IDs are: {valid_supplier_ids}. Suggest the closest correct Supplier ID.",
        )
        # NOTE(review): this function uses "gpt-4" while the Incoterms
        # counterpart in this module uses "gpt-4o" - confirm that is intended.
        llm = ChatOpenAI(
            model="gpt-4", temperature=0.5, openai_api_key=config.openai_api_key
        )
        chain = LLMChain(llm=llm, prompt=prompt)
        suggestion_sentence = chain.run(
            supplier_id=supplier_id, valid_supplier_ids=", ".join(valid_supplier_ids)
        )
        # The model answers in free text; map it back onto a real Supplier ID.
        best_match = find_best_match(suggestion_sentence, valid_supplier_ids)
        return suggestion_sentence, best_match
    except Exception as e:
        # Keep the (None, None) contract, but record the failure (bad API key,
        # network error, malformed reference data, ...) instead of hiding it.
        config.logger.error(f"Failed to suggest supplier correction: {str(e)}")
        return None, None
def find_best_match(sentence, reference_list):
    """
    Finds the reference item that a free-text sentence most likely refers to.

    Matching happens in two stages:

    1. Verbatim: any reference item that appears in the sentence as a whole
       token (case-insensitive, may span several words) wins; the longest such
       item is returned.
    2. Fuzzy: otherwise every word of the sentence is compared against the
       reference list with ``difflib`` and the highest-scoring match over
       all words is returned, rather than the first word that happens to
       clear the similarity cutoff.

    Args:
        sentence: A sentence or phrase containing potential matching items.
        reference_list: A list of valid items to compare against. Only
            non-empty strings are considered.

    Returns:
        str or None: The matching item from the reference list, or None if no match is found
        (including when ``sentence`` is not a non-blank string or the list is empty).
    """
    import re
    from difflib import SequenceMatcher

    if not isinstance(sentence, str) or not sentence.strip():
        return None
    try:
        candidates = [
            item for item in reference_list if isinstance(item, str) and item
        ]
    except TypeError:
        # reference_list is not iterable
        return None
    if not candidates:
        return None

    # Stage 1: a reference item quoted verbatim is the strongest signal. The
    # \W / ^ / $ guards stop "100" from matching inside "1001".
    verbatim = [
        item
        for item in candidates
        if re.search(
            rf"(?:^|\W){re.escape(item)}(?:\W|$)", sentence, flags=re.IGNORECASE
        )
    ]
    if verbatim:
        # Prefer the longest hit so "New York" beats a shorter contained entry.
        return max(verbatim, key=len)

    # Stage 2: fuzzy match word by word and keep the best score overall.
    best_match = None
    best_score = 0.0
    for word in sentence.split():
        # Drop punctuation and quoting the model tends to wrap values in.
        clean_word = word.strip(".,!?\"'`:;()[]")
        if not clean_word:
            continue
        closest_match = get_close_matches(clean_word, candidates, n=1)
        if not closest_match:
            continue
        score = SequenceMatcher(None, clean_word, closest_match[0]).ratio()
        if score > best_score:
            best_match, best_score = closest_match[0], score
    return best_match
def correct_incoterm_in_xml(file, corrected_value):
    """
    Applies the suggested Incoterms location to the XML file and returns the corrected XML.

    Args:
        file: The uploaded XML file; a file-like object supporting ``seek`` and ``read``.
        corrected_value: The value to write into ``TransferLocationName``.

    Returns:
        str or None: The corrected XML as a pretty-printed string, or ``None``
        if the file cannot be read or parsed or the expected element path is
        missing.
    """
    try:
        # The same upload may already have been read elsewhere; rewind first.
        file.seek(0)
        xml_data = xmltodict.parse(file.read())
        request = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]
        request["DeliveryTerms"]["Incoterms"]["TransferLocationName"] = corrected_value
        # Serialise the updated dictionary back to XML.
        return xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        # Keep the None-on-failure contract, but record why the correction
        # could not be applied.
        config.logger.error(f"Failed to apply Incoterms correction: {str(e)}")
        return None
def apply_combined_corrections(
    file, incoterm_correction=None, supplier_correction=None
):
    """
    Applies both Incoterm and Supplier corrections to the XML file and returns the corrected XML.

    Args:
        file: The uploaded XML file; a file-like object supporting ``seek`` and ``read``.
        incoterm_correction: Value to write into ``TransferLocationName``;
            skipped when falsy.
        supplier_correction: New supplier ID written into every one of the four
            supplier tags that is present in the document; skipped when falsy.

    Returns:
        str: The corrected XML as a pretty-printed string.

    Raises:
        ValueError: If the file cannot be read or parsed, or the Incoterms path
            is missing while ``incoterm_correction`` is given. The original
            exception is attached as the cause.
    """
    tags_to_update = [
        ("ShipperParty", "InternalID"),
        ("ShipperParty", "AdditionalInternalID"),
        ("ShipFromLocation", "InternalID"),
        ("ShipFromLocation", "PartyInternalID"),
    ]
    try:
        # The same upload may already have been read elsewhere; rewind first.
        file.seek(0)
        xml_data = xmltodict.parse(file.read())

        # Apply Incoterm correction if provided; a missing path is an error.
        if incoterm_correction:
            xml_data["n0:TransportationRequestSUITERequest"]["TransportationRequest"][
                "DeliveryTerms"
            ]["Incoterms"]["TransferLocationName"] = incoterm_correction

        # Apply Supplier correction to all relevant tags if provided; absent
        # tags are skipped rather than created.
        if supplier_correction:
            try:
                request = xml_data["n0:TransportationRequestSUITERequest"][
                    "TransportationRequest"
                ]
            except (KeyError, TypeError):
                request = None
            if isinstance(request, dict):
                for parent_tag, child_tag in tags_to_update:
                    parent = request.get(parent_tag)
                    # An empty element parses to None and a text-only element
                    # to str; neither can hold the child tag, so skip them
                    # instead of failing the whole correction.
                    if isinstance(parent, dict) and child_tag in parent:
                        parent[child_tag] = supplier_correction

        # Convert the updated dictionary back to XML
        return xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        # Chain the cause so the traceback still shows what actually went wrong.
        raise ValueError(f"Error applying corrections: {str(e)}") from e