import json
import os
import re
from difflib import get_close_matches

# import openai
import xmltodict
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Import the config module for logging and API key
import config

# Path components shared by every lookup into the parsed XML document.
_ROOT_TAG = "n0:TransportationRequestSUITERequest"
_REQUEST_TAG = "TransportationRequest"

# (parent, child) tag pairs that carry a Supplier ID. Shared by validation and
# correction so the two can never drift apart.
_SUPPLIER_TAGS = (
    ("ShipperParty", "InternalID"),
    ("ShipperParty", "AdditionalInternalID"),
    ("ShipFromLocation", "InternalID"),
    ("ShipFromLocation", "PartyInternalID"),
)

# Load reference data once
try:
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open("data_source/reference_locations.json", "r", encoding="utf-8") as file:
        reference_locations = json.load(file)
    with open("data_source/reference_suppliers.json", "r", encoding="utf-8") as file:
        reference_suppliers = json.load(file)
    config.logger.info("Reference data loaded successfully.")
except Exception as e:
    config.logger.error(f"Failed to load reference data: {str(e)}")
    # Chain the cause so the original traceback is not lost.
    raise RuntimeError(f"Failed to load reference data: {str(e)}") from e


def _parse_xml(file):
    """Rewind ``file`` and parse its entire content with xmltodict."""
    file.seek(0)
    return xmltodict.parse(file.read())


def _transportation_request(xml_data):
    """Return the TransportationRequest mapping of a parsed document.

    Raises:
        KeyError: If the root or TransportationRequest element is missing.
        TypeError: If the element is present but is not a mapping (for
            example, an empty element), so it cannot contain child tags.
    """
    request = xml_data[_ROOT_TAG][_REQUEST_TAG]
    if not isinstance(request, dict):
        raise TypeError("TransportationRequest element has no child elements")
    return request


def _valid_supplier_ids():
    """Return the reference Supplier IDs as strings, in reference-file order."""
    return [str(supplier["id"]) for supplier in reference_suppliers]


def _run_suggestion_chain(model, template, **variables):
    """Fill ``template`` with ``variables`` and return the model's answer.

    NOTE(review): LLMChain / chain.run appear to be deprecated in recent
    LangChain releases -- confirm the pinned version before migrating.
    """
    prompt = PromptTemplate(input_variables=list(variables), template=template)
    llm = ChatOpenAI(
        model=model, temperature=0.5, openai_api_key=config.openai_api_key
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    return chain.run(**variables)


def validate_incoterms_xml(file):
    """
    Parses the XML file and validates the Incoterms location against a reference list.

    Args:
        file: The uploaded XML file (must support ``seek`` and ``read``).

    Returns:
        tuple: ``(status, location_name)`` where status is 'valid' or 'invalid'.
        ``(None, None)`` if the file cannot be parsed or the location tag is
        missing; the failure is logged.
    """
    try:
        xml_data = _parse_xml(file)

        # Extract the TransferLocationName from the XML
        location_name = _transportation_request(xml_data)["DeliveryTerms"][
            "Incoterms"
        ]["TransferLocationName"]

        status = "valid" if location_name in reference_locations else "invalid"
        return status, location_name
    except Exception as e:
        config.logger.error(f"Failed to validate Incoterms location: {str(e)}")
        return None, None


def suggest_correct_incoterm(location_name):
    """
    Generates a correction suggestion using Langchain and finds the closest match in the reference list.

    Args:
        location_name: The incorrect location name extracted from the XML.

    Returns:
        tuple: ``(suggestion_sentence, best_match)``: the AI-generated sentence
        and the reference location picked out of it (``best_match`` may be
        None). ``(None, None)`` if the model call fails; the failure is logged.
    """
    try:
        suggestion_sentence = _run_suggestion_chain(
            "gpt-4o",
            "The location name is '{location_name}'. The reference list is: {reference_list}. Suggest the closest correct location.",
            location_name=location_name,
            reference_list=", ".join(reference_locations),
        )

        # Find the closest match from the reference list based on the AI suggestion
        best_match = find_best_match(suggestion_sentence, reference_locations)
        return suggestion_sentence, best_match
    except Exception as e:
        config.logger.error(f"Failed to suggest Incoterms location: {str(e)}")
        return None, None


def validate_po_supplier_xml(file):
    """
    Parses the XML file and validates the Supplier IDs for 4 specific tags against a reference list.

    Args:
        file: The uploaded XML file (must support ``seek`` and ``read``).

    Returns:
        tuple: ``(status, supplier_id, tag_path)``.
            * ``("invalid", <id>, "<parent>/<child>")`` for the first
              Supplier ID that is not in the reference list.
            * ``("valid", <last checked id or None>, None)`` when every present
              tag is valid. ``supplier_id`` is None when none of the four tags
              exists in the document.
            * ``(None, None, None)`` if the file cannot be parsed or has no
              TransportationRequest element; the failure is logged.
    """
    try:
        xml_data = _parse_xml(file)
        request = _transportation_request(xml_data)

        # A set gives O(1) membership tests.
        valid_supplier_ids = set(_valid_supplier_ids())

        # Bound up front: previously, a document with none of the four tags
        # hit an unbound local at the final return and was reported as a
        # failure instead of "valid".
        supplier_id = None

        for parent_tag, child_tag in _SUPPLIER_TAGS:
            parent = request.get(parent_tag)
            # Skip missing tags; an empty parent element has no child to check.
            if not isinstance(parent, dict) or child_tag not in parent:
                continue

            supplier_id = str(parent[child_tag])
            if supplier_id not in valid_supplier_ids:
                # Return invalid status and the problematic supplier ID
                return "invalid", supplier_id, f"{parent_tag}/{child_tag}"

        # All tags are valid or missing
        return "valid", supplier_id, None
    except Exception as e:
        config.logger.error(f"Failed to validate Supplier IDs: {str(e)}")
        return None, None, None


def suggest_correct_supplier(supplier_id):
    """
    Generates a correction suggestion using Langchain and finds the closest match in the reference list.

    Args:
        supplier_id: The incorrect Supplier ID extracted from the XML.

    Returns:
        tuple: ``(suggestion_sentence, best_match)``: the AI-generated sentence
        and the valid Supplier ID picked out of it (``best_match`` may be
        None). ``(None, None)`` if the model call fails; the failure is logged.
    """
    try:
        valid_supplier_ids = _valid_supplier_ids()

        suggestion_sentence = _run_suggestion_chain(
            "gpt-4",
            "The Supplier ID is '{supplier_id}'. The valid Supplier IDs are: {valid_supplier_ids}. Suggest the closest correct Supplier ID.",
            supplier_id=supplier_id,
            valid_supplier_ids=", ".join(valid_supplier_ids),
        )

        # Find the closest match from the reference list based on the AI suggestion
        best_match = find_best_match(suggestion_sentence, valid_supplier_ids)
        return suggestion_sentence, best_match
    except Exception as e:
        config.logger.error(f"Failed to suggest Supplier ID: {str(e)}")
        return None, None


def _first_exact_mention(sentence, reference_list):
    """Return the reference item that appears verbatim earliest in ``sentence``.

    Items must match as whole tokens (no word character directly before or
    after), so "1002" does not match inside "100234". When two items start at
    the same position the longer one wins. Returns None if nothing matches.
    """
    best_key = None
    best_item = None
    for item in reference_list:
        if not isinstance(item, str) or not item:
            continue
        found = re.search(r"(?<!\w)" + re.escape(item) + r"(?!\w)", sentence)
        if found is None:
            continue
        key = (found.start(), -len(item))
        if best_key is None or key < best_key:
            best_key, best_item = key, item
    return best_item


def find_best_match(sentence, reference_list):
    """
    Finds the closest matching item in a reference list based on a given sentence.

    A reference item quoted verbatim in the sentence is preferred. This also
    covers multi-word items, which word-by-word comparison cannot match
    exactly. If there is none, each word is compared fuzzily and the first
    word with a close match decides the result.

    Args:
        sentence: A sentence or phrase containing potential matching items.
        reference_list: A list of valid items to compare against.

    Returns:
        str or None: The matching item from the reference list, or None if no
        match is found or the inputs cannot be processed.
    """
    try:
        exact = _first_exact_mention(sentence, reference_list)
        if exact is not None:
            return exact

        for word in sentence.split():
            # Clean word by removing common punctuation
            clean_word = word.strip(".,!?\"'")
            closest_match = get_close_matches(clean_word, reference_list, n=1)
            if closest_match:
                return closest_match[0]
        return None
    except Exception:
        return None


def correct_incoterm_in_xml(file, corrected_value):
    """
    Applies the AI-suggested correction to the XML file and returns the corrected XML.

    Args:
        file: The uploaded XML file (must support ``seek`` and ``read``).
        corrected_value: The suggested correction to apply.

    Returns:
        str: The corrected XML as a string, or None if the file cannot be
        parsed or lacks the Incoterms path; the failure is logged.
    """
    try:
        xml_data = _parse_xml(file)
        _transportation_request(xml_data)["DeliveryTerms"]["Incoterms"][
            "TransferLocationName"
        ] = corrected_value

        # Convert the updated dictionary back to XML
        return xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        config.logger.error(f"Failed to correct Incoterms location: {str(e)}")
        return None


def apply_combined_corrections(
    file, incoterm_correction=None, supplier_correction=None
):
    """
    Applies both Incoterm and Supplier corrections to the XML file and returns the corrected XML.

    Args:
        file: The uploaded XML file (must support ``seek`` and ``read``).
        incoterm_correction: Suggested Incoterm correction; skipped if falsy.
        supplier_correction: Suggested Supplier correction (new supplier ID),
            written to every supplier tag that exists; skipped if falsy.

    Returns:
        str: The corrected XML as a string.

    Raises:
        ValueError: If the file cannot be parsed or a requested Incoterm
            correction cannot be applied.
    """
    try:
        xml_data = _parse_xml(file)

        # Apply Incoterm correction if provided
        if incoterm_correction:
            _transportation_request(xml_data)["DeliveryTerms"]["Incoterms"][
                "TransferLocationName"
            ] = incoterm_correction

        # Apply Supplier correction to all relevant tags if provided
        if supplier_correction:
            try:
                request = _transportation_request(xml_data)
            except KeyError:
                # No request element means there are no supplier tags to update.
                request = {}

            for parent_tag, child_tag in _SUPPLIER_TAGS:
                parent = request.get(parent_tag)
                # Only overwrite tags that exist; an empty parent element
                # (parsed as None) is skipped, not treated as a fatal error.
                if isinstance(parent, dict) and child_tag in parent:
                    parent[child_tag] = supplier_correction

        # Convert the updated dictionary back to XML
        return xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        raise ValueError(f"Error applying corrections: {str(e)}") from e