xml-verifier-backup / app /backend.py
nghweigeok's picture
Add new feature: Verify and correct purchase order supplier id
b794a09 verified
import os
import json
# import openai
import xmltodict
from difflib import get_close_matches
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
# Import the config module for logging and API key
import config
# Load the reference data once, at import time, so that every validator in
# this module shares the same in-memory copies.
try:
    # JSON is UTF-8 by specification; passing the encoding explicitly avoids
    # depending on the platform's default locale encoding.
    with open(
        "data_source/reference_locations.json", "r", encoding="utf-8"
    ) as file:
        reference_locations = json.load(file)
    with open(
        "data_source/reference_suppliers.json", "r", encoding="utf-8"
    ) as file:
        reference_suppliers = json.load(file)
    config.logger.info("Reference data loaded successfully.")
except Exception as e:
    config.logger.error(f"Failed to load reference data: {str(e)}")
    # Chain the original exception so the root cause stays in the traceback.
    raise RuntimeError(f"Failed to load reference data: {str(e)}") from e
def validate_incoterms_xml(file):
    """
    Parses the XML file and validates the Incoterms location against a reference list.

    Args:
        file: The uploaded XML file (a file-like object supporting seek() and read()).

    Returns:
        tuple: (status, location_name), where status is 'valid' if the
        TransferLocationName is found in reference_locations and 'invalid'
        otherwise. Returns (None, None) if the file cannot be parsed or the
        expected element path is missing; the failure is logged.
    """
    try:
        # Rewind: the same upload may already have been read by another step.
        file.seek(0)
        xml_data = xmltodict.parse(file.read())

        # Walk down to the Incoterms transfer location.
        request = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]
        location_name = request["DeliveryTerms"]["Incoterms"][
            "TransferLocationName"
        ]

        status = "valid" if location_name in reference_locations else "invalid"
        return status, location_name
    except Exception as e:
        # Keep the (None, None) contract for callers, but do not hide the cause.
        config.logger.error(f"Failed to validate Incoterms location: {e!r}")
        return None, None
def suggest_correct_incoterm(location_name):
    """
    Generates a correction suggestion using Langchain and finds the closest match in the reference list.

    Args:
        location_name: The incorrect location name extracted from the XML.

    Returns:
        tuple: (suggestion_sentence, best_match), where suggestion_sentence is
        the text produced by the model and best_match is the entry of
        reference_locations picked out of that text by find_best_match (may be
        None). Returns (None, None) if anything fails; the failure is logged.
    """
    try:
        # Prompt asking the model to pick the closest valid location.
        prompt = PromptTemplate(
            input_variables=["location_name", "reference_list"],
            template="The location name is '{location_name}'. The reference list is: {reference_list}. Suggest the closest correct location.",
        )
        llm = ChatOpenAI(
            model="gpt-4o", temperature=0.5, openai_api_key=config.openai_api_key
        )
        chain = LLMChain(llm=llm, prompt=prompt)

        # The whole reference list is sent along so the model can choose from it.
        suggestion_sentence = chain.run(
            location_name=location_name,
            reference_list=", ".join(reference_locations),
        )

        # Map the free-text answer back onto an actual reference entry.
        best_match = find_best_match(suggestion_sentence, reference_locations)
        return suggestion_sentence, best_match
    except Exception as e:
        # API/key/network errors used to vanish silently; log them while
        # keeping the (None, None) contract for callers.
        config.logger.error(f"Failed to suggest Incoterms correction: {e!r}")
        return None, None
def validate_po_supplier_xml(file):
    """
    Parses the XML file and validates the Supplier IDs for 4 specific tags against a reference list.

    Args:
        file: The uploaded XML file (a file-like object supporting seek() and read()).

    Returns:
        tuple: (status, supplier_id, tag_path).
            - ('invalid', <offending id>, '<Parent>/<Child>') for the first
              checked tag whose value is not a known supplier ID.
            - ('valid', <last id checked or None>, None) when every tag that
              is present holds a known ID; supplier_id is None when none of
              the four tags is present.
            - (None, None, None) if the file cannot be parsed or the request
              root is missing; the failure is logged.
    """
    tags_to_check = (
        ("ShipperParty", "InternalID"),
        ("ShipperParty", "AdditionalInternalID"),
        ("ShipFromLocation", "InternalID"),
        ("ShipFromLocation", "PartyInternalID"),
    )
    try:
        # Rewind: the same upload may already have been read by another step.
        file.seek(0)
        xml_data = xmltodict.parse(file.read())

        # A set gives O(1) membership tests; IDs are compared as strings.
        valid_supplier_ids = {
            str(supplier["id"]) for supplier in reference_suppliers
        }

        # Looked up outside the per-tag try on purpose: a document without the
        # request root is a failure, not a document with "missing tags".
        request = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]

        # Initialised up front so the final return cannot hit an unbound name
        # when none of the tags exist.
        supplier_id = None
        for parent_tag, child_tag in tags_to_check:
            try:
                supplier_id = str(request[parent_tag][child_tag])
            except KeyError:
                # Skip if the tag is missing
                continue
            if supplier_id not in valid_supplier_ids:
                return "invalid", supplier_id, f"{parent_tag}/{child_tag}"

        # All present tags are valid (or none were present).
        return "valid", supplier_id, None
    except Exception as e:
        config.logger.error(f"Failed to validate supplier IDs: {e!r}")
        return None, None, None
def suggest_correct_supplier(supplier_id):
    """
    Generates a correction suggestion using Langchain and finds the closest match in the reference list.

    Args:
        supplier_id: The incorrect Supplier ID extracted from the XML.

    Returns:
        tuple: (suggestion_sentence, best_match), where suggestion_sentence is
        the text produced by the model and best_match is the valid Supplier ID
        picked out of that text by find_best_match (may be None). Returns
        (None, None) if anything fails; the failure is logged.
    """
    try:
        # IDs are normalised to strings so they can be joined and compared.
        valid_supplier_ids = [
            str(supplier["id"]) for supplier in reference_suppliers
        ]

        # Prompt asking the model to pick the closest valid Supplier ID.
        prompt = PromptTemplate(
            input_variables=["supplier_id", "valid_supplier_ids"],
            template="The Supplier ID is '{supplier_id}'. The valid Supplier IDs are: {valid_supplier_ids}. Suggest the closest correct Supplier ID.",
        )
        llm = ChatOpenAI(
            model="gpt-4", temperature=0.5, openai_api_key=config.openai_api_key
        )
        chain = LLMChain(llm=llm, prompt=prompt)

        # The full list of valid IDs is sent along so the model can choose from it.
        suggestion_sentence = chain.run(
            supplier_id=supplier_id,
            valid_supplier_ids=", ".join(valid_supplier_ids),
        )

        # Map the free-text answer back onto an actual valid ID.
        best_match = find_best_match(suggestion_sentence, valid_supplier_ids)
        return suggestion_sentence, best_match
    except Exception as e:
        # API/key/network errors used to vanish silently; log them while
        # keeping the (None, None) contract for callers.
        config.logger.error(f"Failed to suggest supplier correction: {e!r}")
        return None, None
def find_best_match(sentence, reference_list):
    """
    Finds the closest matching item in a reference list based on a given sentence.

    Matching runs in two passes:
      1. Verbatim pass - a reference item that appears in the sentence as a
         whole token/phrase wins. If several appear, the one that starts
         earliest is chosen (the longer one on a tie). This handles
         multi-word items and prevents an echoed *invalid* value (e.g.
         "12345") from fuzzy-matching a neighbour ("1234") before the item
         the sentence actually recommends is reached.
      2. Fuzzy pass - otherwise each whitespace-separated word, stripped of
         common punctuation, is compared with difflib.get_close_matches and
         the first word that yields a match decides the result.

    Args:
        sentence: A sentence or phrase containing potential matching items.
        reference_list: A list of valid items (strings) to compare against.

    Returns:
        str or None: The matching item from the reference list, or None if no
        match is found or the inputs cannot be processed.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import re

    try:
        # Pass 1: look for reference items quoted verbatim in the sentence.
        best_rank = None
        best_item = None
        for item in reference_list:
            if not item:
                # An empty item would match everywhere; ignore it.
                continue
            # The look-arounds require the item not to be glued to other
            # word characters, so "1234" does not match inside "12345".
            pattern = r"(?<!\w)" + re.escape(item) + r"(?!\w)"
            hit = re.search(pattern, sentence)
            if hit is None:
                continue
            rank = (hit.start(), -len(item))
            if best_rank is None or rank < best_rank:
                best_rank, best_item = rank, item
        if best_item is not None:
            return best_item

        # Pass 2: word-by-word fuzzy matching (the original behaviour).
        for word in sentence.split():
            clean_word = word.strip(".,!?\"'")
            closest_match = get_close_matches(clean_word, reference_list, n=1)
            if closest_match:
                return closest_match[0]
        return None
    except Exception:
        # Best-effort helper: unusable input (e.g. sentence is None) means "no match".
        return None
def correct_incoterm_in_xml(file, corrected_value):
    """
    Applies the AI-suggested correction to the XML file and returns the corrected XML.

    Args:
        file: The uploaded XML file (a file-like object supporting seek() and read()).
        corrected_value: The value to write into TransferLocationName.

    Returns:
        str or None: The corrected XML as a pretty-printed string, or None if
        the file cannot be parsed or the Incoterms path is missing (the
        failure is logged).
    """
    try:
        # Rewind: the same upload may already have been read by another step.
        file.seek(0)
        xml_data = xmltodict.parse(file.read())

        # Overwrite the transfer location in the parsed structure.
        incoterms = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]["DeliveryTerms"]["Incoterms"]
        incoterms["TransferLocationName"] = corrected_value

        # Serialise the updated structure back to XML text.
        return xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        # Keep the None contract for callers, but do not hide the cause.
        config.logger.error(f"Failed to apply Incoterms correction: {e!r}")
        return None
def apply_combined_corrections(
    file, incoterm_correction=None, supplier_correction=None
):
    """
    Applies both Incoterm and Supplier corrections to the XML file and returns the corrected XML.

    Args:
        file: The uploaded XML file (a file-like object supporting seek() and read()).
        incoterm_correction: Suggested Incoterm correction. Skipped when falsy.
        supplier_correction: Suggested Supplier correction (new supplier ID).
            Skipped when falsy. When given, it is written to every one of the
            four supplier tags that already exists in the document; missing
            tags are not created.

    Returns:
        str: The corrected XML as a pretty-printed string.

    Raises:
        ValueError: If parsing, updating, or serialising fails. The original
            exception is attached as the cause.
    """
    tags_to_update = (
        ("ShipperParty", "InternalID"),
        ("ShipperParty", "AdditionalInternalID"),
        ("ShipFromLocation", "InternalID"),
        ("ShipFromLocation", "PartyInternalID"),
    )
    try:
        # Rewind: the same upload may already have been read by another step.
        file.seek(0)
        xml_data = xmltodict.parse(file.read())

        # Apply Incoterm correction if provided
        if incoterm_correction:
            xml_data["n0:TransportationRequestSUITERequest"]["TransportationRequest"][
                "DeliveryTerms"
            ]["Incoterms"]["TransferLocationName"] = incoterm_correction

        # Apply Supplier correction to all relevant tags if provided
        if supplier_correction:
            for parent_tag, child_tag in tags_to_update:
                try:
                    parent = xml_data["n0:TransportationRequestSUITERequest"][
                        "TransportationRequest"
                    ][parent_tag]
                except KeyError:
                    # Skip if tag is missing
                    continue
                # Only overwrite tags that already exist.
                if child_tag in parent:
                    parent[child_tag] = supplier_correction

        # Convert the updated dictionary back to XML
        return xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        # Chain the cause so the traceback shows what actually went wrong.
        raise ValueError(f"Error applying corrections: {str(e)}") from e