Spaces:

hgng
/

xml-verifier-backup

Sleeping

App Files Files Community

xml-verifier-backup / app /backend.py

nghweigeok

Add new feature: Verify and correct purchase order supplier id

b794a09 verified over 1 year ago

raw

history blame contribute delete

10.6 kB

	import os
	import json

	# import openai
	import xmltodict
	from difflib import get_close_matches
	from langchain_openai import ChatOpenAI
	from langchain.chains import LLMChain
	from langchain.prompts import PromptTemplate

	# Import the config module for logging and API key
	import config

	# Load reference data once, at import time, so every function below shares
	# the same in-memory lists. A failure here aborts the import on purpose.
	try:
	    # Be explicit about the encoding instead of relying on the platform
	    # default, which is not UTF-8 everywhere.
	    with open(
	        "data_source/reference_locations.json", "r", encoding="utf-8"
	    ) as file:
	        reference_locations = json.load(file)

	    with open(
	        "data_source/reference_suppliers.json", "r", encoding="utf-8"
	    ) as file:
	        reference_suppliers = json.load(file)

	    config.logger.info("Reference data loaded successfully.")
	except Exception as e:
	    config.logger.error(f"Failed to load reference data: {str(e)}")
	    # Chain the original exception so the root cause is kept as __cause__.
	    raise RuntimeError(f"Failed to load reference data: {str(e)}") from e


	def validate_incoterms_xml(file):
	    """
	    Parse an XML file and validate its Incoterms transfer location.

	    The location is read from the path
	    n0:TransportationRequestSUITERequest / TransportationRequest /
	    DeliveryTerms / Incoterms / TransferLocationName and checked for
	    membership in the module-level ``reference_locations``.

	    Args:
	        file: The uploaded XML file; must support ``seek`` and ``read``.

	    Returns:
	        tuple: ``("valid", location_name)`` when the location is in the
	        reference data, ``("invalid", location_name)`` when it is not, or
	        ``(None, None)`` when the file cannot be read or parsed or the
	        expected path is absent. The failure is logged in that case.
	    """
	    try:
	        # Rewind first: the same upload may already have been read elsewhere.
	        file.seek(0)

	        # Convert the XML document into nested dictionaries.
	        xml_data = xmltodict.parse(file.read())

	        request = xml_data["n0:TransportationRequestSUITERequest"][
	            "TransportationRequest"
	        ]
	        location_name = request["DeliveryTerms"]["Incoterms"][
	            "TransferLocationName"
	        ]

	        status = "valid" if location_name in reference_locations else "invalid"
	        return status, location_name
	    except Exception as exc:
	        # Keep the best-effort contract (no exception reaches the caller),
	        # but record why validation could not be performed.
	        config.logger.error(f"Failed to validate Incoterms location: {str(exc)}")
	        return None, None


	def suggest_correct_incoterm(location_name):
	    """
	    Ask the language model for a corrected location and map it to the reference data.

	    The prompt contains the rejected location name and every entry of the
	    module-level ``reference_locations``. The model's free-text answer is
	    then passed to ``find_best_match`` to pick an entry of the reference data.

	    Args:
	        location_name: The incorrect location name extracted from the XML.

	    Returns:
	        tuple: ``(suggestion_sentence, best_match)`` where ``best_match`` is
	        whatever ``find_best_match`` returns for the model's answer, or
	        ``(None, None)`` when any step fails. The failure is logged.
	    """
	    try:
	        # Prompt template that hands the model both the bad value and the candidates.
	        prompt = PromptTemplate(
	            input_variables=["location_name", "reference_list"],
	            template="The location name is '{location_name}'. The reference list is: {reference_list}. Suggest the closest correct location.",
	        )

	        # Model used to generate the suggestion.
	        llm = ChatOpenAI(
	            model="gpt-4o", temperature=0.5, openai_api_key=config.openai_api_key
	        )
	        chain = LLMChain(llm=llm, prompt=prompt)

	        suggestion_sentence = chain.run(
	            location_name=location_name,
	            reference_list=", ".join(reference_locations),
	        )

	        # The answer is a sentence; reduce it to one entry of the reference data.
	        best_match = find_best_match(suggestion_sentence, reference_locations)

	        return suggestion_sentence, best_match
	    except Exception as exc:
	        # Keep the best-effort contract, but make model/API failures visible.
	        config.logger.error(f"Failed to suggest Incoterms location: {str(exc)}")
	        return None, None


	def validate_po_supplier_xml(file):
	    """
	    Parse an XML file and validate the Supplier IDs found in four specific tags.

	    The tags ShipperParty/InternalID, ShipperParty/AdditionalInternalID,
	    ShipFromLocation/InternalID and ShipFromLocation/PartyInternalID are
	    looked up under
	    n0:TransportationRequestSUITERequest / TransportationRequest and compared,
	    as strings, with the ``id`` values of the module-level
	    ``reference_suppliers``. Tags that are absent are skipped.

	    Args:
	        file: The uploaded XML file; must support ``seek`` and ``read``.

	    Returns:
	        tuple: ``("invalid", supplier_id, "Parent/Child")`` for the first tag
	        whose value is not a known supplier ID;
	        ``("valid", supplier_id, None)`` when every present tag is valid,
	        where ``supplier_id`` is the last value checked, or ``None`` if none
	        of the four tags exists; ``(None, None, None)`` when the file cannot
	        be read or parsed or has no TransportationRequest node. The failure
	        is logged in that case.
	    """
	    tags_to_check = [
	        ("ShipperParty", "InternalID"),
	        ("ShipperParty", "AdditionalInternalID"),
	        ("ShipFromLocation", "InternalID"),
	        ("ShipFromLocation", "PartyInternalID"),
	    ]

	    try:
	        # Rewind first: the same upload may already have been read elsewhere.
	        file.seek(0)

	        # Convert the XML document into nested dictionaries.
	        xml_data = xmltodict.parse(file.read())

	        # Resolve the request node once. If it is missing, the KeyError is
	        # handled by the outer handler and reported as "could not validate".
	        request = xml_data["n0:TransportationRequestSUITERequest"][
	            "TransportationRequest"
	        ]

	        # A set gives constant-time membership checks.
	        valid_supplier_ids = {str(supplier["id"]) for supplier in reference_suppliers}

	        # Initialised up front so the final return is well-defined even when
	        # none of the tags is present (previously an unbound local there was
	        # swallowed and turned into a (None, None, None) result).
	        supplier_id = None

	        for parent_tag, child_tag in tags_to_check:
	            try:
	                candidate = str(request[parent_tag][child_tag])
	            except KeyError:
	                # Skip if the tag is missing.
	                continue

	            supplier_id = candidate
	            if candidate not in valid_supplier_ids:
	                # Report the first offending ID together with its tag path.
	                return "invalid", candidate, f"{parent_tag}/{child_tag}"

	        # All present tags are valid (or every tag was missing).
	        return "valid", supplier_id, None
	    except Exception as exc:
	        # Keep the best-effort contract, but record why validation failed.
	        config.logger.error(f"Failed to validate supplier IDs: {str(exc)}")
	        return None, None, None


	def suggest_correct_supplier(supplier_id):
	    """
	    Ask the language model for a corrected Supplier ID and map it to the reference data.

	    The prompt contains the rejected ID and the ``id`` of every entry in the
	    module-level ``reference_suppliers`` (converted to strings). The model's
	    free-text answer is then passed to ``find_best_match`` to pick one of
	    those IDs.

	    Args:
	        supplier_id: The incorrect Supplier ID extracted from the XML.

	    Returns:
	        tuple: ``(suggestion_sentence, best_match)`` where ``best_match`` is
	        whatever ``find_best_match`` returns for the model's answer, or
	        ``(None, None)`` when any step fails. The failure is logged.
	    """
	    try:
	        # Candidate IDs, as strings, taken from the reference data.
	        valid_supplier_ids = [str(supplier["id"]) for supplier in reference_suppliers]

	        # Prompt template that hands the model both the bad value and the candidates.
	        prompt = PromptTemplate(
	            input_variables=["supplier_id", "valid_supplier_ids"],
	            template="The Supplier ID is '{supplier_id}'. The valid Supplier IDs are: {valid_supplier_ids}. Suggest the closest correct Supplier ID.",
	        )

	        # Model used to generate the suggestion.
	        llm = ChatOpenAI(
	            model="gpt-4", temperature=0.5, openai_api_key=config.openai_api_key
	        )
	        chain = LLMChain(llm=llm, prompt=prompt)

	        suggestion_sentence = chain.run(
	            supplier_id=supplier_id,
	            valid_supplier_ids=", ".join(valid_supplier_ids),
	        )

	        # The answer is a sentence; reduce it to one of the known IDs.
	        best_match = find_best_match(suggestion_sentence, valid_supplier_ids)

	        return suggestion_sentence, best_match
	    except Exception as exc:
	        # Keep the best-effort contract, but make model/API failures visible.
	        config.logger.error(f"Failed to suggest Supplier ID: {str(exc)}")
	        return None, None


	def find_best_match(sentence, reference_list):
	    """
	    Find the reference item that best matches any word of a sentence.

	    The sentence is split on whitespace and each word is stripped of
	    surrounding punctuation. For every word the closest reference item (as
	    judged by ``difflib.get_close_matches`` with its default cutoff) is
	    scored, and the item with the highest similarity over all words wins.
	    An exact match ends the search immediately; on equal scores the
	    earliest word wins. Scoring every word keeps a filler word that merely
	    resembles some reference item from shadowing a later exact match.

	    Matching is word-by-word, so a multi-word reference item can only be
	    found through one of its words.

	    Args:
	        sentence: A sentence or phrase containing potential matching items.
	        reference_list: A list of valid string items to compare against.

	    Returns:
	        str or None: The best matching item from the reference list, or None
	        if no word is close enough to any item or the inputs are unusable
	        (non-string sentence, empty reference list, non-string items).
	    """
	    # Local import keeps this function self-contained; the module header
	    # only imports get_close_matches from difflib.
	    from difflib import SequenceMatcher

	    if not isinstance(sentence, str) or not reference_list:
	        return None

	    # Punctuation that commonly wraps a value in generated text
	    # (quotes, brackets, markdown emphasis, sentence punctuation).
	    punctuation = ".,;:!?\"'`*()[]{}"

	    best_match = None
	    best_score = 0.0
	    try:
	        for word in sentence.split():
	            clean_word = word.strip(punctuation)
	            if not clean_word:
	                continue

	            closest = get_close_matches(clean_word, reference_list, n=1)
	            if not closest:
	                continue

	            # Same argument order as get_close_matches uses internally.
	            score = SequenceMatcher(None, closest[0], clean_word).ratio()
	            if score > best_score:
	                best_match, best_score = closest[0], score
	                if score >= 1.0:
	                    # Cannot do better than an exact match.
	                    break
	    except TypeError:
	        # Non-string reference items cannot be compared by difflib.
	        return None

	    return best_match


	def correct_incoterm_in_xml(file, corrected_value):
	    """
	    Write a corrected Incoterms transfer location into the XML and return it.

	    The value at
	    n0:TransportationRequestSUITERequest / TransportationRequest /
	    DeliveryTerms / Incoterms / TransferLocationName is overwritten with
	    ``corrected_value`` and the document is serialised back to XML.

	    Args:
	        file: The uploaded XML file; must support ``seek`` and ``read``.
	        corrected_value: The suggested correction to apply.

	    Returns:
	        str or None: The corrected XML as a pretty-printed string, or None
	        when the file cannot be read or parsed or the expected path is
	        absent. The failure is logged in that case.
	    """
	    try:
	        # Rewind first: the same upload may already have been read elsewhere.
	        file.seek(0)

	        xml_data = xmltodict.parse(file.read())

	        incoterms = xml_data["n0:TransportationRequestSUITERequest"][
	            "TransportationRequest"
	        ]["DeliveryTerms"]["Incoterms"]
	        incoterms["TransferLocationName"] = corrected_value

	        # Convert the updated dictionary back to XML.
	        return xmltodict.unparse(xml_data, pretty=True)
	    except Exception as exc:
	        # Keep the best-effort contract, but record why no XML was produced.
	        config.logger.error(f"Failed to correct Incoterms location: {str(exc)}")
	        return None


	def apply_combined_corrections(
	    file, incoterm_correction=None, supplier_correction=None
	):
	    """
	    Apply Incoterm and/or Supplier corrections to the XML and return the result.

	    A truthy ``incoterm_correction`` replaces
	    DeliveryTerms/Incoterms/TransferLocationName. A truthy
	    ``supplier_correction`` replaces the value of each of these tags that
	    already exists in the document: ShipperParty/InternalID,
	    ShipperParty/AdditionalInternalID, ShipFromLocation/InternalID and
	    ShipFromLocation/PartyInternalID. Missing tags are left out rather than
	    created.

	    Args:
	        file: The uploaded XML file; must support ``seek`` and ``read``.
	        incoterm_correction: Suggested Incoterm correction.
	        supplier_correction: Suggested Supplier correction (new supplier ID).

	    Returns:
	        str: The corrected XML as a pretty-printed string.

	    Raises:
	        ValueError: If the file cannot be read or parsed, or a requested
	            Incoterm correction cannot be applied because its path is
	            absent. The original exception is attached as the cause.
	    """
	    tags_to_update = [
	        ("ShipperParty", "InternalID"),
	        ("ShipperParty", "AdditionalInternalID"),
	        ("ShipFromLocation", "InternalID"),
	        ("ShipFromLocation", "PartyInternalID"),
	    ]

	    try:
	        # Rewind first: the same upload may already have been read elsewhere.
	        file.seek(0)

	        xml_data = xmltodict.parse(file.read())

	        # Apply Incoterm correction if provided.
	        if incoterm_correction:
	            xml_data["n0:TransportationRequestSUITERequest"]["TransportationRequest"][
	                "DeliveryTerms"
	            ]["Incoterms"]["TransferLocationName"] = incoterm_correction

	        # Apply Supplier correction to all relevant tags if provided.
	        if supplier_correction:
	            for parent_tag, child_tag in tags_to_update:
	                try:
	                    parent = xml_data["n0:TransportationRequestSUITERequest"][
	                        "TransportationRequest"
	                    ][parent_tag]
	                except KeyError:
	                    # Skip if tag is missing.
	                    continue

	                # A parent that is not a mapping (for example an empty
	                # element) has no child to update; treat it like a missing
	                # tag instead of failing the whole correction.
	                if isinstance(parent, dict) and child_tag in parent:
	                    parent[child_tag] = supplier_correction

	        # Convert the updated dictionary back to XML.
	        return xmltodict.unparse(xml_data, pretty=True)
	    except Exception as e:
	        # Chain the cause so the underlying error is not lost.
	        raise ValueError(f"Error applying corrections: {str(e)}") from e