Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- Dockerfile +32 -0
- app/app.py +87 -0
- app/backend.py +111 -0
- app/config.py +22 -0
- data_source/reference_locations.json +8 -0
- requirements.txt +8 -0
Dockerfile
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image: the official slim Python image, pinned to the 3.11 series
FROM python:3.11-slim

# Work from the filesystem root; the relative path "app/app.py" in CMD below
# is resolved from this directory
WORKDIR /

# Install system dependencies required to build some Python packages.
# The apt package index is deleted in the same layer so it does not remain
# in the final image.
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    cmake \
    libfreetype6-dev \
    libxft-dev \
    libpcre2-dev \
    liblzma-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements.txt on its own first so the dependency layer below is
# only rebuilt when the requirements change
COPY requirements.txt /requirements.txt

# Install Python dependencies from requirements.txt
RUN pip3 install --no-cache-dir -r /requirements.txt

# Copy the application code and data to their respective directories
COPY app /app
COPY data_source /data_source

# Expose the port Streamlit will run on
EXPOSE 7860

# Command to run the Streamlit app.
# NOTE(review): XSRF protection is switched off here — presumably so file
# uploads work behind the hosting proxy; confirm this is still required.
CMD ["streamlit", "run", "app/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
|
app/app.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import xmltodict
|
| 4 |
+
import streamlit as st
|
| 5 |
+
|
| 6 |
+
# Import backend functions
|
| 7 |
+
from backend import parse_and_validate_xml, get_correction_suggestion
|
| 8 |
+
|
| 9 |
+
# File paths: LOCATIONS is the JSON file of valid reference locations that
# main() loads, CORRECTED is the file name offered for the corrected XML
# download. LOCATIONS is relative, so it depends on the working directory.
LOCATIONS = "./data_source/reference_locations.json"
CORRECTED = "corrected_file.xml"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def main():
    """
    Build the Streamlit page that validates an uploaded XML file.

    The page lets the user upload an XML file, checks its Incoterms transfer
    location against the reference list stored in ``LOCATIONS`` and, when the
    location is not in that list, asks the backend for a suggested correction
    and offers the corrected XML for download.

    Returns:
        None. All output goes to the Streamlit page.
    """
    # Configure Streamlit page layout and initial settings
    st.set_page_config(
        page_title="TM XML Validator",
        layout="centered",
        initial_sidebar_state="auto",
    )

    # Display the title and description of the app
    st.title("TM XML Validator")
    st.write(
        "Upload your XML file to validate the Incoterms location and get automatic corrections if needed."
    )

    # Load the JSON file containing valid Incoterms locations.
    # The encoding is explicit so the result does not depend on the locale.
    with open(LOCATIONS, "r", encoding="utf-8") as f:
        reference_locations = json.load(f)

    # File upload widget for XML files
    uploaded_file = st.file_uploader("Upload an XML file", type="xml")
    if not uploaded_file:
        return

    # Validate the uploaded XML file
    status, original_location = parse_and_validate_xml(
        uploaded_file, reference_locations
    )

    if status == "valid":
        st.success("The XML file is valid! The location is correctly mapped.")
        st.markdown(f"**Location Name:** {original_location}")
        return

    if status != "invalid":
        # The backend reports (None, None) when it cannot read the location;
        # tell the user instead of leaving the page empty.
        st.error(
            "The XML file could not be read or does not contain an Incoterms transfer location."
        )
        return

    st.error("The XML file has an error!")
    st.markdown(f"**Incorrect Location Name:** {original_location}")

    # Get AI-generated correction suggestion and the closest matching valid location
    suggestion_sentence, ai_suggestion = get_correction_suggestion(
        original_location, reference_locations
    )
    if not ai_suggestion:
        st.info("No matching location could be suggested from the reference list.")
        return

    st.warning(f"Suggested Correction: {suggestion_sentence}")

    # Prepare the corrected XML file using the AI suggestion
    try:
        # The validation step already consumed the stream, so rewind it
        uploaded_file.seek(0)
        xml_data = xmltodict.parse(uploaded_file.read())
        xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]["DeliveryTerms"]["Incoterms"][
            "TransferLocationName"
        ] = ai_suggestion  # Apply only the best-matched location
        corrected_xml = xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        st.error(f"Error correcting XML: {e}")
    else:
        # Only offer the download when the corrected document was actually
        # produced; otherwise corrected_xml would be undefined here.
        st.download_button(
            label="Accept AI Suggestion and Download Corrected XML",
            data=corrected_xml,
            file_name=CORRECTED,
            mime="application/xml",
        )
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# Entry point of the app: build the page only when this file is run as a script
if __name__ == "__main__":
    main()
|
app/backend.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import openai
|
| 4 |
+
import xmltodict
|
| 5 |
+
from difflib import get_close_matches
|
| 6 |
+
from langchain_openai import ChatOpenAI
|
| 7 |
+
from langchain.chains import LLMChain
|
| 8 |
+
from langchain.prompts import PromptTemplate
|
| 9 |
+
|
| 10 |
+
# Import configuration module
|
| 11 |
+
import config
|
| 12 |
+
|
| 13 |
+
# Initialize the openai package at import time with the API key exposed by the
# config module; the outcome is reported through config.logger.
try:
    openai.api_key = config.openai_api_key
    config.logger.info("OpenAI client initialized successfully.")
except Exception as e:
    # Log the failure and re-raise so importing this module fails visibly
    config.logger.error(f"Error initializing OpenAI client: {e}")
    raise e
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def parse_and_validate_xml(file, reference_list):
    """
    Parse an XML file and validate its Incoterms location against a reference list.

    Args:
        file: File-like object whose ``read()`` returns the XML document.
        reference_list: Collection of valid Incoterms locations for validation.

    Returns:
        tuple: ``("valid", location_name)`` when the location is in
        ``reference_list``, ``("invalid", location_name)`` when it is not, and
        ``(None, None)`` when the document cannot be parsed or the location
        cannot be read from it.
    """
    import logging

    try:
        # Convert the XML file to a dictionary format for easy manipulation
        xml_data = xmltodict.parse(file.read())
        # Extract the TransferLocationName from the XML
        location_name = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]["DeliveryTerms"]["Incoterms"]["TransferLocationName"]

        # Check if the extracted location is valid
        status = "valid" if location_name in reference_list else "invalid"
        return status, location_name
    except Exception:
        # Deliberate best-effort boundary for user uploads: callers rely on
        # (None, None) rather than an exception, but the cause is logged so a
        # failure is no longer silent.
        logging.getLogger(__name__).exception(
            "Could not read TransferLocationName from the uploaded XML."
        )
        return None, None
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def get_correction_suggestion(location_name, reference_list):
    """
    Ask the language model for a correction and map its answer to a reference entry.

    Args:
        location_name: The incorrect location name extracted from the XML.
        reference_list: A list of valid Incoterms locations for validation.

    Returns:
        tuple: The sentence produced by the model and the entry of
        ``reference_list`` that ``find_closest_match`` picks from that
        sentence (``None`` when it finds no match).
    """
    # Prompt that gives the model the wrong name and the allowed values
    suggestion_prompt = PromptTemplate(
        input_variables=["location_name", "reference_list"],
        template="The location name is '{location_name}'. The reference list is: {reference_list}. Suggest the closest correct location.",
    )

    # Chat model used to generate the suggestion
    chat_model = ChatOpenAI(
        model="gpt-4o", temperature=0.5, openai_api_key=os.getenv("OPENAI_API_KEY")
    )
    suggestion_chain = LLMChain(llm=chat_model, prompt=suggestion_prompt)

    # The reference list is passed to the prompt as one comma-separated string
    suggestion_sentence = suggestion_chain.run(
        location_name=location_name, reference_list=", ".join(reference_list)
    )

    # Reduce the free-text answer to a single entry of the reference list
    return suggestion_sentence, find_closest_match(suggestion_sentence, reference_list)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def find_closest_match(sentence, reference_list):
    """
    Return the reference entry matched by the first matching word of a sentence.

    The sentence is split on whitespace. Each word has surrounding common
    punctuation removed and is compared against ``reference_list`` with
    ``difflib.get_close_matches``. The search stops at the first word that
    yields a match.

    Args:
        sentence: A sentence containing potential location names.
        reference_list: A list of valid Incoterms locations for comparison.

    Returns:
        str or None: The closest matching location from the reference list,
        or None if no word of the sentence matches.
    """
    punctuation = ".,!?\"'"
    # Lazy generator: words after the first hit are never compared
    hits_per_word = (
        get_close_matches(token.strip(punctuation), reference_list, n=1)
        for token in sentence.split()
    )
    return next((hits[0] for hits in hits_per_word if hits), None)
|
app/config.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
# Set up INFO-level logging and the logger that other modules reach as config.logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables from the .env file
load_dotenv()

# Read the OpenAI API key from the environment
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail fast when the key is missing or empty: log the problem, then abort the import
if not openai_api_key:
    logger.error("API keys are not set properly.")
    raise ValueError("API keys for OpenAI must be set in the .env file.")

# Reaching this line means the key is present
logger.info("API keys loaded successfully.")
|
data_source/reference_locations.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"Hamburg",
|
| 3 |
+
"New York",
|
| 4 |
+
"Los Angeles",
|
| 5 |
+
"Shanghai",
|
| 6 |
+
"Rotterdam",
|
| 7 |
+
"Singapore"
|
| 8 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
python-dotenv
|
| 2 |
+
streamlit
|
| 3 |
+
openai
|
| 4 |
+
langchain
|
| 5 |
+
xmltodict
|
| 6 |
+
pandas
|
| 7 |
+
langchain-openai
|
| 8 |
+
langchain-community
|