nghweigeok committed on
Commit
edc5316
·
verified ·
1 Parent(s): 134fe14

Upload 6 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official slim Python 3.11 image as the base (pinned tag, not "latest")
FROM python:3.11-slim

# Set the working directory to the root directory; the app and its data are
# copied to /app and /data_source below, and the app opens its data file via
# a path relative to this directory.
WORKDIR /

# Install system dependencies required to build some Python packages.
# The apt package lists are deleted in the same layer so they do not end up
# in the final image.
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    cmake \
    libfreetype6-dev \
    libxft-dev \
    libpcre2-dev \
    liblzma-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements.txt from the project root to the container's root directory
COPY requirements.txt /requirements.txt

# Install Python dependencies from requirements.txt
RUN pip3 install --no-cache-dir -r /requirements.txt

# Copy the application code and data to their respective directories
COPY app /app
COPY data_source /data_source

# Expose the port Streamlit will run on
EXPOSE 7860

# Command to run the Streamlit app.
# NOTE(review): XSRF protection is switched off here — confirm the hosting
# environment actually requires this before keeping it.
CMD ["streamlit", "run", "app/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
app/app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import xmltodict
4
+ import streamlit as st
5
+
6
+ # Import backend functions
7
+ from backend import parse_and_validate_xml, get_correction_suggestion
8
+
9
+ # File paths for reference locations and corrected XML output
10
+ LOCATIONS = "./data_source/reference_locations.json"
11
+ CORRECTED = "corrected_file.xml"
12
+
13
+
14
def main():
    """
    Set up the Streamlit web application interface for XML validation.

    The page lets the user upload an XML file, validates its Incoterms
    location against the reference list loaded from ``LOCATIONS``, and, when
    the location is invalid, shows an AI-based suggestion together with a
    download button for a corrected copy of the file.

    Every outcome produces visible feedback: a file whose location cannot be
    read, a suggestion that matches no reference location, and a failure
    while building the corrected XML each show a message instead of failing
    silently or crashing.
    """
    # Configure Streamlit page layout and initial settings
    st.set_page_config(
        page_title="TM XML Validator",
        layout="centered",
        initial_sidebar_state="auto",
    )

    # Display the title and description of the app
    st.title("TM XML Validator")
    st.write(
        "Upload your XML file to validate the Incoterms location and get automatic corrections if needed."
    )

    # Load the JSON file containing the list of valid Incoterms locations
    with open(LOCATIONS, "r", encoding="utf-8") as f:
        reference_locations = json.load(f)

    # File upload widget for XML files
    uploaded_file = st.file_uploader("Upload an XML file", type="xml")
    if not uploaded_file:
        return

    # Validate the uploaded XML file
    status, original_location = parse_and_validate_xml(
        uploaded_file, reference_locations
    )

    if status == "valid":
        st.success("The XML file is valid! The location is correctly mapped.")
        st.markdown(f"**Location Name:** {original_location}")
        return

    if status != "invalid":
        # The backend reports (None, None) when the file could not be parsed
        # or does not contain the expected element path.
        st.error(
            "Could not read the Incoterms location from the uploaded XML file."
        )
        return

    st.error("The XML file has an error!")
    st.markdown(f"**Incorrect Location Name:** {original_location}")

    # Get the AI-generated suggestion sentence and the matching valid location
    suggestion_sentence, ai_suggestion = get_correction_suggestion(
        original_location, reference_locations
    )
    if not ai_suggestion:
        st.warning("No matching reference location could be suggested.")
        return

    st.warning(f"Suggested Correction: {suggestion_sentence}")

    # Build the corrected XML: parse again, overwrite the location, serialise
    try:
        uploaded_file.seek(0)  # The validation step already consumed the stream
        xml_data = xmltodict.parse(uploaded_file.read())
        xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]["DeliveryTerms"]["Incoterms"][
            "TransferLocationName"
        ] = ai_suggestion  # Apply only the best-matched location
        corrected_xml = xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        # Without a corrected document there is nothing to download, so stop
        # here rather than referencing an unbound ``corrected_xml`` below.
        st.error(f"Error correcting XML: {e}")
        return

    # Provide a download button for the corrected XML file
    st.download_button(
        label="Accept AI Suggestion and Download Corrected XML",
        data=corrected_xml,
        file_name=CORRECTED,
        mime="application/xml",
    )


# Entry point of the app
if __name__ == "__main__":
    main()
app/backend.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import openai
4
+ import xmltodict
5
+ from difflib import get_close_matches
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain.chains import LLMChain
8
+ from langchain.prompts import PromptTemplate
9
+
10
+ # Import configuration module
11
+ import config
12
+
13
+ # Initialize OpenAI client with API key from the configuration
14
# Hand the API key loaded by the configuration module to the openai package.
# Only the assignment is guarded; the success message is logged afterwards so
# that a logging problem is not reported as an initialisation failure.
try:
    openai.api_key = config.openai_api_key
except Exception as e:
    config.logger.error(f"Error initializing OpenAI client: {e}")
    # Bare ``raise`` re-raises the active exception with its original traceback
    raise
else:
    config.logger.info("OpenAI client initialized successfully.")
20
+
21
+
22
def parse_and_validate_xml(file, reference_list):
    """
    Parse the XML file and validate its Incoterms location against a reference list.

    Args:
        file: The XML file uploaded by the user; it is read once via ``file.read()``.
        reference_list: A list of valid Incoterms locations for validation.

    Returns:
        tuple: ``("valid", location_name)`` when the location is in
        ``reference_list``, ``("invalid", location_name)`` when it is not, and
        ``(None, None)`` when the file cannot be parsed or the expected
        element path is missing.
    """
    try:
        # Convert the XML file to a dictionary format for easy manipulation
        xml_data = xmltodict.parse(file.read())
        # Extract the TransferLocationName from the XML
        location_name = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]["DeliveryTerms"]["Incoterms"]["TransferLocationName"]

        status = "valid" if location_name in reference_list else "invalid"
        return status, location_name
    except Exception:
        # Callers rely on the (None, None) result instead of an exception, so
        # the contract is kept, but the cause is logged with its traceback so
        # that failures are no longer invisible.
        config.logger.exception(
            "Failed to read TransferLocationName from the uploaded XML."
        )
        return None, None
48
+
49
+
50
def get_correction_suggestion(location_name, reference_list):
    """
    Generate a correction suggestion with LangChain and map it to a reference location.

    Args:
        location_name: The incorrect location name extracted from the XML.
        reference_list: A list of valid Incoterms locations for validation.

    Returns:
        tuple: The AI-generated suggestion sentence and the reference location
        that ``find_closest_match`` extracts from it (``None`` if it finds none).
    """
    # Define a prompt template for the AI to suggest corrections
    prompt = PromptTemplate(
        input_variables=["location_name", "reference_list"],
        template="The location name is '{location_name}'. The reference list is: {reference_list}. Suggest the closest correct location.",
    )

    # Use the key that the config module already loaded and validated, rather
    # than reading the environment a second time.
    llm = ChatOpenAI(
        model="gpt-4o", temperature=0.5, openai_api_key=config.openai_api_key
    )

    # Compose prompt and model as a runnable sequence; this replaces the
    # deprecated LLMChain / Chain.run pair.
    chain = prompt | llm
    response = chain.invoke(
        {
            "location_name": location_name,
            "reference_list": ", ".join(reference_list),
        }
    )
    # The chat model returns a message object; its text is in ``content``
    suggestion_sentence = response.content

    # Find the closest match from the reference list based on the AI suggestion
    best_match = find_closest_match(suggestion_sentence, reference_list)

    return suggestion_sentence, best_match
83
+
84
+
85
def find_closest_match(sentence, reference_list):
    """
    Find the reference location that a sentence refers to.

    The lookup runs in two stages:

    1. Exact mentions: any entry of ``reference_list`` that appears in the
       sentence as a whole phrase (case-sensitive) is a candidate. The one
       mentioned earliest wins; on a tie the longer name wins. This handles
       multi-word names and stops a misspelled name echoed earlier in the
       sentence from overriding the location that is actually named.
    2. Fuzzy fallback: if nothing is mentioned exactly, each word is stripped
       of common punctuation and compared with ``difflib.get_close_matches``;
       the first word with a close match decides the result.

    Args:
        sentence: A sentence containing potential location names.
        reference_list: A list of valid Incoterms locations for comparison.

    Returns:
        str or None: The matching location from the reference list, or None
        if the sentence is empty or no match is found.
    """
    import re

    if not sentence:
        return None

    # Stage 1: whole-phrase mentions of a reference location
    exact_hits = []
    for candidate in reference_list:
        if not candidate:
            continue
        # Lookarounds keep a name from matching inside a longer word
        hit = re.search(rf"(?<!\w){re.escape(candidate)}(?!\w)", sentence)
        if hit:
            exact_hits.append((hit.start(), -len(candidate), candidate))
    if exact_hits:
        # Earliest mention first; the negative length prefers the longer name
        return min(exact_hits)[2]

    # Stage 2: word-by-word fuzzy comparison
    for word in sentence.split():
        clean_word = word.strip(".,!?\"'")
        closest_match = get_close_matches(clean_word, reference_list, n=1)
        if closest_match:
            return closest_match[0]

    return None
app/config.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from dotenv import load_dotenv
4
+
5
# Configure logging at INFO level and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables from the .env file
load_dotenv()

# Read the OpenAI API key from the environment
openai_api_key = os.environ.get("OPENAI_API_KEY")

# The key is mandatory: confirm it is present, otherwise log and abort
if openai_api_key:
    logger.info("API keys loaded successfully.")
else:
    logger.error("API keys are not set properly.")
    raise ValueError("API keys for OpenAI must be set in the .env file.")
data_source/reference_locations.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "Hamburg",
3
+ "New York",
4
+ "Los Angeles",
5
+ "Shanghai",
6
+ "Rotterdam",
7
+ "Singapore"
8
+ ]
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ streamlit
3
+ openai
4
+ langchain
5
+ xmltodict
6
+ pandas
7
+ langchain-openai
8
+ langchain-community