Spaces:

hgng
/

xml-verifier-backup

Sleeping

App Files Files Community

nghweigeok committed on Oct 28, 2024

Commit

edc5316

verified ·

1 Parent(s): 134fe14

Upload 6 files

Browse files

Files changed (6) hide show

Dockerfile +32 -0
app/app.py +87 -0
app/backend.py +111 -0
app/config.py +22 -0
data_source/reference_locations.json +8 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,32 @@

# Use the official slim Python 3.11 image as the base (pinned, not "latest")
FROM python:3.11-slim
# Set the working directory to the root directory; the app resolves
# ./data_source/reference_locations.json relative to it
WORKDIR /
# Install system dependencies required to build some Python packages.
# The package index is removed in the same layer so it does not end up
# in the final image.
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    cmake \
    libfreetype6-dev \
    libxft-dev \
    libpcre2-dev \
    liblzma-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements.txt first so the dependency layer is cached independently
# of application code changes
COPY requirements.txt /requirements.txt
# Install Python dependencies from requirements.txt
RUN pip3 install --no-cache-dir -r /requirements.txt
# Copy the application code and data to their respective directories
COPY app /app
COPY data_source /data_source
# Expose the port Streamlit will run on
EXPOSE 7860
# Command to run the Streamlit app
# NOTE(review): XSRF protection is disabled here — presumably so file uploads
# work behind the hosting proxy; confirm this is still required.
CMD ["streamlit", "run", "app/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]

app/app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import os
+import json
+import xmltodict
+import streamlit as st
+# Import backend functions
+from backend import parse_and_validate_xml, get_correction_suggestion
+# File paths for reference locations and corrected XML output
+LOCATIONS = "./data_source/reference_locations.json"
+CORRECTED = "corrected_file.xml"
def main():
    """
    Set up the Streamlit web application interface for XML validation.

    The page lets the user upload an XML file, validates its Incoterms
    transfer location against the reference list loaded from ``LOCATIONS``,
    and, when the location is invalid, asks the backend for an AI-based
    correction. If a correction is found, a corrected copy of the XML is
    offered for download under the name ``CORRECTED``.

    Every failure path ends with a visible message instead of an unhandled
    exception: an unreadable/unsupported XML file, a suggestion that matches
    no reference location, and an error while rewriting the XML.
    """
    # Configure Streamlit page layout and initial settings
    st.set_page_config(
        page_title="TM XML Validator",
        layout="centered",
        initial_sidebar_state="auto",
    )
    # Display the title and description of the app
    st.title("TM XML Validator")
    st.write(
        "Upload your XML file to validate the Incoterms location and get automatic corrections if needed."
    )
    # Load the JSON file containing valid Incoterms locations.
    # JSON is UTF-8 by specification, so do not rely on the platform default.
    with open(LOCATIONS, "r", encoding="utf-8") as f:
        reference_locations = json.load(f)  # Load a list of valid locations
    # File upload widget for XML files
    uploaded_file = st.file_uploader("Upload an XML file", type="xml")
    if not uploaded_file:
        return

    # Validate the uploaded XML file
    status, original_location = parse_and_validate_xml(
        uploaded_file, reference_locations
    )

    if status == "valid":
        st.success("The XML file is valid! The location is correctly mapped.")
        st.markdown(f"**Location Name:** {original_location}")
        return

    if status != "invalid":
        # parse_and_validate_xml returns (None, None) when the file cannot be
        # parsed or lacks the expected element; tell the user instead of
        # rendering nothing.
        st.error(
            "The XML file could not be read or does not contain an Incoterms transfer location."
        )
        return

    st.error("The XML file has an error!")
    st.markdown(f"**Incorrect Location Name:** {original_location}")
    # Get AI-generated correction suggestion and the closest matching valid location
    suggestion_sentence, ai_suggestion = get_correction_suggestion(
        original_location, reference_locations
    )
    if not ai_suggestion:
        st.info("No matching reference location could be suggested.")
        return

    st.warning(f"Suggested Correction: {suggestion_sentence}")
    # Prepare the corrected XML file using the AI suggestion
    try:
        # Convert XML to a dictionary, modify the incorrect location, and convert it back
        uploaded_file.seek(0)  # Reset file pointer to the beginning
        xml_data = xmltodict.parse(uploaded_file.read())
        xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]["DeliveryTerms"]["Incoterms"][
            "TransferLocationName"
        ] = ai_suggestion  # Apply only the best-matched location
        corrected_xml = xmltodict.unparse(xml_data, pretty=True)
    except Exception as e:
        st.error(f"Error correcting XML: {e}")
        # Without a corrected document there is nothing to download; stopping
        # here avoids referencing an unbound ``corrected_xml`` below.
        return

    # Provide a download button for the corrected XML file
    st.download_button(
        label="Accept AI Suggestion and Download Corrected XML",
        data=corrected_xml,
        file_name=CORRECTED,
        mime="application/xml",
    )
+# Entry point of the app
+if __name__ == "__main__":
+    main()

app/backend.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import os
+import json
+import openai
+import xmltodict
+from difflib import get_close_matches
+from langchain_openai import ChatOpenAI
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+# Import configuration module
+import config
# Set the module-level OpenAI API key from the configuration module.
# Any failure is logged and then re-raised so the import of this module fails
# loudly rather than leaving the backend half-configured.
try:
    openai.api_key = config.openai_api_key
    config.logger.info("OpenAI client initialized successfully.")
except Exception as e:
    config.logger.error(f"Error initializing OpenAI client: {e}")
    # Bare ``raise`` re-raises the active exception without adding a new
    # frame to its traceback.
    raise
def parse_and_validate_xml(file, reference_list):
    """
    Parse the XML file and validate its Incoterms location against a reference list.

    Args:
        file: The XML file uploaded by the user (any object with ``read()``).
        reference_list: A list of valid Incoterms locations for validation.

    Returns:
        tuple: ``("valid", location_name)`` when the location is in the
        reference list, ``("invalid", location_name)`` when it is not, and
        ``(None, None)`` when the file cannot be parsed or the
        ``TransferLocationName`` element cannot be reached.
    """
    try:
        # Convert the XML file to a dictionary format for easy manipulation
        xml_data = xmltodict.parse(file.read())
        # Extract the TransferLocationName from the XML
        location_name = xml_data["n0:TransportationRequestSUITERequest"][
            "TransportationRequest"
        ]["DeliveryTerms"]["Incoterms"]["TransferLocationName"]
    except Exception:
        # The (None, None) result is part of the contract, so keep the
        # best-effort behaviour, but record why the file was rejected instead
        # of discarding the error silently.
        config.logger.exception(
            "Could not parse the XML file or locate TransferLocationName."
        )
        return None, None

    # Check if the extracted location is valid
    status = "valid" if location_name in reference_list else "invalid"
    return status, location_name
def get_correction_suggestion(location_name, reference_list):
    """
    Generate a correction suggestion with LangChain and map it to a reference location.

    Args:
        location_name: The incorrect location name extracted from the XML.
        reference_list: A list of valid Incoterms locations for validation.

    Returns:
        tuple: ``(suggestion_sentence, best_match)`` where
        ``suggestion_sentence`` is the text returned by the model and
        ``best_match`` is the reference location that
        ``find_closest_match`` extracts from it, or ``None`` if it finds none.
    """
    # Define a prompt template for the AI to suggest corrections
    prompt = PromptTemplate(
        input_variables=["location_name", "reference_list"],
        template="The location name is '{location_name}'. The reference list is: {reference_list}. Suggest the closest correct location.",
    )
    # Initialize the AI model for generating suggestions. The key comes from
    # the config module, which has already validated that it is present, so
    # the whole backend uses a single source for it.
    llm = ChatOpenAI(
        model="gpt-4o", temperature=0.5, openai_api_key=config.openai_api_key
    )
    # Compose prompt and model as a runnable sequence; this replaces the
    # deprecated LLMChain(...).run(...) call.
    chain = prompt | llm
    response = chain.invoke(
        {
            "location_name": location_name,
            "reference_list": ", ".join(reference_list),
        }
    )
    # Chat models return a message object; the generated text is its content
    suggestion_sentence = response.content
    # Find the closest match from the reference list based on the AI suggestion
    best_match = find_closest_match(suggestion_sentence, reference_list)
    # Return the AI-generated suggestion and the best match
    return suggestion_sentence, best_match
def find_closest_match(sentence, reference_list):
    """
    Find the reference location that a sentence refers to.

    Matching is done in two passes:

    1. Whole-name pass: look for a reference name that appears verbatim
       (case-insensitively, on word boundaries) in the sentence. This is what
       makes multi-word names such as "Ho Chi Minh City" findable, because
       none of their individual words is similar enough to the full name for
       the fuzzy pass. If several names appear, the one mentioned first wins;
       on a tie the longer name is preferred.
    2. Fuzzy pass: split the sentence into words and return the first word
       that ``difflib.get_close_matches`` maps to a reference name.

    Args:
        sentence: A sentence containing potential location names.
        reference_list: A list of valid Incoterms locations for comparison.

    Returns:
        str or None: The matching location from the reference list, or None
        if no match is found (including when either argument is empty/None).
    """
    import re  # local import: the module's import block does not include re

    if not sentence or not reference_list:
        return None

    # Pass 1: verbatim mention of a full reference name.
    best_key = None
    best_name = None
    for name in reference_list:
        if not name:
            continue
        # Lookarounds instead of \b so names that start or end with a
        # non-word character are still delimited correctly.
        pattern = r"(?<!\w)" + re.escape(name) + r"(?!\w)"
        hit = re.search(pattern, sentence, re.IGNORECASE)
        if hit is None:
            continue
        key = (hit.start(), -len(name))
        if best_key is None or key < best_key:
            best_key = key
            best_name = name
    if best_name is not None:
        return best_name

    # Pass 2: fuzzy match word by word, returning the first hit.
    for word in sentence.split():
        # Clean word by removing common punctuation
        clean_word = word.strip(".,!?\"'")
        closest_match = get_close_matches(clean_word, reference_list, n=1)
        if closest_match:
            return closest_match[0]
    return None

app/config.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import logging
+import os
+from dotenv import load_dotenv
# Initialize logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load environment variables from a .env file, if one is present
load_dotenv()
# Retrieve the OpenAI API key from the environment
openai_api_key = os.getenv("OPENAI_API_KEY")
# Validate the presence of the API key
if not openai_api_key:
    # Log an error message and abort the import: the backend cannot work
    # without the key. The key may come from the process environment or from
    # a .env file, so the message names both.
    logger.error("API keys are not set properly.")
    raise ValueError(
        "OPENAI_API_KEY must be set in the environment or in a .env file."
    )
# Log a success message once the API key is known to be present
logger.info("API keys loaded successfully.")

data_source/reference_locations.json ADDED Viewed

	@@ -0,0 +1,8 @@

+[
+    "Hamburg",
+    "New York",
+    "Los Angeles",
+    "Shanghai",
+    "Rotterdam",
+    "Singapore"
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+python-dotenv
+streamlit
+openai
+langchain
+xmltodict
+pandas
+langchain-openai
+langchain-community