Spaces:

SuriRaja
/

UC3-Raja

Sleeping

App Files Files Community

SuriRaja committed on Nov 19, 2024

Commit

b11b6bf

verified ·

1 Parent(s): a8af9de

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -123

app.py CHANGED Viewed

@@ -1,141 +1,78 @@
-import os
-import openai
 import streamlit as st
-import base64
-# Set up OpenAI API key
-openai.api_key = os.getenv("OPENAI_API_KEY")  # Replace with your API key if needed
-# Helper function to encode a file to base64
-def encode_file_to_base64(uploaded_file):  # uploaded_file: any object exposing .read(); presumably a Streamlit upload — confirm
-    """Encode a file to base64."""
-    return base64.b64encode(uploaded_file.read()).decode("utf-8")  # reads the whole file at once; returns the base64 text as str
-# Function to call GPT for parsing
-def call_gpt_for_parsing(option, encoded_pdf, instructions):  # returns the message content of the first completion choice
-    """Send option, encoded PDF, and parsing instructions to GPT for processing."""
-    prompt = f"""
-    Welcome to PMP Auto-PO Generator. Please parse the provided PDF file based on the selected option and instructions.
-    Selected Option: {option}
-    Instructions:
-    {instructions}
-    PDF File (Base64 Encoded):
-    {encoded_pdf}
-    Return the parsed data in JSON format.
-    """
-    response = openai.ChatCompletion.create(  # NOTE(review): ChatCompletion.create looks like the pre-1.0 openai SDK style — confirm the pinned version
-        model="gpt-3.5-turbo",  # Use GPT-4 for higher accuracy if needed
-        messages=[{"role": "user", "content": prompt}],  # the whole prompt, base64 PDF included, goes out as one user message
-        max_tokens=3000
-    )
-    return response['choices'][0]['message']['content']  # raw model text; nothing here checks that it is valid JSON
-# Instruction sets for each option
-instruction_sets = {  # option name -> extraction instructions text; looked up by the selected option in step 3 of main()
-    "Toshiba": """
-    Extract columns: Pos., Item Code, Unit, Delivery Date, Quantity, Basic Price, Discount, Cur., Amount, Sub Total.
-    Follow specific instructions for Item Code extraction:
-    - Identify Item Code blocks starting with a numeric code (e.g., 155569003011).
-    - Include all subsequent lines (e.g., descriptions, additional codes) until a new numeric block or section begins.
-    - Maintain the exact line order and formatting, preserving sub-lines.
-    """,
-    "BHEL": """
-    Extract columns: SI No, Material Description, Unit, Quantity, Dely Qty, Dely Date, Unit Rate, Value.
-    Follow instructions for Material Description block extraction:
-    - Include primary description (e.g., BPS 017507).
-    - Add Material Number, HSN Code, GST percentage.
-    """,
-    "Federal Electric": """
-    Extract columns: S. No, Material No, Material Description, Qty, Unit, Price, Delivery Date, Total Value, Vat%, Amount Incl.VAT.
-    Ensure all relevant data fields are included and validated.
-    """,
-    "AL NISF": """
-    Extract columns: Item, Description, Qty, Unit, Unit Price, Total Price.
-    Follow detailed instructions for structuring descriptions:
-    - Add a bold header 'DESCRIPTION'.
-    - Include Computer Code Number, Product Name, Designation Number, Dimensions, Serial Number, and Manufacturing Year.
-    """,
-    "Others": """
-    Perform dynamic field mapping to extract all relevant data fields.
-    - Ensure the fields are captured accurately.
-    """
-}
-# App State for Multi-Step Interaction
-if "step" not in st.session_state:  # seed every wizard key together, only when the "step" key is absent
-    st.session_state["step"] = 1  # Initialize the step counter
-    st.session_state["selected_option"] = None  # filled in by step 1 of main()
-    st.session_state["encoded_pdf"] = None  # filled in by step 2 of main()
-    st.session_state["parsed_output"] = None  # filled in by step 3 of main()
 # Streamlit app
 def main():  # renders exactly one wizard step per script run, chosen by st.session_state["step"]
-    st.title("PMP Auto-PO Generator")
     # Step 1: Welcome and Option Selection
-    if st.session_state["step"] == 1:
-        st.write("Welcome to PMP Auto-PO Generator!")
-        st.write("Please choose from the following options:")
-        options = ["Toshiba", "BHEL", "Federal Electric", "AL NISF", "Others"]
-        selected_option = st.selectbox("Select an option:", options)
-        if st.button("Next"):
-            if not selected_option:
-                st.warning("Please select an option to proceed.")
-            else:
-                st.session_state["selected_option"] = selected_option
-                st.session_state["step"] = 2  # NOTE(review): no rerun is requested after changing the step, so the step-2 branch only runs on a later script run — confirm intended
     # Step 2: File Upload
-    elif st.session_state["step"] == 2:
-        st.write(f"Thanks for selecting {st.session_state['selected_option']}. Please upload your PO file.")
-        uploaded_file = st.file_uploader("Upload your PO file (PDF format only):", type=["pdf"])
-        if uploaded_file:
-            if uploaded_file.type != "application/pdf":  # second check on the reported MIME type, on top of the uploader's type=["pdf"] filter
-                st.error("Invalid file format. Please upload a PDF file.")
-            else:
-                st.session_state["encoded_pdf"] = encode_file_to_base64(uploaded_file)
-                st.session_state["step"] = 3
-    # Step 3: Call GPT for Processing
-    elif st.session_state["step"] == 3:
-        st.write("Processing your file with GPT...")
-        instructions = instruction_sets[st.session_state["selected_option"]]  # selected_option comes from the fixed options list in step 1
-        try:
-            parsed_output = call_gpt_for_parsing(
-                st.session_state["selected_option"],
-                st.session_state["encoded_pdf"],
-                instructions
-            )
-            st.session_state["parsed_output"] = parsed_output
-            st.session_state["step"] = 4
-        except Exception as e:  # any failure is shown in the UI; step stays at 3 because it is only advanced inside the try
-            st.error(f"Error during GPT processing: {e}")
-    # Step 4: Review and Download
-    elif st.session_state["step"] == 4:
-        st.write("Parsing successful! Below is the extracted data:")
-        st.json(st.session_state["parsed_output"])  # parsed_output is the raw text returned by call_gpt_for_parsing
-        if st.button("Download as JSON"):  # NOTE(review): the download button below is only created on runs where this button reports a click — confirm the two-click flow is intended
-            st.download_button(
-                label="Download JSON",
-                data=st.session_state["parsed_output"],
-                file_name="parsed_output.json",
-                mime="application/json"
-            )
-        if st.button("Start Over"):  # resets the same four keys that the module-level initialisation seeds
-            st.session_state["step"] = 1
-            st.session_state["selected_option"] = None
-            st.session_state["encoded_pdf"] = None
-            st.session_state["parsed_output"] = None
 if __name__ == "__main__":  # call main() only when this module is the entry script
     main()

 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load GPT-Neo model and tokenizer from Hugging Face
+@st.cache_resource  # Streamlit resource cache; presumably keeps the loaded objects across reruns so the load happens once — confirm
+def load_model():  # takes no arguments; returns the pair (tokenizer, model)
+    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
+    model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")  # same checkpoint id as the tokenizer above
+    return tokenizer, model
+# Helper function to generate text
+def generate_response(prompt, tokenizer, model):  # returns the decoded text of the first generated sequence
+    inputs = tokenizer(prompt, return_tensors="pt")  # "pt" asks the tokenizer for PyTorch tensors
+    outputs = model.generate(**inputs, max_new_tokens=500)  # at most 500 tokens generated beyond the prompt
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)  # NOTE(review): decodes the whole first sequence; for a causal LM this presumably includes the prompt text itself — confirm
 # Streamlit app
 def main():  # single-pass flow: each missing input shows a warning and returns before the model is touched
+    st.title("PMP Auto-PO Generator (Free GPT-Neo Model)")
     # Step 1: Welcome and Option Selection
+    st.write("Welcome! Please choose an option:")
+    options = ["Toshiba", "BHEL", "Federal Electric", "AL NISF", "Others"]
+    selected_option = st.selectbox("Select an option:", options)
+    if not selected_option:  # NOTE(review): options is non-empty, so selectbox presumably always yields a truthy value and this guard may never fire — confirm
+        st.warning("Please select an option to proceed.")
+        return
     # Step 2: File Upload
+    uploaded_file = st.file_uploader("Upload your PO file (PDF format only):", type=["pdf"])
+    if not uploaded_file:
+        st.warning("Please upload a PDF file to proceed.")
+        return
+    # Step 3: Instructions for Parsing
+    instructions = st.text_area(
+        "Provide custom parsing instructions (based on your selection):",
+        placeholder="E.g., Extract columns like Pos., Item Code, Unit, etc."
+    )
+    if not instructions:  # an empty text area stops the run here
+        st.warning("Please provide parsing instructions to proceed.")
+        return
+    # Combine all inputs for the model prompt. NOTE(review): only uploaded_file.name is interpolated below; the PDF's contents are never read in this function.
+    prompt = f"""
+    Parse the following Purchase Order (PO) data based on the instructions provided.
+    Selected Option: {selected_option}
+    Instructions: {instructions}
+    PDF Content (Simulated for demo):
+    {uploaded_file.name} is the uploaded PDF. Extract the required details accordingly.
+    """
+    # Load model and tokenizer
+    st.write("Loading model...")
+    tokenizer, model = load_model()  # load_model is wrapped in st.cache_resource
+    # Generate response
+    st.write("Generating response...")
+    response = generate_response(prompt, tokenizer, model)  # reached on every script run that gets past the guards above
+    # Display results
+    st.write("Parsed Output:")
+    st.text_area("GPT-Neo Response", value=response, height=300)
+    # Download as JSON
+    if st.button("Download Response as JSON"):  # NOTE(review): the download button below is only created on runs where this button reports a click — confirm the two-click flow is intended
+        st.download_button(
+            label="Download JSON",
+            data=response,  # NOTE(review): response is free-form model text served with a JSON mime type — confirm it is valid JSON
+            file_name="parsed_output.json",
+            mime="application/json"
+        )
 if __name__ == "__main__":  # call main() only when this module is the entry script
     main()