SuriRaja committed on
Commit
b11b6bf
·
verified ·
1 Parent(s): a8af9de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -123
app.py CHANGED
@@ -1,141 +1,78 @@
1
- import os
2
- import openai
3
  import streamlit as st
4
- import base64
5
 
6
- # Set up OpenAI API key
7
- openai.api_key = os.getenv("OPENAI_API_KEY") # Replace with your API key if needed
 
 
 
 
8
 
9
- # Helper function to encode a file to base64
10
- def encode_file_to_base64(uploaded_file):
11
- """Encode a file to base64."""
12
- return base64.b64encode(uploaded_file.read()).decode("utf-8")
13
-
14
- # Function to call GPT for parsing
15
- def call_gpt_for_parsing(option, encoded_pdf, instructions):
16
- """Send option, encoded PDF, and parsing instructions to GPT for processing."""
17
- prompt = f"""
18
- Welcome to PMP Auto-PO Generator. Please parse the provided PDF file based on the selected option and instructions.
19
-
20
- Selected Option: {option}
21
-
22
- Instructions:
23
- {instructions}
24
-
25
- PDF File (Base64 Encoded):
26
- {encoded_pdf}
27
-
28
- Return the parsed data in JSON format.
29
- """
30
- response = openai.ChatCompletion.create(
31
- model="gpt-3.5-turbo", # Use GPT-4 for higher accuracy if needed
32
- messages=[{"role": "user", "content": prompt}],
33
- max_tokens=3000
34
- )
35
- return response['choices'][0]['message']['content']
36
-
37
- # Instruction sets for each option
38
- instruction_sets = {
39
- "Toshiba": """
40
- Extract columns: Pos., Item Code, Unit, Delivery Date, Quantity, Basic Price, Discount, Cur., Amount, Sub Total.
41
- Follow specific instructions for Item Code extraction:
42
- - Identify Item Code blocks starting with a numeric code (e.g., 155569003011).
43
- - Include all subsequent lines (e.g., descriptions, additional codes) until a new numeric block or section begins.
44
- - Maintain the exact line order and formatting, preserving sub-lines.
45
- """,
46
- "BHEL": """
47
- Extract columns: SI No, Material Description, Unit, Quantity, Dely Qty, Dely Date, Unit Rate, Value.
48
- Follow instructions for Material Description block extraction:
49
- - Include primary description (e.g., BPS 017507).
50
- - Add Material Number, HSN Code, GST percentage.
51
- """,
52
- "Federal Electric": """
53
- Extract columns: S. No, Material No, Material Description, Qty, Unit, Price, Delivery Date, Total Value, Vat%, Amount Incl.VAT.
54
- Ensure all relevant data fields are included and validated.
55
- """,
56
- "AL NISF": """
57
- Extract columns: Item, Description, Qty, Unit, Unit Price, Total Price.
58
- Follow detailed instructions for structuring descriptions:
59
- - Add a bold header 'DESCRIPTION'.
60
- - Include Computer Code Number, Product Name, Designation Number, Dimensions, Serial Number, and Manufacturing Year.
61
- """,
62
- "Others": """
63
- Perform dynamic field mapping to extract all relevant data fields.
64
- - Ensure the fields are captured accurately.
65
- """
66
- }
67
-
68
- # App State for Multi-Step Interaction
69
- if "step" not in st.session_state:
70
- st.session_state["step"] = 1 # Initialize the step counter
71
- st.session_state["selected_option"] = None
72
- st.session_state["encoded_pdf"] = None
73
- st.session_state["parsed_output"] = None
74
 
75
  # Streamlit app
76
  def main():
77
- st.title("PMP Auto-PO Generator")
78
 
79
  # Step 1: Welcome and Option Selection
80
- if st.session_state["step"] == 1:
81
- st.write("Welcome to PMP Auto-PO Generator!")
82
- st.write("Please choose from the following options:")
83
- options = ["Toshiba", "BHEL", "Federal Electric", "AL NISF", "Others"]
84
- selected_option = st.selectbox("Select an option:", options)
85
 
86
- if st.button("Next"):
87
- if not selected_option:
88
- st.warning("Please select an option to proceed.")
89
- else:
90
- st.session_state["selected_option"] = selected_option
91
- st.session_state["step"] = 2
92
 
93
  # Step 2: File Upload
94
- elif st.session_state["step"] == 2:
95
- st.write(f"Thanks for selecting {st.session_state['selected_option']}. Please upload your PO file.")
96
- uploaded_file = st.file_uploader("Upload your PO file (PDF format only):", type=["pdf"])
97
-
98
- if uploaded_file:
99
- if uploaded_file.type != "application/pdf":
100
- st.error("Invalid file format. Please upload a PDF file.")
101
- else:
102
- st.session_state["encoded_pdf"] = encode_file_to_base64(uploaded_file)
103
- st.session_state["step"] = 3
 
 
 
104
 
105
- # Step 3: Call GPT for Processing
106
- elif st.session_state["step"] == 3:
107
- st.write("Processing your file with GPT...")
108
- instructions = instruction_sets[st.session_state["selected_option"]]
109
 
110
- try:
111
- parsed_output = call_gpt_for_parsing(
112
- st.session_state["selected_option"],
113
- st.session_state["encoded_pdf"],
114
- instructions
115
- )
116
- st.session_state["parsed_output"] = parsed_output
117
- st.session_state["step"] = 4
118
- except Exception as e:
119
- st.error(f"Error during GPT processing: {e}")
120
 
121
- # Step 4: Review and Download
122
- elif st.session_state["step"] == 4:
123
- st.write("Parsing successful! Below is the extracted data:")
124
- st.json(st.session_state["parsed_output"])
125
 
126
- if st.button("Download as JSON"):
127
- st.download_button(
128
- label="Download JSON",
129
- data=st.session_state["parsed_output"],
130
- file_name="parsed_output.json",
131
- mime="application/json"
132
- )
133
-
134
- if st.button("Start Over"):
135
- st.session_state["step"] = 1
136
- st.session_state["selected_option"] = None
137
- st.session_state["encoded_pdf"] = None
138
- st.session_state["parsed_output"] = None
 
 
 
 
 
 
 
139
 
140
  if __name__ == "__main__":
141
  main()
 
 
 
1
  import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
 
4
+ # Load GPT-Neo model and tokenizer from Hugging Face
@st.cache_resource
def load_model(model_name="EleutherAI/gpt-neo-1.3B"):
    """Load the tokenizer and causal language model for *model_name*.

    The function is wrapped in ``st.cache_resource`` so the (large) model
    objects are created once and reused instead of being rebuilt on every
    Streamlit script rerun.

    Args:
        model_name: Hugging Face model identifier passed to both
            ``from_pretrained`` calls. Defaults to the GPT-Neo 1.3B
            checkpoint the app was written for.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Use one identifier for both objects so tokenizer and model cannot drift
    # apart when a different checkpoint is requested.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model
10
 
11
+ # Helper function to generate text
def generate_response(prompt, tokenizer, model):
    """Generate a completion for *prompt* and return only the new text.

    Args:
        prompt: Text to feed to the model.
        tokenizer: Callable tokenizer that returns a mapping containing
            ``input_ids`` and exposes ``decode`` and ``eos_token_id``.
        model: Model object exposing ``generate``.

    Returns:
        The decoded continuation, without the echoed prompt.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        # GPT-Neo defines no pad token; supplying EOS explicitly avoids the
        # per-call fallback warning from ``generate``.
        pad_token_id=tokenizer.eos_token_id,
    )

    # A decoder-only model returns the prompt tokens followed by the newly
    # generated ones; drop the prompt so callers do not display it back.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # Streamlit app
def _build_prompt(selected_option, instructions, file_name):
    """Assemble the model prompt from the user's selections.

    Only the uploaded file's name is embedded; the PDF bytes are not read.
    """
    return f"""
    Parse the following Purchase Order (PO) data based on the instructions provided.

    Selected Option: {selected_option}
    Instructions: {instructions}

    PDF Content (Simulated for demo):
    {file_name} is the uploaded PDF. Extract the required details accordingly.
    """


def _response_as_json(response):
    """Return *response* as a valid JSON document for the download button.

    Text that already parses as JSON is passed through unchanged; anything
    else is wrapped as ``{"response": <text>}`` so the ``.json`` file is
    always well-formed.
    """
    import json

    try:
        json.loads(response)
    except ValueError:
        return json.dumps({"response": response}, indent=2)
    return response


def main():
    """Run the single-page Streamlit flow for the PO parsing demo.

    The page collects a vendor option, a PDF upload and free-text parsing
    instructions, returning early with a warning while any of them is
    missing. It then builds a prompt, generates a response, shows it, and
    offers it as a JSON download.
    """
    st.title("PMP Auto-PO Generator (Free GPT-Neo Model)")

    # Step 1: Welcome and Option Selection
    st.write("Welcome! Please choose an option:")
    options = ["Toshiba", "BHEL", "Federal Electric", "AL NISF", "Others"]
    selected_option = st.selectbox("Select an option:", options)
    if not selected_option:
        st.warning("Please select an option to proceed.")
        return

    # Step 2: File Upload
    uploaded_file = st.file_uploader(
        "Upload your PO file (PDF format only):", type=["pdf"]
    )
    if not uploaded_file:
        st.warning("Please upload a PDF file to proceed.")
        return

    # Step 3: Instructions for Parsing
    instructions = st.text_area(
        "Provide custom parsing instructions (based on your selection):",
        placeholder="E.g., Extract columns like Pos., Item Code, Unit, etc.",
    )
    if not instructions:
        st.warning("Please provide parsing instructions to proceed.")
        return

    # NOTE(review): the PDF content itself is never extracted; the prompt only
    # mentions the file name. Real parsing needs a PDF text extractor.
    prompt = _build_prompt(selected_option, instructions, uploaded_file.name)

    # Streamlit reruns this script on every widget interaction. Keep the last
    # response in session state, keyed by its prompt, so that clicking the
    # download button does not trigger another full model generation.
    if st.session_state.get("response_prompt") != prompt:
        with st.spinner("Loading model..."):
            tokenizer, model = load_model()
        with st.spinner("Generating response..."):
            response = generate_response(prompt, tokenizer, model)
        st.session_state["response_prompt"] = prompt
        st.session_state["response"] = response
    response = st.session_state["response"]

    # Display results
    st.write("Parsed Output:")
    st.text_area("GPT-Neo Response", value=response, height=300)

    # Render the download button directly. Nesting it inside ``st.button``
    # makes it vanish on the rerun that its own click causes.
    st.download_button(
        label="Download JSON",
        data=_response_as_json(response),
        file_name="parsed_output.json",
        mime="application/json",
    )
76
 
77
  if __name__ == "__main__":
78
  main()