Rsnarsna committed on
Commit
881fe15
·
verified ·
1 Parent(s): 655cd42

Upload 9 files

Browse files
Files changed (9) hide show
  1. DB_tableCreationQuery.txt +20 -0
  2. app.py +57 -0
  3. config.py +25 -0
  4. email_reader.py +73 -0
  5. llm_model.py +30 -0
  6. main.py +24 -0
  7. prompt_response_analyser.py +78 -0
  8. requirements.txt +8 -0
  9. utils.py +172 -0
DB_tableCreationQuery.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Table-creation query for the `shipment_details` table.
# `id` is an auto-incrementing primary key because the application's INSERT
# statement never supplies an id value.
"""
CREATE TABLE `shipment_details` (
  `id` bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,
  `origin` varchar(255) DEFAULT NULL,
  `destination` varchar(255) DEFAULT NULL,
  `expected_shipment_datetime` text DEFAULT NULL,
  `types_of_service` varchar(255) DEFAULT NULL,
  `warehouse` varchar(255) DEFAULT NULL,
  `description` text DEFAULT NULL,
  `quantities` text DEFAULT NULL,
  `carrier_details` varchar(255) DEFAULT NULL,
  `sender` varchar(255) DEFAULT NULL,
  `receiver` varchar(255) DEFAULT NULL,
  `cc` varchar(255) DEFAULT NULL,
  `bcc` varchar(255) DEFAULT NULL,
  `subject` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
"""
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st # type: ignore
2
+ import sys
3
+ from io import StringIO
4
+ from main import start, stop
5
+
6
+ # Function to capture print statements into the log box
7
class StreamToText:
    """File-like stdout replacement that mirrors printed text into one log box.

    All text written so far is accumulated in ``self.log`` and re-rendered
    into a single Streamlit placeholder, so repeated prints update the same
    box instead of stacking a new text area per call.
    """

    def __init__(self):
        self.log = ""
        # Placeholder is created lazily so constructing the logger does not
        # emit any UI element by itself.
        self._box = None

    def write(self, text):
        """Append *text* to the log and refresh the log box.

        Returns the number of characters written, as file objects do.
        """
        if not text:
            # Nothing new to show; also avoids re-registering an identical widget.
            return 0
        self.log += text
        if self._box is None:
            self._box = st.empty()
        self._box.text_area("Logs", value=self.log, height=200)
        return len(text)

    def flush(self):
        """No-op: the log box is refreshed on every write."""
        pass
17
+
18
# Route everything printed to stdout through the custom log-box stream.
sys.stdout = logger = StreamToText()
23
+
24
# Module-level flag: True while the e-mail polling loop is executing.
running = False


def start_llm():
    """Start the e-mail reading loop unless it is already running.

    ``start()`` (imported from main) blocks until the loop ends, so the flag
    is cleared in ``finally``; otherwise it would stay True forever after the
    loop returned or raised.
    """
    global running
    if running:
        print("The process is already running.")
        return

    running = True
    print("Starting the process...")
    try:
        start()
    finally:
        running = False
36
+
37
+ # Function to stop the email reading process
38
def stop_llm():
    """Ask the e-mail reading loop to stop, if this module believes it is running."""
    global running
    if not running:
        print("The process is not running.")
        return

    print("Stopping the process...")
    stop()  # stop() comes from main.py
    running = False
46
+
47
# Streamlit UI: a title plus one button per action.
st.title("Streamlit LLM App")

# Buttons are rendered in order; a clicked button runs its handler at once.
for button_label, handler in (("Start", start_llm), ("Stop", stop_llm)):
    if st.button(button_label):
        handler()

# Anything printed by the handlers is shown through the stdout log stream.
config.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# config.py
#
# Connection settings for the mailbox and the database.
# Secrets (mail password, API key, DB password) are read from environment
# variables and must never be committed to the repository. Non-secret
# settings keep their previous values as defaults and can be overridden the
# same way.
import os

IMAP_SERVER = os.environ.get('IMAP_SERVER', 'imap.gmail.com')
EMAIL_ADDRESS = os.environ.get('EMAIL_ADDRESS', 'narayanansubramani14@gmail.com')
PASSWORD = os.environ.get('EMAIL_PASSWORD', '')

OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')

DB_CONFIG = {
    'host': os.environ.get('DB_HOST', '99d6-103-25-46-162.ngrok-free.app'),
    'user': os.environ.get('DB_USER', 'root'),
    'password': os.environ.get('DB_PASSWORD', ''),
    'database': os.environ.get('DB_NAME', 'shipment_details'),
}
25
+
email_reader.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # email_reader.py
3
+ from .config import IMAP_SERVER, EMAIL_ADDRESS, PASSWORD, DB_CONFIG
4
+ import email, imaplib, json
5
+ from .utils import get_details, insert_data
6
+
7
def _plain_text_body(message):
    """Return the first usable text body of *message* as str ('' if none).

    For multipart messages only non-attachment ``text/plain`` parts are
    considered; a single-part message is decoded whatever its content type.
    The part's declared charset is honoured, falling back to UTF-8.
    """
    if message.is_multipart():
        candidates = [
            part for part in message.walk()
            if part.get_content_type() == 'text/plain'
            and part.get_content_disposition() != 'attachment'
        ]
    else:
        candidates = [message]

    for part in candidates:
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            return payload.decode(charset, errors='replace')
        except LookupError:
            # Unknown charset name declared by the sender.
            return payload.decode('utf-8', errors='replace')
    return ""


def read_email():
    """Process every unread inbox message and store the extracted details.

    For each UNSEEN message the plain-text body is passed to ``get_details``,
    the result is merged with the sender/receiver/cc/bcc/subject headers and
    handed to ``insert_data``. A failure on one message is printed and does
    not stop the remaining messages. The IMAP connection is always released,
    including when login, select or search fails.
    """
    mail = None
    try:
        mail = imaplib.IMAP4_SSL(IMAP_SERVER)
        mail.login(EMAIL_ADDRESS, PASSWORD)
        mail.select('inbox')

        # Search for unread emails
        status, messages = mail.search(None, 'UNSEEN')
        if status != 'OK' or not messages or not messages[0]:
            return

        for message_id in messages[0].split():
            try:
                status, data = mail.fetch(message_id, '(RFC822)')
                if status != 'OK' or not data or not isinstance(data[0], tuple):
                    print(f"Error processing email ID {message_id}: fetch returned {status}")
                    continue
                email_message = email.message_from_bytes(data[0][1])

                extracted_details = get_details(_plain_text_body(email_message))
                print(type(extracted_details))

                # Combine metadata and extracted details
                extracted_details.update({
                    'sender': email_message['From'],
                    'receiver': email_message['To'],
                    'cc': email_message['Cc'],
                    'bcc': email_message['Bcc'],
                    'subject': email_message['Subject'],
                })

                print('full data about email ! ...::', extracted_details)
                insert_data(extracted_details)
                print('email analysed succesfully !\n')
            except Exception as e:
                print(f"Error processing email ID {message_id}: {e}")

    except Exception as e:
        print(f"Error reading emails: {e}")
    finally:
        if mail is not None:
            # close() is only legal once a mailbox is selected; ignore the
            # error it raises otherwise so logout() still runs.
            try:
                mail.close()
            except (imaplib.IMAP4.error, OSError):
                pass
            try:
                mail.logout()
            except (imaplib.IMAP4.error, OSError):
                pass
73
+
llm_model.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # !pip install -q transformers torch bitsandbytes accelerate
2
+
3
+ import torch # type: ignore
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM # type: ignore
5
+ from .utils import prompt_
6
+
7
def load_llm_model():
    """Load Phi-3-mini (8-bit) and wrap it in a text-generation pipeline.

    Returns:
        The ready-to-use pipeline, or ``None`` if loading failed (the error
        is printed rather than raised).
    """
    model_name = "microsoft/Phi-3-mini-128k-instruct"
    cache_dir = './phi3_mini'
    try:
        # Report which device is available; layer placement itself is left
        # to device_map="auto" below.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        tokenizer = AutoTokenizer.from_pretrained(
            model_name, trust_remote_code=True, cache_dir=cache_dir
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            load_in_8bit=True,
            device_map="auto",
            trust_remote_code=True,
            cache_dir=cache_dir,
        )

        generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

        # Smoke-test generation so a broken model is noticed at load time.
        output = generator("we are start to analyse email!", max_new_tokens=200)
        print(output[0])
        return generator
    except Exception as e:
        print(f"Error initializing LLM: {e}")
        return None


# utils.py does `from .llm_model import pipe`, so the pipeline has to exist as
# a module-level name; previously it only lived inside the function.
# NOTE(review): this module also imports from utils, so the two modules
# import each other - confirm the import order works in your entry point.
pipe = load_llm_model()
main.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+
3
+ from .email_reader import read_email
4
+ import time
5
+
6
# Loop-control flag: start() keeps polling while this is True.
running = False


def start():
    """Poll the mailbox every 10 seconds until ``running`` is cleared."""
    global running
    running = True
    separator = '%' * 100
    while running:
        try:
            print('Started running...')
            read_email()
        except Exception as exc:
            print(f"Error in main loop: {exc}")
        print(separator)
        # Wait 10 seconds before polling again.
        time.sleep(10)
20
+
21
def stop():
    """Clear the loop-control flag so start() exits after its current cycle."""
    global running
    running = False
    print("Stopped running.")
prompt_response_analyser.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # prompt_response_analyser.py
3
+
4
+ from .utils import prompt_
5
+
6
def _current_prompt():
    """Return the prompt most recently sent to the model ('' when unknown).

    The value is looked up on the utils module at call time, because utils
    rebinds its global ``prompt_`` for every e-mail; a name imported once at
    module import would be stale.
    """
    try:
        from . import utils
    except ImportError:
        # Not running as part of the package: fall back to a module-level name.
        return globals().get('prompt_', '')
    return getattr(utils, 'prompt_', '')


def _normalise_key(key):
    """Map a model-produced key to snake_case, e.g. 'Types of service'."""
    return str(key).strip().strip('"\'').strip().lower().replace(' ', '_')


def _normalise_value(value):
    """Clean one extracted value; empty / 'null' / 'none' strings become None."""
    if isinstance(value, str):
        cleaned = value.strip().rstrip(',').strip().strip('"\'').strip()
        return None if cleaned.lower() in ('', 'null', 'none') else cleaned
    if isinstance(value, (dict, list)):
        import json
        # Nested structures are flattened to text so they can be stored.
        return json.dumps(value)
    return value


def _parse_json_block(text):
    """Return the first JSON object found in *text* as a dict, else None."""
    import json
    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except ValueError:
            candidate = None
        if isinstance(candidate, dict):
            return {_normalise_key(k): _normalise_value(v) for k, v in candidate.items()}
        start = text.find('{', start + 1)
    return None


def _parse_labelled_lines(text):
    """Parse 'field: value,' lines such as an 'Extracted Information' block.

    A value runs from its label to the next known label (or the next blank
    line), so commas and colons inside a value are preserved. Only the first
    occurrence of each field is kept.
    """
    import re
    fields = (
        'origin', 'destination', 'expected_shipment_datetime',
        'types_of_service', 'warehouse', 'description',
        'quantities', 'carrier_details',
    )
    alternatives = '|'.join(name.replace('_', '[ _]') for name in fields)
    label = re.compile(
        r'^[ \t]*["\']?(' + alternatives + r')["\']?[ \t]*:[ \t]*',
        re.IGNORECASE | re.MULTILINE,
    )
    hits = list(label.finditer(text))
    data = {}
    for pos, hit in enumerate(hits):
        key = _normalise_key(hit.group(1))
        if key in data:
            continue
        stop = hits[pos + 1].start() if pos + 1 < len(hits) else len(text)
        raw = text[hit.end():stop].split('\n\n', 1)[0]
        data[key] = _normalise_value(raw)
    return data


def extract_info(output, prompt=None):
    """Turn raw text-generation output into a dict of shipment fields.

    Args:
        output: Pipeline result; ``output[0]['generated_text']`` is read.
        prompt: Prompt text to strip from the front of the generated text.
            When omitted, the prompt last stored by utils is used.

    Returns:
        dict keyed by snake_case field names. A JSON object in the model's
        answer is preferred; otherwise 'field: value' lines are parsed.
        Values that are empty or 'null' are returned as ``None``; an empty
        dict is returned when nothing could be extracted.
    """
    text = output[0]['generated_text']
    if prompt is None:
        prompt = _current_prompt()
    if prompt and text.startswith(prompt):
        text = text[len(prompt):]

    data_dict = _parse_json_block(text)
    if data_dict is None:
        print("Curly braces '{ }' not found in the text.")
        data_dict = _parse_labelled_lines(text)
    return data_dict
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ mysql-connector-python
3
+ bitsandbytes
4
+ accelerate
5
+ pytorch
6
+ sentencepiece
7
+ flash-attn
8
+ streamlit
utils.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utils.py
2
+ import mysql.connector # type: ignore
3
+ from .config import DB_CONFIG
4
+ from .llm_model import pipe
5
+ from . import prompt_response_analyser
6
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM # type: ignore
7
+
8
import json

# Empty answer template shown to the model. The keys are the same snake_case
# names the prompt instructions use and insert_data reads.
output_format = {
    "origin": "",
    "destination": "",
    "expected_shipment_datetime": "",
    "types_of_service": "",
    "warehouse": "",
    "description": "",
    "quantities": "",
    "carrier_details": "",
}

# Few-shot extraction prompt; the e-mail body is appended to it by get_details.
prompt = f"""
System prompt: You will be provided with an email containing shipment details. Your task is to extract specific information based on the given instructions.

Instructions:
1. The input email may contain irrelevant information. Focus only on extracting details about future shipments.
2. The output should be in JSON format. If a type of information is not found, it should be marked as null.
3. Extract the following information:
   - origin: The origin location of the consignment.
   - destination: The destination location of the consignment.
   - expected_shipment_datetime: The expected date and time of delivery to the warehouse (format: yyyy-mm-dd hh:mm:ss).
   - types_of_service: The type of service (AIR, LCL, FCL). AIR can be mentioned as flight, aeroplane, or any mode of air transport. LCL is a Less-Container Load, and FCL is a Full-Container Load.
   - warehouse: The name of the warehouse.
   - description: A brief description of the email (ASN).
   - quantities: The number of items in the shipment.
   - carrier_details: The details of the carrier.
4. The extracted information must be returned in exactly this format:
{{
    "origin": "",
    "destination": "",
    "expected_shipment_datetime": "",
    "types_of_service": "",
    "warehouse": "",
    "description": "",
    "quantities": "",
    "carrier_details": ""
}}
Examples:

1. Email: We are pleased to inform you of an upcoming shipment originating from Hamburg and destined for New York. The shipment is expected to arrive on August 15, 2024. This consignment includes various electronics, with an estimated quantity of 200 units. The service type for this shipment is AIR, provided by our reliable carrier, Sky Logistics.
Extracted Information:
origin: Hamburg,
destination: New York,
expected_shipment_datetime: 2024-08-15 00:00:00,
types_of_service: AIR,
warehouse: null,
description: We are pleased to inform you of an upcoming shipment originating from Hamburg and destined for New York. The shipment is expected to arrive on August 15, 2024.,
quantities: 200 units,
carrier_details: Sky Logistics

2. Email: Please be advised of a shipment from our supplier in Shanghai heading to Los Angeles. The expected date of arrival is July 30, 2024. The shipment consists of mixed goods, mainly textiles, with a total of 500 pieces. This delivery will be handled through LCL service by Ocean Freight Co.
Extracted Information:
origin: Shanghai,
destination: Los Angeles,
expected_shipment_datetime: 2024-07-30 00:00:00,
types_of_service: LCL,
warehouse: null,
description: Please be advised of a shipment from our supplier in Shanghai heading to Los Angeles. The expected date of arrival is July 30, 2024.,
quantities: 500 pieces,
carrier_details: Ocean Freight Co.

3. Email: A new shipment is on its way from Mumbai to London, scheduled to reach by August 22, 2024. This batch contains furniture items, totaling 150 pieces. It is managed by Global Carriers.
Extracted Information:
origin: Mumbai,
destination: London,
expected_shipment_datetime: 2024-08-22 00:00:00,
types_of_service: null,
warehouse: null,
description: A new shipment is on its way from Mumbai to London, scheduled to reach by August 22, 2024.,
quantities: 150 pieces,
carrier_details: Global Carriers

4. Email: We are notifying you about a shipment dispatched from Tokyo, heading towards Sydney, with an estimated arrival date of September 10, 2024. The cargo includes automotive parts, summing up to 350 units. This shipment will be transported via AIR service, operated by Jet Logistics.
Extracted Information:
origin: Tokyo,
destination: Sydney,
expected_shipment_datetime: 2024-09-10 00:00:00,
types_of_service: AIR,
warehouse: null,
description: We are notifying you about a shipment dispatched from Tokyo, heading towards Sydney, with an estimated arrival date of September 10, 2024.,
quantities: 350 units,
carrier_details: Jet Logistics

5. Email: Kindly note the details of a forthcoming shipment from Berlin to Toronto. The shipment encompasses various household goods, with a total quantity of 400 items. We have arranged for this to be shipped using LCL service, provided by Sea Wave Transport.
Extracted Information:
origin: Berlin,
destination: Toronto,
expected_shipment_datetime: null,
types_of_service: LCL,
warehouse: null,
description: Kindly note the details of a forthcoming shipment from Berlin to Toronto.,
quantities: 400 items,
carrier_details: Sea Wave Transport

Output: {json.dumps(output_format, indent=4)}
"""
104
+
105
def insert_data(extracted_details):
    """Insert one extracted shipment record into ``shipment_details``.

    Args:
        extracted_details: dict holding the shipment fields produced by the
            model plus the e-mail metadata (sender, receiver, cc, bcc,
            subject). Missing keys are stored as NULL.

    Returns:
        True when a row was inserted and committed; False when the record was
        skipped (every shipment field empty) or a database error occurred.
        Errors are printed, not raised.
    """
    shipment_fields = (
        'origin', 'destination', 'expected_shipment_datetime',
        'types_of_service', 'warehouse', 'description',
        'quantities', 'carrier_details',
    )
    email_fields = ('sender', 'receiver', 'cc', 'bcc', 'subject')

    # Skip only when ALL shipment fields are empty; checked before connecting
    # so a skipped e-mail never opens a database connection.
    if all(extracted_details.get(field) in ("", None) for field in shipment_fields):
        print("Skipping insertion: All specified extracted values are empty.")
        return False

    columns = shipment_fields + email_fields
    # Column names are fixed constants; the values are bound as parameters.
    sql = "INSERT INTO shipment_details ({}) VALUES ({})".format(
        ', '.join(columns), ', '.join(['%s'] * len(columns))
    )
    val = tuple(extracted_details.get(column) for column in columns)

    mydb = None
    cursor = None
    try:
        print('started !!')
        mydb = mysql.connector.connect(**DB_CONFIG)
        cursor = mydb.cursor()
        print('db connected ! ')
        cursor.execute(sql, val)
        mydb.commit()
        print('data inserted successfully ! ...')
        return True
    except mysql.connector.Error as e:
        print(f"Database error: {e}")
    except Exception as e:
        print(f"Error inserting data: {e}")
    finally:
        # Always release the cursor and connection, including on failure.
        if cursor is not None:
            cursor.close()
        if mydb is not None:
            mydb.close()
    return False
158
+
159
+
160
+
161
def get_details(mail):
    """Run the extraction prompt on an e-mail body and return the parsed fields.

    Args:
        mail: Plain-text e-mail body.

    Returns:
        The dict produced by ``prompt_response_analyser.extract_info``. An
        empty dict is returned, without calling the model, when the body is
        empty, whitespace-only or None.

    Side effect: stores the full prompt in the module-level ``prompt_``.
    """
    global prompt_
    if not mail or not mail.strip():
        print("Skipping extraction: email body is empty.")
        return {}

    prompt_ = prompt + mail
    output = pipe(prompt_, max_new_tokens=400)
    print(output[0])
    extracted_info = prompt_response_analyser.extract_info(output)
    print(extracted_info)

    return extracted_info
172
+