Spaces:
Sleeping
Sleeping
File size: 15,816 Bytes
881fe15 8fbd491 4cdc302 ad06a1e 4cdc302 752fa69 4cdc302 f3a9330 c100f16 42ec208 4cdc302 be5f6bf 752fa69 5152346 c100f16 5152346 881fe15 5152346 752fa69 8fbd491 752fa69 5152346 752fa69 5152346 8fbd491 5152346 8fbd491 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 |
import streamlit as st # type: ignore
import sys
from io import StringIO
import email, imaplib, json, asyncio
import torch, time # type: ignore
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM # type: ignore
import mysql.connector # type: ignore
# config.py
import os  # imported here so the environment overrides below resolve

# SECURITY NOTE: credentials were previously available only as hard-coded
# literals. Each setting can now be supplied through an environment variable;
# the literals remain solely as backward-compatible defaults. The mailbox
# password committed here should be rotated and then removed from the source.
IMAP_SERVER = os.environ.get('SHIPMENT_IMAP_SERVER', 'imap.gmail.com')
EMAIL_ADDRESS = os.environ.get('SHIPMENT_EMAIL_ADDRESS', 'narayanansubramani14@gmail.com')
PASSWORD = os.environ.get('SHIPMENT_EMAIL_PASSWORD', 'gclc wsnx kywt uvqy ')

# Keyword arguments passed straight to mysql.connector.connect().
DB_CONFIG = {
    'host': os.environ.get('SHIPMENT_DB_HOST', '99d6-103-25-46-162.ngrok-free.app'),
    'user': os.environ.get('SHIPMENT_DB_USER', 'root'),
    'password': os.environ.get('SHIPMENT_DB_PASSWORD', ''),
    'database': os.environ.get('SHIPMENT_DB_NAME', 'shipment_details'),
}
def _decode_text_part(part):
    """Return the decoded text payload of *part*, or '' when it has none."""
    payload = part.get_payload(decode=True)
    if payload is None:
        # get_payload(decode=True) yields None for container parts.
        return ""
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        # The message declared a charset Python does not know.
        return payload.decode('utf-8', errors='replace')


def _plain_text_body(email_message):
    """Return the first text/plain part of a message (or its only payload)."""
    if not email_message.is_multipart():
        return _decode_text_part(email_message)
    for part in email_message.walk():
        if part.get_content_type() == 'text/plain':
            return _decode_text_part(part)
    return ""


def read_email():
    """Process every unread message in the inbox.

    Logs in to IMAP_SERVER with EMAIL_ADDRESS / PASSWORD, searches the inbox
    for UNSEEN messages and, for each one, passes the plain-text body to
    get_details(), merges the header metadata (sender, receiver, cc, bcc,
    subject) into the returned dict and hands it to insert_data().

    Returns:
        None. Failures are printed rather than raised: a failure on one
        message does not stop the remaining messages from being processed.
    """
    mail = None
    try:
        mail = imaplib.IMAP4_SSL(IMAP_SERVER)
        mail.login(EMAIL_ADDRESS, PASSWORD)
        mail.select('inbox')

        # Search for unread emails
        status, messages = mail.search(None, 'UNSEEN')
        if status != 'OK':
            raise RuntimeError(f"IMAP search failed with status {status}")

        # Process each unread email
        for message_id in messages[0].split():
            try:
                status, data = mail.fetch(message_id, '(RFC822)')
                if status != 'OK' or not data or data[0] is None:
                    raise RuntimeError(f"IMAP fetch failed with status {status}")
                email_message = email.message_from_bytes(data[0][1])

                extracted_details = get_details(_plain_text_body(email_message))
                print(type(extracted_details))

                # Combine metadata and extracted details
                meta_data = {
                    'sender': email_message['From'],
                    'receiver': email_message['To'],
                    'cc': email_message['Cc'],
                    'bcc': email_message['Bcc'],
                    'subject': email_message['Subject'],
                }
                extracted_details.update(meta_data)
                print('full data about email ! ...::', extracted_details)
                insert_data(extracted_details)
                print('email analysed succesfully !\n')
            except Exception as e:
                print(f"Error processing email ID {message_id}: {e}")
    except Exception as e:
        print(f"Error reading emails: {e}")
    finally:
        # Always release the connection, even when login/search failed.
        if mail is not None:
            for cleanup in (mail.close, mail.logout):
                try:
                    cleanup()
                except (imaplib.IMAP4.error, OSError) as e:
                    # close() is only valid once a mailbox is selected, so a
                    # failure here is expected after an early error.
                    print(f"Error closing mailbox connection: {e}")
async def load_llm_model():
    """Load the Phi-3 mini model and publish a text-generation pipeline.

    On success the pipeline is stored in the module-level ``pipe`` global,
    which get_details() calls. Any failure is printed and swallowed, in which
    case ``pipe`` is left unset.

    Returns:
        None.
    """
    global pipe
    try:
        # Check if GPU is available and set the device accordingly
        use_gpu = torch.cuda.is_available()
        device = "cuda" if use_gpu else "cpu"
        print(f"Using device: {device}")

        # Load the tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            "microsoft/Phi-3-mini-128k-instruct",
            trust_remote_code=True,
            cache_dir='./phi3_mini',
        )

        # Only request 8-bit weights when a GPU is present: the device check
        # above was previously ignored and 8-bit loading was forced everywhere.
        # NOTE(review): bitsandbytes 8-bit quantization generally needs CUDA;
        # confirm against the installed transformers/bitsandbytes versions.
        model = AutoModelForCausalLM.from_pretrained(
            "microsoft/Phi-3-mini-128k-instruct",
            load_in_8bit=use_gpu,
            device_map="auto",  # Automatically map layers to available device (GPU/CPU)
            trust_remote_code=True,
            cache_dir='./phi3_mini',
        )

        # Create a pipeline for text generation
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

        # Yield to the event loop once; the former fixed 1 s delay did nothing.
        await asyncio.sleep(0)
    except Exception as e:
        print(f"Error initializing LLM: {e}")
def _normalise_field_text(raw):
    """Collapse raw model text into a single ``"k: v, k: v"`` string."""
    info = raw.replace('\n', '').strip()
    return ', '.join(part.strip() for part in info.split(',') if part.strip())


def _parse_key_value_pairs(cleaned_string):
    """Build a dict from ``"key: value, key: value"`` text."""
    data_dict = {}
    for pair in cleaned_string.split(", "):
        if ": " not in pair:
            continue
        # maxsplit=1 so a value that itself contains ": " is kept whole
        # (the previous maxsplit=2 made the two-name unpacking raise).
        key, value = pair.split(": ", 1)
        data_dict[key.strip().strip('"\'')] = value.strip().strip('"\'')

    # A description usually contains commas, so the comma split above keeps
    # only its first fragment. Recover the full text lying between the
    # "description:" and "quantities:" labels when both are present.
    start_key = "description:"
    end_key = "quantities:"
    start_index = cleaned_string.find(start_key)
    if start_index != -1:
        value_start = start_index + len(start_key)
        end_index = cleaned_string.find(end_key, value_start)
        if end_index != -1:
            description = cleaned_string[value_start:end_index].strip()
            data_dict['description'] = description.rstrip(',').strip()
    return data_dict


def extract_info(output):
    """Extract the shipment fields from a text-generation pipeline result.

    Args:
        output: Pipeline result; ``output[0]['generated_text']`` is read. The
            text is expected to start with the prompt stored in the
            module-level ``prompt_`` global (set by get_details()), which is
            sliced off. When that global is absent nothing is removed.

    Returns:
        dict: Parsed fields. A ``{...}`` section is parsed as JSON when
        possible, otherwise as ``key: value`` pairs. Without braces, the text
        after an ``Extracted Information:`` heading is parsed as
        ``key: value`` pairs. An empty dict is returned when neither is found.
    """
    text = output[0]['generated_text']
    prompt_text = globals().get('prompt_', '')
    txt = text[len(prompt_text):]

    # Find the first '{' and the first '}' that follows it.
    start_index = txt.find('{')
    end_index = txt.find('}', start_index) if start_index != -1 else -1

    if start_index != -1 and end_index != -1:
        extracted_data = txt[start_index:end_index + 1].strip()
        print(extracted_data)
        try:
            parsed = json.loads(extracted_data)
        except ValueError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        # Not valid JSON (e.g. unquoted keys): fall back to pair parsing.
        return _parse_key_value_pairs(
            _normalise_field_text(extracted_data.strip("{}"))
        )

    print("Curly braces '{ }' not found in the text.")
    marker = 'Extracted Information:'
    marker_index = txt.find(marker)
    if marker_index == -1:
        print("'Extracted Information:' section not found in the text.")
        return {}

    ex_data = txt[marker_index:].strip()
    print(ex_data)
    # Skip the heading by its real length instead of a hard-coded offset.
    return _parse_key_value_pairs(_normalise_field_text(ex_data[len(marker):]))
# Empty template of the fields the model is asked to return. The keys now match
# the field names listed in the prompt instructions below and the keys that
# insert_data() reads (they were previously capitalised/spaced differently).
output_format = {
    "origin": "",
    "destination": "",
    "expected_shipment_datetime": "",
    "types_of_service": "",
    "warehouse": "",
    "description": "",
    "quantities": "",
    "carrier_details": "",
}

# Few-shot extraction prompt; get_details() appends the e-mail body to it.
# This is an f-string: literal braces are doubled and {output_format} is
# rendered as the dict's repr.
# Fixes relative to the earlier text: the example timestamps now follow the
# stated "yyyy-mm-dd hh:mm:ss" format, and example 5's description no longer
# mentions an arrival date that its e-mail does not contain.
prompt = f"""
System prompt: You will be provided with an email containing shipment details. Your task is to extract specific information based on the given instructions.
Instructions:
1. The input email may contain irrelevant information. Focus only on extracting details about future shipments.
2. The output should be in JSON format. If a type of information is not found, it should be marked as null.
3. Extract the following information:
- origin: The origin location of the consignment.
- destination: The destination location of the consignment.
- expected_shipment_datetime: The expected date and time of delivery to the warehouse (format: yyyy-mm-dd hh:mm:ss).
- types_of_service: The type of service (AIR, LCL, FCL). AIR can be mentioned as flight, aeroplane, or any mode of air transport. LCL is a Less-Container Load, and FCL is a Full-Container Load.
- warehouse: The name of the warehouse.
- description: A brief description of the email (ASN).
- quantities: The number of items in the shipment.
- carrier_details: The details of the carrier.
4. the output extracted information contains must be in this format:
{{
"origin": "",
"destination": "",
"expected_shipment_datetime": "",
"types_of_service": "",
"warehouse": "",
"description": "",
"quantities": "",
"carrier_details": ""
}}
Examples:
1. Email: We are pleased to inform you of an upcoming shipment originating from Hamburg and destined for New York. The shipment is expected to arrive on August 15, 2024. This consignment includes various electronics, with an estimated quantity of 200 units. The service type for this shipment is AIR, provided by our reliable carrier, Sky Logistics.
Extracted Information:
origin: Hamburg,
destination: New York,
expected_shipment_datetime: 2024-08-15 00:00:00,
types_of_service: AIR,
warehouse: Sky Logistics,
description: We are pleased to inform you of an upcoming shipment originating from Hamburg and destined for New York. The shipment is expected to arrive on August 15, 2024.,
quantities: 200 units,
carrier_details: Sky Logistics
2. Email: Please be advised of a shipment from our supplier in Shanghai heading to Los Angeles. The expected date of arrival is July 30, 2024. The shipment consists of mixed goods, mainly textiles, with a total of 500 pieces. This delivery will be handled through LCL service by Ocean Freight Co.
Extracted Information:
origin: Shanghai,
destination: Los Angeles,
expected_shipment_datetime: 2024-07-30 00:00:00,
types_of_service: LCL,
warehouse: Ocean Freight Co.,
description: Please be advised of a shipment from our supplier in Shanghai heading to Los Angeles. The expected date of arrival is July 30, 2024.,
quantities: 500 pieces,
carrier_details: Ocean Freight Co.
3. Email: A new shipment is on its way from Mumbai to London, scheduled to reach by August 22, 2024. This batch contains furniture items, totaling 150 pieces. It is managed by Global Carriers.
Extracted Information:
origin: Mumbai,
destination: London,
expected_shipment_datetime: 2024-08-22 00:00:00,
types_of_service: null,
warehouse: Global Carriers,
description: A new shipment is on its way from Mumbai to London, scheduled to reach by August 22, 2024.,
quantities: 150 pieces,
carrier_details: Global Carriers
4. Email: We are notifying you about a shipment dispatched from Tokyo, heading towards Sydney, with an estimated arrival date of September 10, 2024. The cargo includes automotive parts, summing up to 350 units. This shipment will be transported via AIR service, operated by Jet Logistics.
Extracted Information:
origin: Tokyo,
destination: Sydney,
expected_shipment_datetime: 2024-09-10 00:00:00,
types_of_service: AIR,
warehouse: Jet Logistics,
description: We are notifying you about a shipment dispatched from Tokyo, heading towards Sydney, with an estimated arrival date of September 10, 2024.,
quantities: 350 units,
carrier_details: Jet Logistics
5. Email: Kindly note the details of a forthcoming shipment from Berlin to Toronto. The shipment encompasses various household goods, with a total quantity of 400 items. We have arranged for this to be shipped using LCL service, provided by Sea Wave Transport.
Extracted Information:
origin: Berlin,
destination: Toronto,
expected_shipment_datetime: null,
types_of_service: LCL,
warehouse: Sea Wave Transport,
description: Kindly note the details of a forthcoming shipment from Berlin to Toronto.,
quantities: 400 items,
carrier_details: Sea Wave Transport
Output: {output_format}
"""
def insert_data(extracted_details):
    """Insert one analysed e-mail into the ``shipment_details`` table.

    Args:
        extracted_details: dict holding the extracted shipment fields plus the
            e-mail metadata (sender, receiver, cc, bcc, subject). Missing keys
            are inserted as NULL.

    Returns:
        None. Nothing is inserted, and no database connection is opened, when
        every shipment field is empty. Errors are printed, not raised.
    """
    required_fields = [
        'origin', 'destination', 'expected_shipment_datetime',
        'types_of_service', 'warehouse', 'description',
        'quantities', 'carrier_details'
    ]
    columns = required_fields + ['sender', 'receiver', 'cc', 'bcc', 'subject']

    mydb = None
    cursor = None
    try:
        print('started !!')

        # The prompt tells the model to mark missing values as null; in the
        # plain-text output format that arrives as the string "null", so map
        # it to a real NULL instead of storing the word.
        details = {
            key: None if isinstance(value, str) and value.strip().lower() == 'null' else value
            for key, value in extracted_details.items()
        }

        # Check the fields before connecting so empty e-mails cost no DB round trip.
        if all(details.get(field) in ["", None] for field in required_fields):
            print("Skipping insertion: All specified extracted values are empty.")
            return

        # Initialize MySQL database connection
        mydb = mysql.connector.connect(**DB_CONFIG)
        cursor = mydb.cursor()
        print('db connecyed ! ')

        sql = """
INSERT INTO shipment_details (origin, destination, expected_shipment_datetime, types_of_service, warehouse, description, quantities, carrier_details, sender, receiver, cc, bcc, subject)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
        print('ready to update ! ///')
        val = tuple(details.get(column) for column in columns)
        cursor.execute(sql, val)
        mydb.commit()
        # Report success only once the transaction is committed.
        print('data inserted successfully ! ...')
    except mysql.connector.Error as e:
        print(f"Database error: {e}")
    except Exception as e:
        print(f"Error inserting data: {e}")
    finally:
        # Release the cursor and connection on every path.
        for resource in (cursor, mydb):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as e:
                print(f"Error closing database resource: {e}")
def get_details(mail):
    """Run the extraction prompt over an e-mail body and parse the result.

    Args:
        mail: Plain-text body of the e-mail; it is appended to ``prompt``.

    Returns:
        Whatever extract_info() returns for the generated text.
    """
    # extract_info() reads this global to strip the prompt from the output.
    global prompt_
    prompt_ = prompt + mail

    generation = pipe(prompt_, max_new_tokens=400)
    print(generation[0])

    details = extract_info(generation)
    print(details)
    return details
def change_bg_color(color):
    """Set the Streamlit app background by injecting a small CSS rule.

    Args:
        color: Any CSS colour value; it is placed verbatim in the rule.
    """
    # Same stylesheet text as before, written as a single-line f-string.
    stylesheet = f"\n<style>\n.stApp {{\nbackground-color: {color};\n}}\n</style>\n"
    st.markdown(stylesheet, unsafe_allow_html=True)
async def start(running):
    """Poll the mailbox every 10 seconds until the loop is stopped.

    Args:
        running: Truthy to start polling. Strings are interpreted by content,
            so 'False', '0', 'no', 'off' and '' mean "do not run" (a non-empty
            string such as 'False' used to count as true and loop forever).

    The requested state is mirrored into the module-level ``running`` flag,
    which is re-read on every iteration so that stop() ends the loop. Calling
    this with a falsy value therefore just clears the flag and returns.

    Returns:
        None.
    """
    if isinstance(running, str):
        running = running.strip().lower() not in ('', 'false', '0', 'no', 'off')

    # ``running`` is a parameter, so the shared flag is reached via globals().
    module_state = globals()
    module_state['running'] = bool(running)

    loop = asyncio.get_running_loop()
    while module_state.get('running'):
        try:
            print('Started running...')
            # read_email() is a plain blocking function: awaiting its None
            # result raised TypeError on every pass. Run it in the default
            # executor so the event loop stays responsive.
            await loop.run_in_executor(None, read_email)
        except Exception as e:
            print(f"Error in main loop: {e}")
        # Non-blocking pause before the next iteration.
        await asyncio.sleep(10)
def stop():
    """Clear the module-level ``running`` flag and log that polling stopped."""
    globals()['running'] = False
    print("Stopped running.")
# Streamlit UI
st.title("Streamlit LLM App")

# NOTE(review): Streamlit re-executes this script on every interaction, so the
# ``pipe`` global set by load_llm_model() is presumably gone by the time
# another button is pressed. The pipeline is therefore kept in
# st.session_state and restored before analysing - confirm on the deployed
# Streamlit version.

# Display buttons for starting and stopping the core function
if st.button("Start"):
    st.write('Model loading...')  # Display a label when the model is loading
    change_bg_color("lightyellow")  # Change background color to yellow
    # load_llm_model is a coroutine function defined in this file, not an
    # attribute of the asyncio module; drive it with asyncio.run().
    asyncio.run(load_llm_model())
    loaded_pipe = globals().get('pipe')
    if loaded_pipe is None:
        # load_llm_model() prints and swallows its errors, so check the result.
        st.error('Model failed to load. Check the application logs.')
    else:
        st.session_state['pipe'] = loaded_pipe
        change_bg_color("lightblue")
        st.success('Model loaded successfully!')

if st.button('Analyse'):
    change_bg_color('white')
    if 'pipe' in st.session_state:
        # Make the stored pipeline visible to get_details() again.
        globals()['pipe'] = st.session_state['pipe']
        st.write('analyze started ! ...')
        asyncio.run(start(True))
        change_bg_color("lightgreen")  # Change background color to green when the loop ends
    else:
        st.warning('Model is not loaded yet. Press "Start" first.')

if st.button("Stop"):
    st.write('Process stopped.')  # Display stop message
    change_bg_color("white")  # Reset background color
    stop()
|