File size: 15,816 Bytes
881fe15
 
 
8fbd491
4cdc302
 
 
 
ad06a1e
4cdc302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
752fa69
4cdc302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3a9330
c100f16
 
42ec208
4cdc302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be5f6bf
 
 
 
 
 
 
 
 
 
 
 
 
752fa69
5152346
 
 
c100f16
5152346
 
 
 
 
 
 
 
 
881fe15
 
 
 
 
5152346
752fa69
8fbd491
752fa69
5152346
 
752fa69
5152346
8fbd491
5152346
 
 
 
 
8fbd491
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
import streamlit as st # type: ignore
import sys
from io import StringIO
import email, imaplib, json, asyncio
import torch, time # type: ignore
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM # type: ignore
import mysql.connector # type: ignore

#config.py
# IMAP connection settings for the Gmail inbox that receives shipment emails.
IMAP_SERVER = 'imap.gmail.com'
EMAIL_ADDRESS = 'narayanansubramani14@gmail.com'
# SECURITY: hardcoded Gmail app password committed to source — move to an
# environment variable / secrets manager and rotate this credential.
PASSWORD = 'gclc wsnx kywt uvqy '

# MySQL connection parameters.
# SECURITY: root with an empty password over a public ngrok tunnel — lock
# this down before any real deployment.
DB_CONFIG = {
    'host': '99d6-103-25-46-162.ngrok-free.app',
    'user': 'root',
    'password': '',
    'database': 'shipment_details'
    }

def read_email():
    """Process every unread message in the inbox.

    For each UNSEEN email: read the metadata headers and the first
    text/plain body part, run the body through the LLM extractor
    (get_details), merge the metadata into the extracted dict and persist
    it via insert_data(). A failure on one message is logged and skipped so
    a single bad email does not abort the whole batch.
    """
    try:
        mail = imaplib.IMAP4_SSL(IMAP_SERVER)
        mail.login(EMAIL_ADDRESS, PASSWORD)
        mail.select('inbox')

        # Search for unread emails
        status, messages = mail.search(None, 'UNSEEN')
        message_ids = messages[0].split()

        # Process each unread email
        for message_id in message_ids:
            try:
                # Fetch the full raw message (RFC822)
                status, data = mail.fetch(message_id, '(RFC822)')
                raw_email = data[0][1]
                email_message = email.message_from_bytes(raw_email)

                # Extract email metadata (header lookups return None when absent)
                sender = email_message['From']
                receiver = email_message['To']
                cc = email_message['Cc']
                bcc = email_message['Bcc']
                subject = email_message['Subject']
                date = email_message['Date']

                # Extract the first text/plain part as the email body.
                # get_payload(decode=True) may return None for empty parts,
                # and bodies are not guaranteed to be valid UTF-8 — use
                # errors='replace' instead of crashing on bad bytes.
                email_body = ""
                if email_message.is_multipart():
                    for part in email_message.walk():
                        if part.get_content_type() == 'text/plain':
                            payload = part.get_payload(decode=True)
                            if payload is not None:
                                email_body = payload.decode('utf-8', errors='replace')
                            break
                else:
                    payload = email_message.get_payload(decode=True)
                    if payload is not None:
                        email_body = payload.decode('utf-8', errors='replace')

                extracted_details = get_details(email_body)
                print(type(extracted_details))
                # Combine metadata and extracted details
                meta_data = {
                    'sender': sender,
                    'receiver': receiver,
                    'cc': cc,
                    'bcc': bcc,
                    'subject': subject
                }
                extracted_details.update(meta_data)

                print('full data about email ! ...::',extracted_details)
                insert_data(extracted_details)
                print('email analysed succesfully !\n')

            except Exception as e:
                print(f"Error processing email ID {message_id}: {e}")

        # Close the connection
        mail.close()
        mail.logout()

    except Exception as e:
        print(f"Error reading emails: {e}")

async def load_llm_model():
    """Load the Phi-3-mini tokenizer and 8-bit model, then publish a
    text-generation pipeline as the module-level global ``pipe`` used by
    get_details().

    NOTE(review): the heavy from_pretrained calls are synchronous and will
    block the event loop; the trailing asyncio.sleep(1) is the only await.
    Errors are swallowed and only printed, so ``pipe`` may remain unset.
    """
    try:
        # Check if GPU is available and set the device accordingly
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # Load the tokenizer (cached locally under ./phi3_mini)
        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", trust_remote_code=True, cache_dir='./phi3_mini')

        # Load the model in 8-bit precision directly
        model = AutoModelForCausalLM.from_pretrained(
            "microsoft/Phi-3-mini-128k-instruct",
            load_in_8bit=True,  # Directly use load_in_8bit
            device_map="auto",  # Automatically map layers to available device (GPU/CPU)
            trust_remote_code=True,
            cache_dir='./phi3_mini'
        )
        global pipe
        # Create a pipeline for text generation; a global so get_details()
        # (and anything else in this module) can call it after loading.
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        await asyncio.sleep(1)
    except Exception as e:
        print(f"Error initializing LLM: {e}")

def extract_info(output, prompt_text=None):
  """Parse the LLM generation into a dict of shipment fields.

  Args:
      output: pipeline result; ``output[0]['generated_text']`` holds the
          prompt followed by the completion.
      prompt_text: prompt prefix to strip from the generation. Defaults to
          the module-level global ``prompt_`` set by get_details(), which
          preserves the original call signature.

  Returns:
      dict mapping field names (origin, destination, ...) to string values;
      'description' is re-extracted as a whole span because it may itself
      contain commas.
  """
  if prompt_text is None:
    prompt_text = prompt_
  text = output[0]['generated_text']
  txt = text[len(prompt_text):]

  # Preferred path: a JSON-like "{...}" span in the completion.
  start_index = txt.find('{')
  end_index = txt.find('}', start_index)
  body = ''
  if start_index != -1 and end_index != -1:
    extracted_data = txt[start_index:end_index + 1].strip()
    print(extracted_data)
    # BUG FIX: the braces content used to be discarded (a later assignment
    # rebuilt the dict from the empty fallback string); use it here.
    body = extracted_data.strip("{}")
  else:
    print("Curly braces '{ }' not found in the text.")
    # Fallback: look for an "\n\nExtracted Information:" section.
    start_index = txt.find('\n\nEx')
    if start_index != -1:
      ex_data = txt[start_index:].strip()
      print(ex_data)
      # Skip the "Extracted Information:" heading plus one separator char.
      body = ex_data[23:]
    else:
      print("Curly braces '{ }' not found in the text.")

  # Normalize: drop newlines, collapse empty comma-separated fragments.
  info = body.replace('\n', '')
  cleaned_string = ', '.join(part.strip() for part in info.split(',') if part.strip())

  # Split the string into key-value pairs using a comma followed by a space
  key_value_pairs = cleaned_string.split(", ")

  data_dict = {}
  for pair in key_value_pairs:
      if ": " in pair:
          # BUG FIX: maxsplit must be 1 (not 2) — with 2, a value containing
          # ": " produced three parts and the unpack raised ValueError.
          key, value = pair.split(": ", 1)
          data_dict[key.strip()] = value.strip()

  # The description may contain commas, so the pairwise split truncates it.
  # Re-slice the whole span between "description:" and "quantities:".
  start_key = "description:"
  end_key = "quantities:"
  desc_pos = cleaned_string.find(start_key)
  qty_pos = cleaned_string.find(end_key)
  if desc_pos != -1 and qty_pos != -1:
      data_dict['description'] = cleaned_string[desc_pos + len(start_key):qty_pos].strip()

  return data_dict

# Canonical (empty) shape of the extraction result; rendered into the prompt
# below as the "Output:" template. Keys are snake_case to match both the
# format mandated in instruction 4 and the keys read by insert_data()
# (the original mixed-case keys contradicted both).
output_format = {
    "origin": "",
    "destination": "",
    "expected_shipment_datetime": "",
    "types_of_service": "",
    "warehouse": "",
    "description": "",
    "quantities": "",
    "carrier_details": ""
}

# Few-shot extraction prompt. Fixes to the examples: timestamps previously
# used malformed fractions ("00:00:000", "00:00:0000", ...) that violated
# the instructed yyyy-mm-dd hh:mm:ss format; example 5's description
# invented an arrival date absent from its email (teaching hallucination).
prompt = f"""
System prompt: You will be provided with an email containing shipment details. Your task is to extract specific information based on the given instructions.

Instructions:
1. The input email may contain irrelevant information. Focus only on extracting details about future shipments.
2. The output should be in JSON format. If a type of information is not found, it should be marked as null.
3. Extract the following information:
    - origin: The origin location of the consignment.
    - destination: The destination location of the consignment.
    - expected_shipment_datetime: The expected date and time of delivery to the warehouse (format: yyyy-mm-dd hh:mm:ss).
    - types_of_service: The type of service (AIR, LCL, FCL). AIR can be mentioned as flight, aeroplane, or any mode of air transport. LCL is a Less-Container Load, and FCL is a Full-Container Load.
    - warehouse: The name of the warehouse.
    - description: A brief description of the email (ASN).
    - quantities: The number of items in the shipment.
    - carrier_details: The details of the carrier.
4. the output extracted information contains must be in this format:
{{
    "origin": "",
    "destination": "",
    "expected_shipment_datetime": "",
    "types_of_service": "",
    "warehouse": "",
    "description": "",
    "quantities": "",
    "carrier_details": ""
}}
Examples:

1. Email: We are pleased to inform you of an upcoming shipment originating from Hamburg and destined for New York. The shipment is expected to arrive on August 15, 2024. This consignment includes various electronics, with an estimated quantity of 200 units. The service type for this shipment is AIR, provided by our reliable carrier, Sky Logistics.
   Extracted Information:
        origin: Hamburg,
        destination: New York,
        expected_shipment_datetime: 2024-08-15 00:00:00,
        types_of_service: AIR,
        warehouse: Sky Logistics,
        description: We are pleased to inform you of an upcoming shipment originating from Hamburg and destined for New York. The shipment is expected to arrive on August 15, 2024.,
        quantities: 200 units,
        carrier_details: Sky Logistics

2. Email: Please be advised of a shipment from our supplier in Shanghai heading to Los Angeles. The expected date of arrival is July 30, 2024. The shipment consists of mixed goods, mainly textiles, with a total of 500 pieces. This delivery will be handled through LCL service by Ocean Freight Co.
   Extracted Information:
        origin: Shanghai,
        destination: Los Angeles,
        expected_shipment_datetime: 2024-07-30 00:00:00,
        types_of_service: LCL,
        warehouse: Ocean Freight Co.,
        description: Please be advised of a shipment from our supplier in Shanghai heading to Los Angeles. The expected date of arrival is July 30, 2024.,
        quantities: 500 pieces,
        carrier_details: Ocean Freight Co.

3. Email: A new shipment is on its way from Mumbai to London, scheduled to reach by August 22, 2024. This batch contains furniture items, totaling 150 pieces. It is managed by Global Carriers.
   Extracted Information:
        origin: Mumbai,
        destination: London,
        expected_shipment_datetime: 2024-08-22 00:00:00,
        types_of_service: null,
        warehouse: Global Carriers,
        description: A new shipment is on its way from Mumbai to London, scheduled to reach by August 22, 2024.,
        quantities: 150 pieces,
        carrier_details: Global Carriers

4. Email: We are notifying you about a shipment dispatched from Tokyo, heading towards Sydney, with an estimated arrival date of September 10, 2024. The cargo includes automotive parts, summing up to 350 units. This shipment will be transported via AIR service, operated by Jet Logistics.
   Extracted Information:
        origin: Tokyo,
        destination: Sydney,
        expected_shipment_datetime: 2024-09-10 00:00:00,
        types_of_service: AIR,
        warehouse: Jet Logistics,
        description: We are notifying you about a shipment dispatched from Tokyo, heading towards Sydney, with an estimated arrival date of September 10, 2024.,
        quantities: 350 units,
        carrier_details: Jet Logistics

5. Email: Kindly note the details of a forthcoming shipment from Berlin to Toronto. The shipment encompasses various household goods, with a total quantity of 400 items. We have arranged for this to be shipped using LCL service, provided by Sea Wave Transport.
   Extracted Information:
        origin: Berlin,
        destination: Toronto,
        expected_shipment_datetime: null,
        types_of_service: LCL,
        warehouse: Sea Wave Transport,
        description: Kindly note the details of a forthcoming shipment from Berlin to Toronto.,
        quantities: 400 items,
        carrier_details: Sea Wave Transport

Output: {output_format}
"""

def insert_data(extracted_details):
    """Persist one extracted shipment record (plus email metadata) to MySQL.

    Skips the insert when every shipment field is empty/None (the LLM found
    nothing useful). DB errors are logged, never raised, so the email loop
    keeps running. Connection and cursor are always closed (the original
    leaked both on every call).
    """
    mydb = None
    cursor = None
    try:
        print('started !!')
        # Initialize MySQL database connection
        mydb = mysql.connector.connect(**DB_CONFIG)
        cursor = mydb.cursor()
        print('db connected !')
        # Skip the insert when every shipment-related field came back empty.
        required_fields = [
            'origin', 'destination', 'expected_shipment_datetime',
            'types_of_service', 'warehouse', 'description',
            'quantities', 'carrier_details'
        ]
        if all(extracted_details.get(field) in ["", None] for field in required_fields):
            print("Skipping insertion: All specified extracted values are empty.")
            return
        # Parameterized insert — values are bound by the driver, never
        # interpolated into the SQL string.
        sql = """
        INSERT INTO shipment_details (origin, destination, expected_shipment_datetime, types_of_service, warehouse, description, quantities, carrier_details, sender, receiver, cc, bcc, subject)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        print('ready to update ! ///')
        # .get() yields None for missing keys, which is stored as SQL NULL.
        val = (
            extracted_details.get('origin'),
            extracted_details.get('destination'),
            extracted_details.get('expected_shipment_datetime'),
            extracted_details.get('types_of_service'),
            extracted_details.get('warehouse'),
            extracted_details.get('description'),
            extracted_details.get('quantities'),
            extracted_details.get('carrier_details'),
            extracted_details.get('sender'),
            extracted_details.get('receiver'),
            extracted_details.get('cc'),
            extracted_details.get('bcc'),
            extracted_details.get('subject')
        )
        cursor.execute(sql, val)
        mydb.commit()  # commit BEFORE claiming success (was printed pre-commit)
        print('data inserted successfully ! ...')

    except mysql.connector.Error as e:
        print(f"Database error: {e}")
    except Exception as e:
        print(f"Error inserting data: {e}")
    finally:
        # Release DB resources even on error.
        if cursor is not None:
            cursor.close()
        if mydb is not None:
            mydb.close()

def get_details(mail):
    """Run the extraction prompt plus the email body through the LLM
    pipeline and return the parsed shipment-field dict.

    Side effect: publishes the assembled prompt as the module global
    ``prompt_`` so extract_info() can strip it from the generated text.
    """
    global prompt_
    prompt_ = prompt + mail
    generation = pipe(prompt_, max_new_tokens=400)
    print(generation[0])
    parsed = extract_info(generation)
    print(parsed)
    return parsed

def change_bg_color(color):
    """Set the Streamlit app background to *color* by injecting a CSS
    override for the .stApp container (requires unsafe_allow_html)."""
    style_block = f"""
        <style>
        .stApp {{
            background-color: {color};
        }}
        </style>
        """
    st.markdown(style_block, unsafe_allow_html=True)

async def start(running):
    """Poll the inbox every 10 seconds while *running* is truthy.

    NOTE(review): *running* is never re-read inside the loop, so a truthy
    value loops forever; callers that pass the STRING 'False' keep looping,
    because any non-empty string is truthy.
    """
    while running:
        try:
            print('Started running...')
            # BUG FIX: read_email() is a plain sync function — awaiting its
            # None return raised TypeError on every iteration (swallowed by
            # the except below, so nothing ever worked).
            read_email()
        except Exception as e:
            print(f"Error in main loop: {e}")
        # BUG FIX: time.sleep blocked the event loop; asyncio.sleep yields.
        await asyncio.sleep(10)  # wait 10 seconds before the next poll

def stop():
    """Signal the polling loop to stop by clearing the global ``running`` flag."""
    global running
    print("Stopped running.")
    running = False

# Streamlit UI: "Start" loads the model, the nested "Analyse" button kicks
# off the polling loop, "Stop" clears the running flag.
# NOTE(review): a st.button nested inside another button's `if` rarely fires
# in Streamlit (the outer state is lost on rerun) — st.session_state would
# be the robust fix; structure kept, only the broken calls repaired.
st.title("Streamlit LLM App")

# Display buttons for starting and stopping the core function
if st.button("Start"):
    st.write('Model loading...')  # Display a label when the model is loading
    change_bg_color("lightyellow")  # Change background color to yellow
    # BUG FIX: asyncio has no load_llm_model attribute — run our coroutine.
    asyncio.run(load_llm_model())
    change_bg_color("lightblue")
    st.success('Model loaded successfully!')
    if st.button('Analyse'):
        change_bg_color('white')
        st.write('analyze started ! ...')
        # BUG FIX: asyncio.start('True') raised AttributeError; also pass a
        # real boolean — the STRING 'True' only worked by accident.
        asyncio.run(start(True))  # blocks this script run while polling
        change_bg_color("lightgreen")  # Change background color to green after loading

if st.button("Stop"):
    st.write('Process stopped.')  # Display stop message
    change_bg_color("white")  # Reset background color
    # BUG FIX: asyncio.start('False') raised AttributeError, and the string
    # 'False' is truthy anyway — call stop() to clear the flag.
    stop()