Upload folder using huggingface_hub
Browse files- __pycache__/classification.cpython-38.pyc +0 -0
- app.py +28 -0
- checkpoint-770.zip +3 -0
- checkpoint-770/config.json +48 -0
- checkpoint-770/merges.txt +0 -0
- checkpoint-770/model.safetensors +3 -0
- checkpoint-770/optimizer.pt +3 -0
- checkpoint-770/rng_state.pth +3 -0
- checkpoint-770/scheduler.pt +3 -0
- checkpoint-770/special_tokens_map.json +51 -0
- checkpoint-770/tokenizer.json +0 -0
- checkpoint-770/tokenizer_config.json +58 -0
- checkpoint-770/trainer_state.json +0 -0
- checkpoint-770/training_args.bin +3 -0
- checkpoint-770/vocab.json +0 -0
- classification.py +96 -0
- prompt.py +52 -0
- requirements.txt +9 -0
__pycache__/classification.cpython-38.pyc
ADDED
|
Binary file (5.49 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify
|
| 2 |
+
from flask_cors import CORS
|
| 3 |
+
import classification
|
| 4 |
+
|
| 5 |
+
# Flask application object; the route below is registered on it.
app = Flask(__name__)
# Enable CORS on the whole app using flask-cors' default settings.
# NOTE(review): no origin restrictions are configured here — confirm the
# defaults are acceptable for the deployment.
CORS(app)
|
| 7 |
+
|
| 8 |
+
@app.route('/api/classify', methods=['POST'])
def classify_email():
    """Classify the email posted as JSON and return the predicted intent.

    Expects a JSON object containing at least one of the keys ``subject``
    or ``body``. A missing key (or an explicit ``null``) is treated as an
    empty string.

    Returns:
        The string produced by ``classification.classify`` on success, or a
        ``({'error': 'Missing email content'}, 400)`` JSON response when the
        request body is not a JSON object or carries neither key.
    """
    # silent=True returns None for a missing, non-JSON or malformed body
    # instead of raising, so every bad request gets the same JSON 400 below.
    data = request.get_json(silent=True)

    # Reject non-object payloads (e.g. a JSON list) before calling .get on them.
    if not isinstance(data, dict) or ('subject' not in data and 'body' not in data):
        return jsonify({'error': 'Missing email content'}), 400

    # "or ''" also maps an explicit JSON null to an empty string rather than
    # letting it be formatted as the literal text "None".
    subject = data.get('subject') or ''
    body = data.get('body') or ''

    # Same layout as the original multi-line template: a leading newline,
    # one "Subject:" line, one "Body:" line, and a trailing newline.
    email = f"\nSubject: {subject}\nBody: {body}\n"

    return classification.classify(email)
|
| 26 |
+
|
| 27 |
+
if __name__ == '__main__':
    import os

    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so debug mode is opt-in (FLASK_DEBUG=1) rather than hard-coded on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, port=5007)
|
checkpoint-770.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:440847c43b6723ad6dec74aa701f01bdd3783d440233b69f99b10aaf1565b05d
|
| 3 |
+
size 617581195
|
checkpoint-770/config.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"RobertaForSequenceClassification"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"eos_token_id": 2,
|
| 9 |
+
"gradient_checkpointing": false,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"id2label": {
|
| 14 |
+
"0": "Intent_Amendment_Abstraction",
|
| 15 |
+
"1": "Intent_Clause_Protect",
|
| 16 |
+
"2": "Intent_Company_research",
|
| 17 |
+
"3": "Intent_Comparison_LOI_Lease",
|
| 18 |
+
"4": "Intent_Lease_Abstraction",
|
| 19 |
+
"5": "Intent_Lease_Listings_Comparison",
|
| 20 |
+
"6": "Intent_Sales_Listings_Comparison",
|
| 21 |
+
"7": "Intent_Transaction_Date_navigator"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"intermediate_size": 3072,
|
| 25 |
+
"label2id": {
|
| 26 |
+
"Intent_Amendment_Abstraction": 0,
|
| 27 |
+
"Intent_Clause_Protect": 1,
|
| 28 |
+
"Intent_Company_research": 2,
|
| 29 |
+
"Intent_Comparison_LOI_Lease": 3,
|
| 30 |
+
"Intent_Lease_Abstraction": 4,
|
| 31 |
+
"Intent_Lease_Listings_Comparison": 5,
|
| 32 |
+
"Intent_Sales_Listings_Comparison": 6,
|
| 33 |
+
"Intent_Transaction_Date_navigator": 7
|
| 34 |
+
},
|
| 35 |
+
"layer_norm_eps": 1e-05,
|
| 36 |
+
"max_position_embeddings": 514,
|
| 37 |
+
"model_type": "roberta",
|
| 38 |
+
"num_attention_heads": 12,
|
| 39 |
+
"num_hidden_layers": 12,
|
| 40 |
+
"pad_token_id": 1,
|
| 41 |
+
"position_embedding_type": "absolute",
|
| 42 |
+
"problem_type": "single_label_classification",
|
| 43 |
+
"torch_dtype": "float32",
|
| 44 |
+
"transformers_version": "4.51.3",
|
| 45 |
+
"type_vocab_size": 1,
|
| 46 |
+
"use_cache": true,
|
| 47 |
+
"vocab_size": 50265
|
| 48 |
+
}
|
checkpoint-770/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-770/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d157cae8ab980d89e5d426c84adeeed7a7c7aa988652ee0db903f0e554d5c96
|
| 3 |
+
size 498631280
|
checkpoint-770/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c2d013e8a08903506b83e98e7795cebb6ed24e8d8cfa6be6ab33da27d900b37
|
| 3 |
+
size 254292026
|
checkpoint-770/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5c63f61290f5421d6c6c8d31fafa5315c374c21561b10e72403200f87627ab2
|
| 3 |
+
size 14244
|
checkpoint-770/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b911e9d190a52c2fdaf1fc288a99472d1155b896916224bae1f6948903948c82
|
| 3 |
+
size 1064
|
checkpoint-770/special_tokens_map.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": true,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"cls_token": {
|
| 10 |
+
"content": "<s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": true,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"eos_token": {
|
| 17 |
+
"content": "</s>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": true,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"mask_token": {
|
| 24 |
+
"content": "<mask>",
|
| 25 |
+
"lstrip": true,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"pad_token": {
|
| 31 |
+
"content": "<pad>",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": true,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
},
|
| 37 |
+
"sep_token": {
|
| 38 |
+
"content": "</s>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": true,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false
|
| 43 |
+
},
|
| 44 |
+
"unk_token": {
|
| 45 |
+
"content": "<unk>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": true,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false
|
| 50 |
+
}
|
| 51 |
+
}
|
checkpoint-770/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-770/tokenizer_config.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"0": {
|
| 5 |
+
"content": "<s>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"1": {
|
| 13 |
+
"content": "<pad>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": true,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"2": {
|
| 21 |
+
"content": "</s>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": true,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"3": {
|
| 29 |
+
"content": "<unk>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": true,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"50264": {
|
| 37 |
+
"content": "<mask>",
|
| 38 |
+
"lstrip": true,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"bos_token": "<s>",
|
| 46 |
+
"clean_up_tokenization_spaces": false,
|
| 47 |
+
"cls_token": "<s>",
|
| 48 |
+
"eos_token": "</s>",
|
| 49 |
+
"errors": "replace",
|
| 50 |
+
"extra_special_tokens": {},
|
| 51 |
+
"mask_token": "<mask>",
|
| 52 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 53 |
+
"pad_token": "<pad>",
|
| 54 |
+
"sep_token": "</s>",
|
| 55 |
+
"tokenizer_class": "RobertaTokenizer",
|
| 56 |
+
"trim_offsets": true,
|
| 57 |
+
"unk_token": "<unk>"
|
| 58 |
+
}
|
checkpoint-770/trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-770/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c7a683e8060061a252378604e6aed2dc6535a81e8f3f319e777d656331eab38
|
| 3 |
+
size 5240
|
checkpoint-770/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
classification.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from pathlib import Path

import torch
from scipy.special import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 4 |
+
|
| 5 |
+
# Directory holding the fine-tuned checkpoint. Defaults to the
# "checkpoint-770" folder next to this module so the code runs on any
# machine; set EMAIL_CLASSIFIER_MODEL_DIR to point somewhere else.
MODEL_DIR = os.environ.get(
    "EMAIL_CLASSIFIER_MODEL_DIR",
    str(Path(__file__).resolve().parent / "checkpoint-770"),
)

# Loaded once at import time and shared by every classify() call.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
|
| 7 |
+
|
| 8 |
+
# Class-index to label mapping; mirrors "id2label" in the checkpoint's
# config.json.
_ID2LABEL = {
    0: "Intent_Amendment_Abstraction",
    1: "Intent_Clause_Protect",
    2: "Intent_Company_research",
    3: "Intent_Comparison_LOI_Lease",
    4: "Intent_Lease_Abstraction",
    5: "Intent_Lease_Listings_Comparison",
    6: "Intent_Sales_Listings_Comparison",
    7: "Intent_Transaction_Date_navigator",
}

# Label used when no single known intent can be assigned.
_FALLBACK_LABEL = "Intent_Mixed_Other"


def classify(email, *, min_confidence=0.0):
    """Predict the intent label for an email with the fine-tuned classifier.

    Args:
        email: Email text to classify (the caller passes subject and body
            combined into one string).
        min_confidence: Optional softmax-probability threshold. When the
            top class scores below it, the fallback label
            "Intent_Mixed_Other" is returned instead. The default of 0.0
            never triggers, so the top-scoring class is always returned,
            exactly as before.

    Returns:
        A string consisting of a newline followed by
        "🧠 Predicted Category: <label>".
    """
    # Input is truncated to 256 tokens.
    encoded_input = tokenizer(
        email,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=256,
    )

    # Inference only: no gradients needed.
    with torch.no_grad():
        output = model(**encoded_input)

    # Single input, so take row 0 of the logits and turn it into probabilities.
    probs = softmax(output.logits[0].detach().cpu().numpy())
    # Plain int so the dict lookup does not depend on numpy scalar hashing.
    predicted_class_id = int(probs.argmax())

    if probs[predicted_class_id] < min_confidence:
        predicted_label = _FALLBACK_LABEL
    else:
        # .get keeps the defensive fallback for an index outside the mapping.
        predicted_label = _ID2LABEL.get(predicted_class_id, _FALLBACK_LABEL)

    return f"\n🧠 Predicted Category: {predicted_label}"
|
prompt.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Few-shot prompt template for LLM-based intent classification of emails.
# Contains a single "{email}" placeholder for the email to classify;
# presumably filled with str.format(email=...) by the caller — no caller is
# visible in this file.
promptForIntentclassification = """
You are an AI who is an expert email analyzer and classification system specializing in real estate and legal documentation.
Your task is to accurately classify incoming emails into one of 8 predefined business intents. You must also identify emails that
contain mixed intents or fall outside these 8 categories.

**Here are the 8 predefined business intents:**

1. **Intent_Lease_Abstraction**: Emails requesting the extraction of key lease metadata and clauses (e.g., rent, term, landlord, tenant, renewal options, escalation schedules, important dates, responsibilities).
2. **Intent_Comparison_LOI_Lease**: Emails asking to compare a Letter of Intent (LOI) with a final lease agreement to identify discrepancies, changes, or deviations in terms (e.g., TI allowances, common area maintenance, termination clauses).
3. **Intent_Clause_Protect**: Emails requesting a review of lease clauses to detect potentially risky, missing, or unfavorable terms (e.g., subletting rights, break clauses, indemnity, assignment terms, unreasonable liabilities, compliance issues).
4. **Intent_Company_research**: Emails seeking background information or due diligence on a company involved in a transaction (e.g., credibility, litigation history, public disputes, bankruptcies, financial health, track record).
5. **Intent_Transaction_Date_navigator**: Emails focused on extracting, scheduling, or managing transaction-related dates (e.g., escrow, closing, notice periods, possession dates, due diligence deadlines, funding deadlines, inspection dates).
6. **Intent_Amendment_Abstraction**: Emails requesting the extraction of new terms or highlighting changes introduced by a lease amendment compared to the original lease.
7. **Intent_Sales_Listings_Comparison**: Emails asking to compare multiple sales listing summaries for properties, focusing on metrics like pricing, square footage, capitalization rate (cap rate), and average price per square foot (PSF).
8. **Intent_Lease_Listings_Comparison**: Emails asking to compare multiple lease listing summaries for properties, focusing on identifying the best terms, overlaps, gaps, per square foot pricing, and tenant-friendly clauses.

**If an email clearly contains elements of more than one of the above intents, or if its primary intent does not fit any of the 8 categories, classify it as "Intent_Mixed_Other".** This "Intent_Mixed_Other" category is crucial for handling complex or out-of-scope requests.

**Output Format:**

For each email, provide *only* the most appropriate intent label. Do not include any additional text or explanation strictly.


Here are some **Example Emails:**

**Email 1:**
Subject: Lease Summary for 123 Main St
Body: Hi team, please summarize the key terms of the lease for the 123 Main St property. I need to know the base rent, commencement and expiry dates, renewal options, and escalation schedule. Thanks!

**Classification 1:** Intent_Lease_Abstraction

**Email 2:**
Subject: LOI vs. Lease Discrepancies - 789 Oak Ave
Body: Hey, I need help comparing the LOI we submitted for 789 Oak Ave with the final lease. Can you identify any deviations, especially around TI allowances and common area maintenance? Appreciate your help.

**Classification 2:** Intent_Comparison_LOI_Lease

**Email 3:**
Subject: Review for Risky Clauses - New Lease for 456 Elm Rd
Body: Could you please review the new lease for 456 Elm Rd and detect any potentially risky or missing lease clauses, such as those related to subletting rights or indemnity? Best regards.

**Classification 3:** Intent_Clause_Protect

**Email 4:**
Subject: Background Check on Global Holdings Inc.
Body: Urgent: Can you do a background check on Global Holdings Inc. before we proceed? I’m particularly interested in any litigation history or bankruptcies in the past 5 years. Cheers.

**Classification 4:** Intent_Company_research


Here is the email {email}. Now classify this keeping all points in view. Do not hallucinate.
"""
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
flask>=2.2
|
| 2 |
+
flask-cors
|
| 3 |
+
werkzeug>=2.2
|
| 4 |
+
transformers
|
| 5 |
+
torch
|
| 6 |
+
scipy
|
| 7 |
+
datasets
|
| 8 |
+
scikit-learn
|
| 9 |
+
pandas
|