pavansuresh committed on
Commit
3b7cab4
·
verified ·
1 Parent(s): a00fb61

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +87 -83
utils.py CHANGED
@@ -1,105 +1,109 @@
1
  import requests
2
  import base64
 
3
  import os
 
 
 
 
4
  from dotenv import load_dotenv
5
 
6
- # Load environment variables from .env file
7
  load_dotenv()
8
 
9
# Salesforce credentials and OAuth client settings, all read from the
# process environment.
USERNAME = os.getenv("SF_USERNAME")
PASSWORD = os.getenv("SF_PASSWORD")
SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
CLIENT_ID = os.getenv("SF_CLIENT_ID")
CLIENT_SECRET = os.getenv("SF_CLIENT_SECRET")
# Host prefix of the login endpoint: defaults to sandbox ("test");
# use "login" for production.
DOMAIN = os.getenv("SF_DOMAIN", "test")

# Fail fast at import time when any mandatory setting is unset or empty.
if any(
    not setting
    for setting in (USERNAME, PASSWORD, SECURITY_TOKEN, CLIENT_ID, CLIENT_SECRET)
):
    raise ValueError("❌ Missing required environment variables. Ensure SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, SF_CLIENT_ID, and SF_CLIENT_SECRET are set.")

LOGIN_URL = f"https://{DOMAIN}.salesforce.com"
 
 
 
 
 
 
22
 
23
def get_token():
    """Log in to Salesforce with the OAuth2 password grant.

    Posts the module-level client id/secret, username and
    password + security token to ``{LOGIN_URL}/services/oauth2/token``.

    Returns:
        A ``(access_token, instance_url)`` tuple taken from the JSON response.

    Raises:
        Exception: on a network failure, a non-JSON response, or any response
            that is not HTTP 200 with an ``access_token`` field.
    """
    url = f"{LOGIN_URL}/services/oauth2/token"
    payload = {
        'grant_type': 'password',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'username': USERNAME,
        'password': PASSWORD + SECURITY_TOKEN,
    }

    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.post(url, data=payload, timeout=30)
    except requests.RequestException as e:
        raise Exception(f"🚨 Network error during Salesforce login: {str(e)}") from e

    # Only the status is echoed: on success the body contains the access
    # token, which must not be written to stdout or logs.
    print(f"Response Status: {response.status_code}")

    try:
        result = response.json()
    except ValueError as e:
        # e.g. an HTML error page from a proxy; report it as a login failure
        # rather than letting a decode error escape.
        raise Exception(
            f"❌ Salesforce login failed: non-JSON response (HTTP {response.status_code})"
        ) from e

    if response.status_code == 200 and 'access_token' in result:
        return result['access_token'], result['instance_url']

    print(f"Login failed with response: {result}")
    raise Exception(f"❌ Salesforce login failed: {result}")
 
 
 
 
 
 
 
 
46
 
47
def get_salesforce_objects(token, instance_url):
    """List the sObjects visible to the authenticated user.

    Args:
        token: OAuth access token, sent as a Bearer credential.
        instance_url: Base URL of the Salesforce instance.

    Returns:
        The ``sobjects`` list from the v56.0 ``/sobjects`` response
        (empty list when the key is absent).

    Raises:
        Exception: when the server answers with a non-success HTTP status.
        requests.RequestException: on network failure or timeout.
    """
    headers = {'Authorization': f'Bearer {token}'}
    response = requests.get(
        f"{instance_url}/services/data/v56.0/sobjects",
        headers=headers,
        timeout=30,  # never hang indefinitely on a stalled connection
    )
    if not response.ok:
        # Error bodies are not guaranteed to be a JSON object, so calling
        # .get() on them is unsafe; surface the failure explicitly instead.
        raise Exception(
            f"❌ Failed to list Salesforce objects (HTTP {response.status_code}): {response.text}"
        )
    return response.json().get('sobjects', [])
 
 
 
 
 
 
51
 
52
def create_record(object_name, data, token, instance_url):
    """Create one record of ``object_name`` through the REST API.

    Args:
        object_name: API name of the sObject, placed in the URL path.
        data: Field values, sent as the JSON request body.
        token: OAuth access token, sent as a Bearer credential.
        instance_url: Base URL of the Salesforce instance.

    Returns:
        The decoded JSON response body, returned as-is whatever the HTTP
        status; callers must inspect it themselves.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json',
    }
    url = f"{instance_url}/services/data/v56.0/sobjects/{object_name}/"
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.post(url, json=data, headers=headers, timeout=30)
    return response.json()
60
 
61
def attach_pdf(record_id, file_path, token, instance_url):
    """Upload a local file as an ``Attachment`` on an existing record.

    Args:
        record_id: Id of the parent record (sent as ``ParentId``).
        file_path: Path of the file to upload; its basename becomes ``Name``.
        token: OAuth access token, sent as a Bearer credential.
        instance_url: Base URL of the Salesforce instance.

    Returns:
        The decoded JSON response body of the create call.

    Raises:
        Exception: "File not found" when ``file_path`` does not exist, or a
            generic "Error attaching PDF" wrapper for any other failure. The
            underlying exception is preserved as ``__cause__``.
    """
    try:
        with open(file_path, "rb") as f:
            # The request carries the file content as base64 text in "Body".
            body = base64.b64encode(f.read()).decode()

        data = {
            "ParentId": record_id,
            "Name": os.path.basename(file_path),
            "Body": body,
        }
        headers = {
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json',
        }
        url = f"{instance_url}/services/data/v56.0/sobjects/Attachment"
        # A timeout keeps a stalled upload from blocking the caller forever.
        response = requests.post(url, json=data, headers=headers, timeout=60)
        return response.json()
    except FileNotFoundError as e:
        raise Exception(f"❌ File not found: {file_path}") from e
    except Exception as e:
        raise Exception(f"🚨 Error attaching PDF: {str(e)}") from e
82
 
83
# Example usage: authenticate, list objects, create an Account, attach a PDF.
if __name__ == "__main__":
    try:
        # Every later call needs the token and the instance URL.
        token, instance_url = get_token()
        print(f"✅ Successfully authenticated. Token: {token[:10]}..., Instance URL: {instance_url}")

        objects = get_salesforce_objects(token, instance_url)
        print(f"Found {len(objects)} Salesforce objects")

        sample_data = {"Name": "Test Account"}
        result = create_record("Account", sample_data, token, instance_url)
        print(f"Created record: {result}")

        # Attach the sample PDF only when the create call reported an id.
        record_id = result.get("id")
        if record_id:
            attach_pdf(record_id, "sample.pdf", token, instance_url)
            print("✅ PDF attached successfully")
    except Exception as exc:
        print(f"Error: {str(exc)}")
 
1
  import requests
2
  import base64
3
+ import json
4
  import os
5
+ from simple_salesforce import Salesforce
6
+ from pdf2image import convert_from_path
7
+ import pytesseract
8
+ from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
9
  from dotenv import load_dotenv
10
 
11
+ # Load environment variables
12
  load_dotenv()
13
 
14
# Salesforce Authentication
def get_salesforce_client():
    """Build a ``simple_salesforce.Salesforce`` client from environment variables.

    Reads ``SALESFORCE_USERNAME``, ``SALESFORCE_PASSWORD``,
    ``SALESFORCE_SECURITY_TOKEN`` and ``SALESFORCE_INSTANCE_URL``.

    Returns:
        ``(client, None)`` on success, ``(None, error_message)`` on any failure.
        The outcome is also printed to stdout.
    """
    try:
        login_kwargs = {
            'username': os.getenv('SALESFORCE_USERNAME'),
            'password': os.getenv('SALESFORCE_PASSWORD'),
            'security_token': os.getenv('SALESFORCE_SECURITY_TOKEN'),
            'instance_url': os.getenv('SALESFORCE_INSTANCE_URL'),
        }
        client = Salesforce(**login_kwargs)
        print("Salesforce client connected successfully")
        return client, None
    except Exception as exc:
        print(f"Salesforce connection failed: {str(exc)}")
        return None, str(exc)
28
 
29
# Fetch Salesforce Objects
def get_salesforce_objects(sf):
    """Return the names of all sObjects flagged ``createable``.

    Args:
        sf: Client exposing ``restful(path)``; queried with ``'sobjects'``.

    Returns:
        ``(names, None)`` on success, ``([], error_message)`` on any failure.
    """
    try:
        listing = sf.restful('sobjects')
        creatable_names = []
        for entry in listing['sobjects']:
            if entry['createable']:
                creatable_names.append(entry['name'])
    except Exception as exc:
        return [], str(exc)
    return creatable_names, None
36
 
37
# Fetch Object Fields
def get_object_fields(sf, object_name):
    """Return the field names of one sObject.

    Args:
        sf: Client whose attribute named ``object_name`` exposes ``describe()``.
        object_name: API name of the sObject to describe.

    Returns:
        ``(field_names, None)`` on success, ``([], error_message)`` on any
        failure (unknown object, API error, malformed describe result).
    """
    try:
        # getattr() goes through normal attribute lookup and falls back to the
        # client's __getattr__ hook; calling the dunder directly breaks on any
        # object that exposes the sObject as a plain attribute.
        desc = getattr(sf, object_name).describe()
        return [field['name'] for field in desc['fields']], None
    except Exception as e:
        return [], str(e)
 
 
44
 
45
# OCR for Text Extraction
def extract_text_from_pdf(pdf_path):
    """Run OCR over every page of a PDF.

    The PDF is rendered with ``convert_from_path`` and each resulting image is
    passed to ``pytesseract.image_to_string``.

    Returns:
        ``({"pages": [text, ...]}, None)`` on success (one entry per rendered
        image), ``({}, error_message)`` on any failure.
    """
    try:
        page_images = convert_from_path(pdf_path)
        page_texts = []
        for page_image in page_images:
            page_texts.append(pytesseract.image_to_string(page_image))
        return {"pages": page_texts}, None
    except Exception as exc:
        return {}, str(exc)
53
 
54
# Key-Value Pair Extraction using LayoutLMv3
# Process-wide cache so the processor and model are loaded once, not per call.
_LAYOUTLM_CACHE = {}


def _load_layoutlm():
    """Return the cached ``(processor, model)`` pair, loading it on first use."""
    if "model" not in _LAYOUTLM_CACHE:
        # Load both into locals first so a failure on the second load cannot
        # leave a half-populated cache behind.
        processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
        # NOTE(review): confirm this checkpoint id exists on the Hub.
        model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base-finetuned-funsd")
        _LAYOUTLM_CACHE["processor"] = processor
        _LAYOUTLM_CACHE["model"] = model
    return _LAYOUTLM_CACHE["processor"], _LAYOUTLM_CACHE["model"]


def extract_key_value_pairs(pdf_path):
    """Extract key/value pairs from each page of a PDF (placeholder output).

    Every page image is run through the LayoutLMv3 processor and model, but
    the model output is not post-processed yet: each page currently yields the
    same hard-coded dummy entry.

    Returns:
        ``(pages, None)`` on success, where ``pages`` holds one
        ``{"keys": [...], "values": [...]}`` dict per page;
        ``([], error_message)`` on any failure.
    """
    try:
        processor, model = _load_layoutlm()
        images = convert_from_path(pdf_path)
        extracted_data = []
        for img in images:
            encoding = processor(img, truncation=True, return_tensors="pt")
            # The forward pass is kept so processor/model errors still surface;
            # its result is discarded until post-processing is implemented.
            model(**encoding)
            # Simplified: Return dummy key-value pairs (real implementation needs post-processing)
            extracted_data.append({"keys": ["Contract Number", "Date"], "values": ["12345", "2025-01-01"]})
        return extracted_data, None
    except Exception as e:
        return [], str(e)
69
 
70
# Map Extracted Data to Salesforce Fields
def map_fields(extracted_data, salesforce_fields):
    """Map extracted keys to Salesforce field names by substring match.

    Only the first page of ``extracted_data`` is considered. A key maps to a
    field when the lower-cased key occurs inside the lower-cased field name;
    when several fields match, the last one in ``salesforce_fields`` wins.

    Args:
        extracted_data: List of per-page dicts carrying a ``"keys"`` list.
        salesforce_fields: Candidate Salesforce field names.

    Returns:
        ``(mappings, confidence_scores, error)``. ``error`` is ``None`` on
        success and a message when there is no extracted data to map, in which
        case both dicts are empty. Confidence is a fixed placeholder of 0.9.
    """
    mappings = {}
    confidence_scores = {}

    # Empty or missing input is reported through the error slot, like the
    # sibling helpers do, instead of crashing with IndexError.
    if not extracted_data:
        return mappings, confidence_scores, "No extracted data to map"

    # Simplified: using the first page only. A page without "keys" maps nothing.
    first_page_keys = extracted_data[0].get("keys") or []

    for key in first_page_keys:
        needle = key.lower()
        for field in salesforce_fields:
            if needle in field.lower():
                mappings[key] = field
                confidence_scores[key] = 0.9  # Dummy confidence score
    return mappings, confidence_scores, None
80
 
81
# Create Salesforce Record
def create_record(sf, object_api_name, data):
    """Create one record of ``object_api_name`` and return its id.

    Args:
        sf: Client whose attribute named ``object_api_name`` exposes
            ``create(data)``.
        object_api_name: API name of the sObject to create.
        data: Field values for the new record.

    Returns:
        ``(record_id, None)`` on success, ``(None, error_message)`` on any
        failure (unknown object, API error, result without an ``id``).
    """
    try:
        # getattr() goes through normal attribute lookup and falls back to the
        # client's __getattr__ hook; calling the dunder directly breaks on any
        # object that exposes the sObject as a plain attribute.
        result = getattr(sf, object_api_name).create(data)
        return result['id'], None
    except Exception as e:
        return None, str(e)
 
88
 
89
# Attach PDF to Salesforce Record
def attach_pdf(sf, record_id, file_path):
    """Upload a local file as an ``Attachment`` on an existing record.

    Args:
        sf: Client exposing ``Attachment.create(payload)``.
        record_id: Id of the parent record (sent as ``ParentId``).
        file_path: Path of the file to upload; its basename becomes ``Name``
            and its base64-encoded content becomes ``Body``.

    Returns:
        ``("PDF Attached", None)`` on success, ``(None, error_message)`` on
        any failure, including an unreadable file.
    """
    try:
        with open(file_path, "rb") as pdf_file:
            raw_bytes = pdf_file.read()
        body_b64 = base64.b64encode(raw_bytes).decode()
        payload = dict(
            ParentId=record_id,
            Name=os.path.basename(file_path),
            Body=body_b64,
        )
        sf.Attachment.create(payload)
    except Exception as exc:
        return None, str(exc)
    return "PDF Attached", None
103
 
104
# Log Failed Migration
def log_failure(pdf_path, object_name, error):
    """Append one failed-migration record to ``failures.json``.

    The file is in JSON-lines form: one object per line with the keys
    ``pdf``, ``object`` and ``error``. It is opened in append mode in the
    current working directory.

    Args:
        pdf_path: Path of the PDF whose migration failed.
        object_name: Target Salesforce object name.
        error: Error description; non-JSON values such as exception objects
            are stored as their ``str()`` form.
    """
    log_entry = {"pdf": pdf_path, "object": object_name, "error": error}
    # Serialize before touching the file: json.dump() streams chunks, so a
    # serialization error would leave a half-written line and corrupt the log.
    # default=str is deliberate, so the failure logger itself never fails on
    # an exception object.
    line = json.dumps(log_entry, default=str)
    with open("failures.json", "a", encoding="utf-8") as f:
        f.write(line + "\n")