pavansuresh committed on
Commit
a00fb61
·
verified ·
1 Parent(s): 06b2fe0

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +83 -87
utils.py CHANGED
@@ -1,109 +1,105 @@
1
  import requests
2
  import base64
3
- import json
4
  import os
5
- from simple_salesforce import Salesforce
6
- from pdf2image import convert_from_path
7
- import pytesseract
8
- from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
9
  from dotenv import load_dotenv
10
 
11
- # Load environment variables
12
  load_dotenv()
13
 
14
- # Salesforce Authentication
15
def get_salesforce_client():
    """Open a Salesforce session using credentials from the environment.

    Reads SALESFORCE_USERNAME, SALESFORCE_PASSWORD, SALESFORCE_SECURITY_TOKEN
    and SALESFORCE_INSTANCE_URL with ``os.getenv`` and hands them to the
    ``Salesforce`` constructor.

    Returns:
        ``(client, None)`` when the constructor succeeds, otherwise
        ``(None, message)`` where *message* is the stringified exception.
    """
    credentials = {
        "username": os.getenv('SALESFORCE_USERNAME'),
        "password": os.getenv('SALESFORCE_PASSWORD'),
        "security_token": os.getenv('SALESFORCE_SECURITY_TOKEN'),
        "instance_url": os.getenv('SALESFORCE_INSTANCE_URL'),
    }
    try:
        client = Salesforce(**credentials)
        print("Salesforce client connected successfully")
    except Exception as exc:
        # Errors are reported through the (value, error) tuple, not raised.
        print(f"Salesforce connection failed: {str(exc)}")
        return None, str(exc)
    return client, None
28
 
29
- # Fetch Salesforce Objects
30
def get_salesforce_objects(sf):
    """List the names of the objects flagged as createable.

    Calls ``sf.restful('sobjects')`` and keeps the ``name`` of every entry in
    the response's ``sobjects`` list whose ``createable`` value is truthy.

    Returns:
        ``(names, None)`` on success, or ``([], message)`` where *message* is
        the stringified exception.
    """
    try:
        listing = sf.restful('sobjects')
        creatable_names = []
        for entry in listing['sobjects']:
            if entry['createable']:
                creatable_names.append(entry['name'])
    except Exception as exc:
        return [], str(exc)
    return creatable_names, None
36
 
37
- # Fetch Object Fields
38
def get_object_fields(sf, object_name):
    """Return the field names reported by ``describe()`` for one object.

    The object handle is obtained with an explicit ``sf.__getattr__`` call,
    then ``describe()`` is invoked and the ``name`` of every entry in its
    ``fields`` list is collected.

    Returns:
        ``(field_names, None)`` on success, or ``([], message)`` where
        *message* is the stringified exception.
    """
    try:
        sobject = sf.__getattr__(object_name)
        description = sobject.describe()
        field_names = []
        for field in description['fields']:
            field_names.append(field['name'])
    except Exception as exc:
        return [], str(exc)
    return field_names, None
44
 
45
- # OCR for Text Extraction
46
def extract_text_from_pdf(pdf_path):
    """Run OCR over every page of a PDF.

    The PDF is rendered to images with ``convert_from_path`` and each image
    is passed to ``pytesseract.image_to_string``.

    Returns:
        ``({"pages": [text, ...]}, None)`` on success, or ``({}, message)``
        where *message* is the stringified exception.
    """
    try:
        page_images = convert_from_path(pdf_path)
        page_texts = []
        for page_image in page_images:
            page_texts.append(pytesseract.image_to_string(page_image))
    except Exception as exc:
        return {}, str(exc)
    return {"pages": page_texts}, None
 
53
 
54
- # Key-Value Pair Extraction using LayoutLMv3
55
def extract_key_value_pairs(pdf_path):
    """Produce key/value pairs for each page of a PDF (placeholder output).

    Loads the LayoutLMv3 processor and token-classification model, renders
    the PDF to page images, and runs the model on each page. The model output
    is not post-processed yet: every page yields the same hard-coded
    keys/values dictionary.

    Returns:
        ``(pages, None)`` with one dict per page on success, or
        ``([], message)`` where *message* is the stringified exception.
    """
    try:
        processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
        model = LayoutLMv3ForTokenClassification.from_pretrained("microsoft/layoutlmv3-base-finetuned-funsd")
        page_images = convert_from_path(pdf_path)
        pages = []
        for page_image in page_images:
            model_inputs = processor(page_image, truncation=True, return_tensors="pt")
            # The forward pass still runs, but its result is discarded until
            # real post-processing is implemented.
            model(**model_inputs)
            pages.append({"keys": ["Contract Number", "Date"], "values": ["12345", "2025-01-01"]})
    except Exception as exc:
        return [], str(exc)
    return pages, None
69
 
70
- # Map Extracted Data to Salesforce Fields
71
def map_fields(extracted_data, salesforce_fields):
    """Map extracted keys onto Salesforce field names by substring match.

    Only the first page (``extracted_data[0]``) is considered. A key maps to
    a field when the lower-cased key occurs inside the lower-cased field
    name; if several fields match, the last one in ``salesforce_fields``
    wins. Every mapping gets a fixed placeholder confidence of 0.9.

    Args:
        extracted_data: List of per-page dicts, each with a ``"keys"`` list.
        salesforce_fields: Iterable of Salesforce field names.

    Returns:
        ``(mappings, confidence_scores, error)``. ``error`` is ``None`` on
        success; for empty ``extracted_data`` both dicts are empty and
        ``error`` is a message instead of an IndexError being raised.
    """
    if not extracted_data:
        return {}, {}, "No extracted data to map"

    # Lower-case each field once rather than once per key; materialising the
    # list also makes a one-shot iterable of fields safe to reuse.
    lowered_fields = [(field, field.lower()) for field in salesforce_fields]

    mappings = {}
    confidence_scores = {}
    for key in extracted_data[0].get("keys", []):  # Simplified: first page only
        needle = key.lower()
        for field, field_lower in lowered_fields:
            if needle in field_lower:
                mappings[key] = field
                confidence_scores[key] = 0.9  # Dummy confidence score
    return mappings, confidence_scores, None
80
 
81
- # Create Salesforce Record
82
def create_record(sf, object_api_name, data):
    """Create one record of the given object type.

    The object handle is obtained with an explicit ``sf.__getattr__`` call
    and its ``create`` method is invoked with ``data``.

    Returns:
        ``(record_id, None)`` where *record_id* is the ``'id'`` entry of the
        create result, or ``(None, message)`` where *message* is the
        stringified exception.
    """
    try:
        sobject = sf.__getattr__(object_api_name)
        outcome = sobject.create(data)
        record_id = outcome['id']
    except Exception as exc:
        return None, str(exc)
    return record_id, None
 
 
 
 
 
 
88
 
89
- # Attach PDF to Salesforce Record
90
def attach_pdf(sf, record_id, file_path):
    """Upload a file as an Attachment on an existing record.

    The file is read in binary mode and base64-encoded, then sent through
    ``sf.Attachment.create`` with ``ParentId``, ``Name`` (the file's base
    name) and ``Body``.

    Returns:
        ``("PDF Attached", None)`` on success, or ``(None, message)`` where
        *message* is the stringified exception.
    """
    try:
        with open(file_path, "rb") as pdf_file:
            raw_bytes = pdf_file.read()
        payload = {
            "ParentId": record_id,
            "Name": os.path.basename(file_path),
            "Body": base64.b64encode(raw_bytes).decode(),
        }
        sf.Attachment.create(payload)
    except Exception as exc:
        return None, str(exc)
    return "PDF Attached", None
103
 
104
- # Log Failed Migration
105
def log_failure(pdf_path, object_name, error):
    """Append one failed-migration entry to ``failures.json``.

    Each call appends a single JSON object with the keys ``pdf``, ``object``
    and ``error``, followed by a newline, to ``failures.json`` in the current
    working directory.
    """
    record = dict(pdf=pdf_path, object=object_name, error=error)
    with open("failures.json", "a") as log_file:
        json.dump(record, log_file)
        log_file.write("\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import requests
2
  import base64
 
3
  import os
 
 
 
 
4
  from dotenv import load_dotenv
5
 
6
+ # Load environment variables from .env file
7
  load_dotenv()
8
 
9
# Salesforce credentials, read once from the process environment when this
# module is imported.
USERNAME = os.getenv("SF_USERNAME")
PASSWORD = os.getenv("SF_PASSWORD")
SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
CLIENT_ID = os.getenv("SF_CLIENT_ID")
CLIENT_SECRET = os.getenv("SF_CLIENT_SECRET")
DOMAIN = os.getenv("SF_DOMAIN", "test")  # Default to sandbox; use "login" for production

# Fail fast: refuse to continue if any required value is unset or empty.
if any(not value for value in (USERNAME, PASSWORD, SECURITY_TOKEN, CLIENT_ID, CLIENT_SECRET)):
    raise ValueError("❌ Missing required environment variables. Ensure SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, SF_CLIENT_ID, and SF_CLIENT_SECRET are set.")

# Base URL of the login host, built from DOMAIN.
LOGIN_URL = "https://" + DOMAIN + ".salesforce.com"
 
 
 
 
 
 
22
 
23
def get_token(timeout=30):
    """Log in with the OAuth password grant and return the session details.

    Posts the module-level credentials (``CLIENT_ID``, ``CLIENT_SECRET``,
    ``USERNAME`` and ``PASSWORD`` concatenated with ``SECURITY_TOKEN``) to
    ``{LOGIN_URL}/services/oauth2/token``.

    Args:
        timeout: Seconds to wait for the login request before giving up.

    Returns:
        ``(access_token, instance_url)`` taken from the JSON response.

    Raises:
        Exception: If the request fails at the transport level, the response
            is not JSON, or the response does not carry an access token.
    """
    url = f"{LOGIN_URL}/services/oauth2/token"
    payload = {
        'grant_type': 'password',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'username': USERNAME,
        'password': PASSWORD + SECURITY_TOKEN
    }

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as e:
        raise Exception(f"🚨 Network error during Salesforce login: {str(e)}") from e

    # Only the status is logged: on success the body contains the access
    # token, which must not end up in stdout or log files.
    print(f"Response Status: {response.status_code}")

    try:
        result = response.json()
    except ValueError as e:
        # A non-JSON body (e.g. a proxy error page) is a login failure, not
        # a network error.
        raise Exception(
            f"❌ Salesforce login failed: non-JSON response (HTTP {response.status_code})"
        ) from e

    if response.status_code == 200 and 'access_token' in result:
        return result['access_token'], result['instance_url']

    print(f"Login failed with response: {result}")
    raise Exception(f"❌ Salesforce login failed: {result}")
 
 
 
46
 
47
def get_salesforce_objects(token, instance_url, timeout=30):
    """Fetch the list of sObject descriptions from the REST API.

    Sends a GET to ``{instance_url}/services/data/v56.0/sobjects`` with a
    bearer token.

    Args:
        token: OAuth access token.
        instance_url: Base URL of the Salesforce instance.
        timeout: Seconds to wait for the request before giving up.

    Returns:
        The ``sobjects`` list from the JSON response, or ``[]`` if the key
        is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On transport failures or timeouts.
    """
    headers = {'Authorization': f'Bearer {token}'}
    response = requests.get(
        f"{instance_url}/services/data/v56.0/sobjects",
        headers=headers,
        timeout=timeout,
    )
    # Surface HTTP errors explicitly: an error payload need not be a dict,
    # in which case the .get() below would fail with an opaque AttributeError.
    response.raise_for_status()
    return response.json().get('sobjects', [])
51
+
52
def create_record(object_name, data, token, instance_url, timeout=30):
    """Create a record by POSTing JSON to the sObject REST endpoint.

    Args:
        object_name: API name of the object, inserted into the URL path.
        data: Dict of field values, sent as the JSON request body.
        token: OAuth access token.
        instance_url: Base URL of the Salesforce instance.
        timeout: Seconds to wait for the request before giving up.

    Returns:
        The decoded JSON response body, whatever its shape. The HTTP status
        is not checked here.
        NOTE(review): error responses are presumably a JSON array rather than
        an object, so callers should not assume a dict; confirm against the
        REST API documentation.
    """
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json'
    }
    url = f"{instance_url}/services/data/v56.0/sobjects/{object_name}/"
    # Without a timeout a stalled connection would block forever.
    response = requests.post(url, json=data, headers=headers, timeout=timeout)
    return response.json()
60
 
61
def attach_pdf(record_id, file_path, token, instance_url, timeout=30):
    """Upload a file as an Attachment on an existing record.

    The file is read in binary mode, base64-encoded and POSTed as JSON to
    ``{instance_url}/services/data/v56.0/sobjects/Attachment``.

    Args:
        record_id: Id of the parent record (sent as ``ParentId``).
        file_path: Path of the file to upload; its base name becomes ``Name``.
        token: OAuth access token.
        instance_url: Base URL of the Salesforce instance.
        timeout: Seconds to wait for the request before giving up.

    Returns:
        The decoded JSON response body. The HTTP status is not checked here.

    Raises:
        Exception: If the file does not exist, or for any other failure while
            reading, sending or decoding. The original exception is kept as
            ``__cause__``.
    """
    try:
        with open(file_path, "rb") as f:
            body = base64.b64encode(f.read()).decode()

        data = {
            "ParentId": record_id,
            "Name": os.path.basename(file_path),
            "Body": body
        }
        headers = {
            'Authorization': f'Bearer {token}',
            'Content-Type': 'application/json'
        }
        url = f"{instance_url}/services/data/v56.0/sobjects/Attachment"
        # Without a timeout a stalled upload would block forever.
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
        return response.json()
    except FileNotFoundError as e:
        raise Exception(f"❌ File not found: {file_path}") from e
    except Exception as e:
        raise Exception(f"🚨 Error attaching PDF: {str(e)}") from e
82
 
83
+ # Example usage
84
if __name__ == "__main__":
    # Demo flow: authenticate, list objects, create an Account, attach a PDF.
    try:
        # Get access token and instance URL
        token, instance_url = get_token()
        print(f"✅ Successfully authenticated. Token: {token[:10]}..., Instance URL: {instance_url}")

        # Example: List Salesforce objects
        objects = get_salesforce_objects(token, instance_url)
        print(f"Found {len(objects)} Salesforce objects")

        # Example: Create a record
        sample_data = {"Name": "Test Account"}
        result = create_record("Account", sample_data, token, instance_url)
        print(f"Created record: {result}")

        # Example: Attach a PDF
        # create_record returns the decoded JSON as-is, which is not
        # guaranteed to be a dict, so only call .get() on a dict.
        record_id = result.get("id") if isinstance(result, dict) else None
        if record_id:
            attachment = attach_pdf(record_id, "sample.pdf", token, instance_url)
            # Report success only when the response carries a new id.
            if isinstance(attachment, dict) and attachment.get("id"):
                print("✅ PDF attached successfully")
            else:
                print(f"PDF attachment failed: {attachment}")
        else:
            print("Record creation did not return an id; skipping PDF attachment")
    except Exception as e:
        print(f"Error: {str(e)}")