pavansuresh committed on
Commit
099275d
·
verified ·
1 Parent(s): 06e182c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -228
app.py CHANGED
@@ -1,230 +1,161 @@
1
- from flask import Flask, request, jsonify, redirect, url_for
2
- import pdfplumber
3
- from transformers import pipeline
4
- import sqlite3
5
- import json
6
- import requests
7
- from simple_salesforce import Salesforce
8
- import urllib.parse
9
- from dotenv import load_dotenv
10
  import os
11
- import logging
12
-
13
- # Set up logging
14
- logging.basicConfig(level=logging.DEBUG, filename='app.log', filemode='a',
15
- format='%(asctime)s - %(levelname)s - %(message)s')
16
- logger = logging.getLogger(__name__)
17
-
18
- app = Flask(__name__)
19
-
20
- # Load environment variables
21
- load_dotenv()
22
- SALESFORCE_OAUTH_URL = os.getenv('SALESFORCE_OAUTH_URL')
23
- SF_CLIENT_ID = os.getenv('SF_CLIENT_ID')
24
- SF_CLIENT_SECRET = os.getenv('SF_CLIENT_SECRET')
25
- SALESFORCE_REDIRECT_URI = os.getenv('SALESFORCE_REDIRECT_URI')
26
- SF_USERNAME = os.getenv('SF_USERNAME')
27
- SF_PASSWORD = os.getenv('SF_PASSWORD')
28
- SF_DOMAIN = os.getenv('SF_DOMAIN')
29
-
30
- # Validate environment variables
31
- required_env_vars = [SALESFORCE_OAUTH_URL, SF_CLIENT_ID, SF_CLIENT_SECRET, SALESFORCE_REDIRECT_URI]
32
- if None in required_env_vars:
33
- logger.error("Missing required environment variables")
34
- raise ValueError("Missing required environment variables in .env file")
35
-
36
- # Database connection (SQLite)
37
- try:
38
- conn = sqlite3.connect('contracts.db')
39
- cursor = conn.cursor()
40
- cursor.execute('''
41
- CREATE TABLE IF NOT EXISTS contracts (
42
- id INTEGER PRIMARY KEY AUTOINCREMENT,
43
- content TEXT NOT NULL,
44
- entities TEXT,
45
- compliance TEXT,
46
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
47
- )
48
- ''')
49
- conn.commit()
50
- logger.info("SQLite database initialized")
51
- except Exception as e:
52
- logger.error(f"Failed to initialize SQLite database: {str(e)}")
53
- raise
54
-
55
- # Initialize Hugging Face pipelines
56
- try:
57
- ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
58
- compliance_pipeline = pipeline("text-classification", model="facebook/bart-large-mnli")
59
- logger.info("Hugging Face pipelines initialized")
60
- except Exception as e:
61
- logger.error(f"Failed to initialize Hugging Face pipelines: {str(e)}")
62
- raise
63
-
64
- # Store OAuth tokens
65
- access_token = None
66
- instance_url = None
67
-
68
- @app.route('/login')
69
- def login():
70
- logger.debug("Initiating Salesforce OAuth login")
71
- params = {
72
- 'client_id': SF_CLIENT_ID,
73
- 'redirect_uri': SALESFORCE_REDIRECT_URI,
74
- 'response_type': 'code',
75
- 'scope': 'api refresh_token'
76
- }
77
- auth_url = f"https://login.salesforce.com/services/oauth2/authorize?{urllib.parse.urlencode(params)}"
78
- return redirect(auth_url)
79
-
80
- @app.route('/callback')
81
- def oauth_callback():
82
- global access_token, instance_url
83
- code = request.args.get('code')
84
- if not code:
85
- logger.error("Authorization code not found")
86
- return jsonify({"error": "Authorization code not found"}), 400
87
-
88
- logger.debug(f"Received OAuth code: {code}")
89
- data = {
90
- 'grant_type': 'authorization_code',
91
- 'client_id': SF_CLIENT_ID,
92
- 'client_secret': SF_CLIENT_SECRET,
93
- 'redirect_uri': SALESFORCE_REDIRECT_URI,
94
- 'code': code
95
- }
96
- try:
97
- response = requests.post(SALESFORCE_OAUTH_URL, data=data)
98
- response.raise_for_status()
99
- token_data = response.json()
100
- access_token = token_data['access_token']
101
- instance_url = token_data['instance_url']
102
- logger.info(f"Successfully obtained Salesforce access token. Instance URL: {instance_url}")
103
- return redirect(url_for('index'))
104
- except Exception as e:
105
- logger.error(f"Failed to obtain access token: {str(e)}")
106
- return jsonify({"error": "Failed to obtain access token", "details": str(e)}), 500
107
-
108
- @app.route('/')
109
- def index():
110
- logger.debug("Accessing root endpoint")
111
- return jsonify({"message": "Welcome to Contract Analyzer. Please log in to Salesforce."})
112
-
113
- def extract_text_from_pdf(file):
114
- try:
115
- with pdfplumber.open(file) as pdf:
116
- text = ""
117
- for page in pdf.pages:
118
- extracted = page.extract_text()
119
- if extracted:
120
- text += extracted + "\n"
121
- logger.debug(f"Extracted text from PDF: {text[:100]}...")
122
- return text
123
- except Exception as e:
124
- logger.error(f"Failed to extract text from PDF: {str(e)}")
125
- return str(e)
126
-
127
- @app.route('/upload_contract', methods=['POST'])
128
- def upload_contract():
129
- global access_token, instance_url
130
- if not access_token or not instance_url:
131
- logger.error("Not authenticated with Salesforce")
132
- return jsonify({"error": "Please authenticate with Salesforce first"}), 401
133
-
134
- if 'contract' not in request.files:
135
- logger.error("No file uploaded")
136
- return jsonify({"error": "No file uploaded"}), 400
137
-
138
- file = request.files['contract']
139
- logger.debug(f"Received file: {file.filename}")
140
- text = extract_text_from_pdf(file)
141
- if not text or isinstance(text, str) and text.startswith("Error"):
142
- logger.error(f"Failed to extract text: {text}")
143
- return jsonify({"error": "Failed to extract text from PDF", "details": text}), 500
144
-
145
- # Extract entities
146
- try:
147
- entities = ner_pipeline(text)
148
- extracted_data = [{"entity": ent['word'], "type": ent['entity_group']} for ent in entities]
149
- logger.info(f"Extracted {len(extracted_data)} entities")
150
- except Exception as e:
151
- logger.error(f"Entity extraction failed: {str(e)}")
152
- return jsonify({"error": "Entity extraction failed", "details": str(e)}), 500
153
-
154
- # Analyze compliance
155
- try:
156
- compliance_results = []
157
- sentences = [s.strip() for s in text.split('\n') if s.strip()]
158
- for sentence in sentences[:10]: # Limit for demo
159
- result = compliance_pipeline(sentence, candidate_labels=["low_risk", "high_risk"])
160
- compliance_results.append({
161
- "text": sentence[:100],
162
- "risk": result['labels'][0],
163
- "score": result['scores'][0]
164
- })
165
- logger.info(f"Analyzed compliance for {len(compliance_results)} sentences")
166
- except Exception as e:
167
- logger.error(f"Compliance analysis failed: {str(e)}")
168
- return jsonify({"error": "Compliance analysis failed", "details": str(e)}), 500
169
-
170
- # Store in SQLite
171
- try:
172
- cursor.execute(
173
- "INSERT INTO contracts (content, entities, compliance) VALUES (?, ?, ?)",
174
- (text, json.dumps(extracted_data), json.dumps(compliance_results))
175
- )
176
- contract_id = cursor.lastrowid
177
- conn.commit()
178
- logger.info(f"Stored contract in SQLite with ID: {contract_id}")
179
- except Exception as e:
180
- conn.rollback()
181
- logger.error(f"Database error: {str(e)}")
182
- return jsonify({"error": f"Database error: {str(e)}"}), 500
183
-
184
- # Push to Salesforce
185
- try:
186
- sf = Salesforce(instance_url=instance_url, session_id=access_token)
187
- sf_response = sf.Contract__c.create({
188
- 'Content__c': text[:1000], # Truncate for Salesforce field limit
189
- 'Entities__c': json.dumps(extracted_data),
190
- 'Compliance__c': json.dumps(compliance_results)
191
- })
192
- logger.info(f"Stored contract in Salesforce with ID: {sf_response['id']}")
193
- except Exception as e:
194
- logger.error(f"Failed to save to Salesforce: {str(e)}")
195
- return jsonify({"error": f"Failed to save to Salesforce: {str(e)}"}), 500
196
-
197
- return jsonify({
198
- "contract_id": contract_id,
199
- "salesforce_id": sf_response['id'],
200
- "entities": extracted_data,
201
- "compliance": compliance_results
202
- })
203
-
204
- @app.route('/get_contract/<int:id>', methods=['GET'])
205
- def get_contract(id):
206
  try:
207
- cursor.execute("SELECT content, entities, compliance FROM contracts WHERE id = ?", (id,))
208
- result = cursor.fetchone()
209
- if result:
210
- logger.info(f"Retrieved contract ID: {id} from SQLite")
211
- return jsonify({
212
- "content": result[0],
213
- "entities": json.loads(result[1]),
214
- "compliance": json.loads(result[2])
215
- })
216
- logger.warning(f"Contract ID {id} not found")
217
- return jsonify({"error": "Contract not found"}), 404
218
- except Exception as e:
219
- logger.error(f"Error retrieving contract: {str(e)}")
220
- return jsonify({"error": str(e)}), 500
221
-
222
- @app.teardown_appcontext
223
- def close_db(exception):
224
- cursor.close()
225
- conn.close()
226
- logger.debug("Closed SQLite database connection")
227
-
228
- if __name__ == '__main__':
229
- logger.info("Starting Flask application")
230
- app.run(debug=True, host='0.0.0.0', port=8080)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
 
 
 
 
 
 
 
 
2
  import os
3
+ from utils import (
4
+ get_salesforce_client, get_salesforce_objects, get_object_fields,
5
+ extract_text_from_pdf, extract_key_value_pairs, map_fields,
6
+ create_record, attach_pdf, log_failure
7
+ )
8
+
9
+ # Ensure uploads directory exists
10
+ os.makedirs("uploads", exist_ok=True)
11
+
12
+ # Gradio Interface Functions
13
def upload_pdfs(*files):
    """Copy uploaded PDFs into the local ``uploads/`` directory.

    The handler is wired to a ``gr.File(file_count="multiple")`` component,
    whose value is delivered as ONE argument holding a list of files, so the
    positional arguments are flattened before processing.  Each entry may be
    a path string or an object exposing the path as ``.name`` (Gradio's
    temp-file wrappers); ``None`` entries are skipped.

    Args:
        *files: Uploaded files, either passed individually or as a single
            list/tuple of files.

    Returns:
        A status string listing how many PDFs were stored and where.
    """
    import shutil  # local import: only this handler needs it

    # Accept both upload_pdfs(a, b) and upload_pdfs([a, b]).
    candidates = []
    for item in files:
        if isinstance(item, (list, tuple)):
            candidates.extend(item)
        else:
            candidates.append(item)

    # Guard against the directory having been removed since start-up.
    os.makedirs("uploads", exist_ok=True)

    uploaded_files = []
    for file in candidates:
        if file is None:
            continue
        # `.name` on Gradio file objects is the full temp path; plain strings
        # are already paths.
        source_path = os.fspath(getattr(file, "name", file))
        # Keep only the file name -- embedding the whole temp path under
        # "uploads/" would point at a directory that does not exist.
        file_path = f"uploads/{os.path.basename(source_path)}"
        # Copy by path instead of file.read(): path-string uploads have no
        # read() method.
        shutil.copyfile(source_path, file_path)
        uploaded_files.append(file_path)
    return f"Uploaded {len(uploaded_files)} PDF(s): {', '.join(uploaded_files)}"
22
+
23
def fetch_objects():
    """Load the Salesforce object names for the object dropdown.

    Returns:
        A ``(dropdown_update, status_message)`` pair.  On any failure the
        dropdown choices are cleared and the status carries ``"Error: ..."``;
        on success the choices are the fetched objects.
    """
    sf, error = get_salesforce_client()
    if error:
        # gr.update() is the version-agnostic update helper; the per-component
        # gr.Dropdown.update classmethod no longer exists in Gradio 4+.
        return gr.update(choices=[]), f"Error: {error}"
    objects, error = get_salesforce_objects(sf)
    if error:
        return gr.update(choices=[]), f"Error: {error}"
    return gr.update(choices=objects), "Objects fetched successfully"
31
+
32
def fetch_fields(object_name):
    """Load the field names of a Salesforce object for the field checkboxes.

    Args:
        object_name: Value currently selected in the object dropdown.

    Returns:
        A ``(checkbox_group_update, status_message)`` pair.  On any failure
        the choices are cleared and the status carries ``"Error: ..."``; on
        success the choices are the fetched fields.
    """
    sf, error = get_salesforce_client()
    if error:
        # gr.update() is the version-agnostic update helper; the per-component
        # gr.CheckboxGroup.update classmethod no longer exists in Gradio 4+.
        return gr.update(choices=[]), f"Error: {error}"
    fields, error = get_object_fields(sf, object_name)
    if error:
        return gr.update(choices=[]), f"Error: {error}"
    return gr.update(choices=fields), "Fields fetched successfully"
40
+
41
def process_pdf(pdf_path):
    """Extract the raw text and the key-value pairs of a PDF for display.

    Args:
        pdf_path: PDF reference forwarded unchanged to the extraction helpers.

    Returns:
        ``"Error: ..."`` if either extraction step reports an error,
        otherwise a two-line summary of the text and the key-value pairs.
    """
    text_data, error = extract_text_from_pdf(pdf_path)
    if not error:
        # Only run the second extraction when the first one succeeded.
        kv_pairs, error = extract_key_value_pairs(pdf_path)
    if error:
        return f"Error: {error}"
    return f"Text: {text_data}\nKey-Value Pairs: {kv_pairs}"
49
+
50
def display_mappings(pdf_path, object_name):
    """Render the proposed PDF-key to Salesforce-field mappings as text.

    Args:
        pdf_path: PDF reference forwarded to the key-value extraction helper.
        object_name: Salesforce object whose fields are mapping targets.

    Returns:
        ``"Error: ..."`` from the first step that reports an error, otherwise
        one ``"key -> field (Confidence: score)"`` line per mapping.
    """
    # Each step runs only while no earlier step has reported an error, so the
    # first error encountered is the one returned.
    sf, error = get_salesforce_client()
    if not error:
        fields, error = get_object_fields(sf, object_name)
    if not error:
        extracted_data, error = extract_key_value_pairs(pdf_path)
    if not error:
        mappings, confidence_scores, error = map_fields(extracted_data, fields)
    if error:
        return f"Error: {error}"

    rendered = [
        f"{key} -> {field} (Confidence: {confidence_scores[key]})\n"
        for key, field in mappings.items()
    ]
    return "".join(rendered)
67
+
68
def _fail_migration(pdf_path, object_name, error):
    """Record a failed migration and build the message shown to the user."""
    log_failure(pdf_path, object_name, error)
    return f"Error: {error}"


def migrate_to_salesforce(pdf_path, object_name):
    """Create a Salesforce record from a PDF and attach the PDF to it.

    Steps, in order: connect to Salesforce, extract key-value pairs from the
    PDF, fetch the target object's fields, map extracted keys to fields,
    create the record, attach the PDF.  Every failure is passed to
    ``log_failure`` before the error string is returned.

    Args:
        pdf_path: PDF reference forwarded to the extraction/attachment helpers.
        object_name: Salesforce object to create the record on.

    Returns:
        ``"Error: ..."`` on failure, otherwise the created record id followed
        by the attachment status.
    """
    sf, error = get_salesforce_client()
    if error:
        return _fail_migration(pdf_path, object_name, error)

    extracted_data, error = extract_key_value_pairs(pdf_path)
    if error:
        return _fail_migration(pdf_path, object_name, error)
    if not extracted_data:
        # Previously surfaced as an unlogged IndexError on extracted_data[0].
        return _fail_migration(
            pdf_path, object_name, "No key-value pairs extracted from PDF"
        )

    fields, error = get_object_fields(sf, object_name)
    if error:
        return _fail_migration(pdf_path, object_name, error)

    mappings, _, error = map_fields(extracted_data, fields)
    if error:
        return _fail_migration(pdf_path, object_name, error)

    # Only the first extracted entry is migrated (unchanged from before).
    first_entry = extracted_data[0]
    keys, values = first_entry["keys"], first_entry["values"]

    unmapped = [key for key in keys if key not in mappings]
    if unmapped:
        # Previously surfaced as an unlogged KeyError in the comprehension
        # below; log it so the failure is recorded like every other one.
        return _fail_migration(
            pdf_path,
            object_name,
            f"No field mapping for: {', '.join(str(key) for key in unmapped)}",
        )

    data = {mappings[key]: value for key, value in zip(keys, values)}

    record_id, error = create_record(sf, object_name, data)
    if error:
        return _fail_migration(pdf_path, object_name, error)

    attach_status, error = attach_pdf(sf, record_id, pdf_path)
    if error:
        return _fail_migration(pdf_path, object_name, error)

    return f"Record Created: {record_id}\n{attach_status}"
95
+
96
def display_failures():
    """Summarise the failed migrations recorded in ``failures.json``.

    The file is read as JSON Lines: one JSON object per line, each with the
    keys ``pdf``, ``object`` and ``error``.  Blank lines are ignored.

    Returns:
        One ``"Failure N: PDF=..., Object=..., Error=..."`` line per entry,
        or ``"No failures logged"`` when the file is missing or holds no
        entries.
    """
    # Local import: this module's top-level imports do not include json, so
    # the bare json.loads call raised NameError on every invocation.
    import json

    try:
        with open("failures.json", "r") as f:
            # Skip blank lines (e.g. a trailing newline) instead of letting
            # json.loads fail on an empty string.
            failures = [json.loads(line) for line in f if line.strip()]
    except FileNotFoundError:
        return "No failures logged"

    if not failures:
        return "No failures logged"

    return "".join(
        f"Failure {idx}: PDF={failure['pdf']}, Object={failure['object']}, "
        f"Error={failure['error']}\n"
        for idx, failure in enumerate(failures, start=1)
    )
106
+
107
def retry_migration(pdf_path, object_name):
    """Re-run the migration for a PDF and return its status message.

    Args:
        pdf_path: PDF reference to migrate again.
        object_name: Salesforce object to create the record on.

    Returns:
        Whatever ``migrate_to_salesforce`` returns for the same arguments.
    """
    outcome = migrate_to_salesforce(pdf_path, object_name)
    return outcome
109
+
110
+ # Gradio UI
111
with gr.Blocks() as app:
    gr.Markdown("# Smart Contract Migrator")

    # --- PDF upload: every change of the file selection triggers a copy ---
    gr.Markdown("## Upload PDFs")
    pdf_upload = gr.File(
        label="Upload Contract PDFs",
        file_count="multiple",
        file_types=[".pdf"],
    )
    upload_status = gr.Textbox(label="Upload Status")
    pdf_upload.change(fn=upload_pdfs, inputs=pdf_upload, outputs=upload_status)

    # --- Salesforce object selection ---
    gr.Markdown("## Salesforce Object Selection")
    object_dropdown = gr.Dropdown(label="Select Salesforce Object")
    fetch_objects_button = gr.Button("Fetch Objects")
    object_status = gr.Textbox(label="Object Fetch Status")
    fetch_objects_button.click(
        fn=fetch_objects,
        outputs=[object_dropdown, object_status],
    )

    # --- Field selection for the chosen object ---
    field_checkboxes = gr.CheckboxGroup(label="Select Fields")
    fetch_fields_button = gr.Button("Fetch Fields")
    field_status = gr.Textbox(label="Field Fetch Status")
    fetch_fields_button.click(
        fn=fetch_fields,
        inputs=object_dropdown,
        outputs=[field_checkboxes, field_status],
    )

    # --- PDF processing ---
    # NOTE(review): pdf_upload is a multi-file component, so the handlers
    # below receive its value as-is -- confirm the utils helpers accept that.
    gr.Markdown("## Process PDF")
    process_button = gr.Button("Process PDF")
    process_output = gr.Textbox(label="Extracted Data")
    process_button.click(
        fn=process_pdf,
        inputs=pdf_upload,
        outputs=process_output,
    )

    # --- Field mapping preview ---
    gr.Markdown("## Field Mapping")
    map_button = gr.Button("Map Fields")
    mapping_output = gr.Textbox(label="Field Mappings")
    map_button.click(
        fn=display_mappings,
        inputs=[pdf_upload, object_dropdown],
        outputs=mapping_output,
    )

    # --- Salesforce migration ---
    gr.Markdown("## Migrate to Salesforce")
    migrate_button = gr.Button("Migrate to Salesforce")
    migrate_output = gr.Textbox(label="Migration Status")
    migrate_button.click(
        fn=migrate_to_salesforce,
        inputs=[pdf_upload, object_dropdown],
        outputs=migrate_output,
    )

    # --- Reconciliation dashboard: list failures, retry one by path ---
    gr.Markdown("## Reconciliation Dashboard")
    show_failures_button = gr.Button("Show Failures")
    failures_output = gr.Textbox(label="Failed Migrations")
    pdf_path_input = gr.Textbox(label="PDF Path to Retry")
    retry_button = gr.Button("Retry Migration")
    retry_output = gr.Textbox(label="Retry Status")
    show_failures_button.click(fn=display_failures, outputs=failures_output)
    retry_button.click(
        fn=retry_migration,
        inputs=[pdf_path_input, object_dropdown],
        outputs=retry_output,
    )

# Start the web server once the layout is fully defined.
app.launch()