Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,11 +12,11 @@ from io import BytesIO
|
|
| 12 |
import uuid
|
| 13 |
import logging
|
| 14 |
import textwrap
|
|
|
|
| 15 |
from reportlab.lib.pagesizes import letter
|
| 16 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
| 17 |
from reportlab.lib.styles import getSampleStyleSheet
|
| 18 |
-
|
| 19 |
-
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
|
| 20 |
|
| 21 |
# Set up logging
|
| 22 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -353,14 +353,13 @@ footer, .gradio-footer, .hide, [data-testid="Use via API"], [data-testid="mmsett
|
|
| 353 |
}
|
| 354 |
"""
|
| 355 |
|
| 356 |
-
# Salesforce credentials.
# Read from environment variables (for example, secrets configured on the
# hosting platform) so that no secret is hard-coded in source control.
# Each value falls back to an empty string when the variable is unset.
SF_USERNAME = os.environ.get("SF_USERNAME", "")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "")
SF_TOKEN = os.environ.get("SF_TOKEN", "")
|
| 360 |
|
| 361 |
-
@retry(stop_max_attempt_number=3, wait_fixed=2000, retry=retry_if_exception_type(Exception))
|
| 362 |
def authenticate_salesforce() -> Salesforce:
|
| 363 |
-
"""Authenticate with Salesforce and return a Salesforce client"""
|
| 364 |
try:
|
| 365 |
sf = Salesforce(
|
| 366 |
username=SF_USERNAME,
|
|
@@ -371,7 +370,7 @@ def authenticate_salesforce() -> Salesforce:
|
|
| 371 |
return sf
|
| 372 |
except Exception as e:
|
| 373 |
logger.error(f"Failed to authenticate with Salesforce: {str(e)}")
|
| 374 |
-
raise
|
| 375 |
|
| 376 |
def get_hugging_face_sentiment(text: str) -> float:
|
| 377 |
"""Get sentiment score using Hugging Face model"""
|
|
@@ -385,135 +384,37 @@ def get_hugging_face_sentiment(text: str) -> float:
|
|
| 385 |
logger.error(f"Hugging Face sentiment analysis failed: {str(e)}. Using fallback score.")
|
| 386 |
return 0.5
|
| 387 |
|
| 388 |
-
def
|
| 389 |
-
"""Generate a PDF with
|
| 390 |
try:
|
| 391 |
-
pdf_filename = f"
|
| 392 |
-
pdf_path = os.path.join(
|
| 393 |
pdf_file = BytesIO()
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
y_position = 8.0 * inch
|
| 408 |
-
c.setFont("Helvetica-Bold", 12)
|
| 409 |
-
c.drawString(1 * inch, y_position, "Summary:")
|
| 410 |
-
y_position -= 0.3 * inch
|
| 411 |
-
|
| 412 |
-
c.setFont("Helvetica", 10)
|
| 413 |
-
summary_data = {
|
| 414 |
-
"Total Penalties": sum(data['penalty_counts'].values()),
|
| 415 |
-
"Total Obligations": sum(data['obligation_counts'].values()),
|
| 416 |
-
"Total Delays": sum(data['delay_counts'].values()),
|
| 417 |
-
"Analysis Timestamp": time.strftime("%Y-%m-%d %H:%M:%S IST")
|
| 418 |
-
}
|
| 419 |
-
for key, value in summary_data.items():
|
| 420 |
-
c.drawString(1 * inch, y_position, f"{key}: {value}")
|
| 421 |
-
y_position -= 0.25 * inch
|
| 422 |
-
|
| 423 |
-
y_position -= 0.5 * inch
|
| 424 |
-
c.setFont("Helvetica-Bold", 12)
|
| 425 |
-
c.drawString(1 * inch, y_position, "Detailed Analysis:")
|
| 426 |
-
y_position -= 0.3 * inch
|
| 427 |
-
|
| 428 |
-
c.setFont("Helvetica", 10)
|
| 429 |
-
sections = [
|
| 430 |
-
("Penalty Clauses", data['penalty_details']),
|
| 431 |
-
("Obligation Clauses", data['obligation_details']),
|
| 432 |
-
("Delay Clauses", data['delay_details']),
|
| 433 |
-
("Penalty Amounts", data['penalty_amounts']),
|
| 434 |
-
("Extracted Data", data['penalty_examples'])
|
| 435 |
-
]
|
| 436 |
-
for title, content in sections:
|
| 437 |
-
c.drawString(1 * inch, y_position, f"{title}:")
|
| 438 |
-
y_position -= 0.2 * inch
|
| 439 |
-
lines = content.split("\n")
|
| 440 |
-
for line in lines:
|
| 441 |
-
if y_position < 1 * inch:
|
| 442 |
-
c.showPage()
|
| 443 |
-
c.setFont("Helvetica", 10)
|
| 444 |
-
y_position = 10 * inch
|
| 445 |
-
c.drawString(1.2 * inch, y_position, line)
|
| 446 |
-
y_position -= 0.2 * inch
|
| 447 |
-
|
| 448 |
-
c.save()
|
| 449 |
pdf_file.seek(0)
|
| 450 |
-
|
| 451 |
with open(pdf_path, "wb") as f:
|
| 452 |
f.write(pdf_file.getvalue())
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
|
|
|
| 456 |
except Exception as e:
|
| 457 |
logger.error(f"Error generating PDF: {e}")
|
| 458 |
return "", "", None
|
| 459 |
|
| 460 |
-
def upload_pdf_to_salesforce(sf: Salesforce, pdf_file: BytesIO, report_id: str) -> str:
    """Upload a PDF to Salesforce as a ContentVersion and return its download URL.

    Args:
        sf: Salesforce client used to create and then query the ContentVersion.
        pdf_file: In-memory PDF whose bytes are base64-encoded and uploaded.
        report_id: Sent as ``FirstPublishLocationId`` of the new ContentVersion.

    Returns:
        The download URL built from ``sf.sf_instance`` and the new version id,
        or ``""`` when no PDF was supplied, the created version cannot be read
        back, or any error occurs (errors are logged, never raised).
    """
    try:
        if not pdf_file:
            logger.error("No PDF file provided for upload")
            return ""

        # Take a single timestamp so the title and the client file name always
        # agree, even when the call straddles a second boundary.
        timestamp = int(time.time())
        encoded_pdf = base64.b64encode(pdf_file.getvalue()).decode('utf-8')
        content_version_data = {
            "Title": f"Contract_Analysis_{timestamp}",
            "PathOnClient": f"contract_analysis_{timestamp}.pdf",
            "VersionData": encoded_pdf,
            "FirstPublishLocationId": report_id,
        }
        content_version = sf.ContentVersion.create(content_version_data)
        version_id = content_version['id']

        # Read the record back to confirm that the upload actually exists.
        result = sf.query(
            f"SELECT Id, ContentDocumentId FROM ContentVersion WHERE Id = '{version_id}'"
        )
        if not result['records']:
            logger.error("Failed to retrieve ContentVersion")
            return ""

        file_url = f"https://{sf.sf_instance}/sfc/servlet.shepherd/version/download/{version_id}"
        logger.info(f"PDF uploaded to Salesforce: {file_url}")
        return file_url
    except Exception as e:
        # Best-effort upload: keep the traceback in the log but never raise.
        logger.exception(f"Error uploading PDF to Salesforce: {e}")
        return ""
|
| 487 |
-
|
| 488 |
-
def save_to_salesforce(sf: Salesforce, data: Dict, pdf_file: BytesIO) -> Dict:
    """Create a Custom_Risk_Analysis__c record and attach the PDF when given.

    Args:
        sf: Salesforce client used to create the record.
        data: Analysis results. Reads the keys ``sentiment_score``,
            ``risk_score``, ``risk_level``, ``record_id``, ``penalty_examples``,
            ``penalty_details``, ``penalty_amounts``, ``obligation_details``
            and ``delay_details``.
        pdf_file: In-memory PDF to upload; the upload is skipped when falsy.

    Returns:
        ``{'record_id': <new record id>, 'download_url': <url or "">}``.

    Raises:
        Exception: When record creation fails; the original error is chained
            as ``__cause__``.
    """
    # Text fields are truncated to this many characters before saving
    # (presumably the Salesforce long-text field limit; verify against the
    # object definition).
    max_text_length = 131072
    long_text_fields = {
        'Penalty_Examples__c': 'penalty_examples',
        'Penalty_Details__c': 'penalty_details',
        'Penalty_Amounts__c': 'penalty_amounts',
        'Obligation_Details__c': 'obligation_details',
        'Delay_Details__c': 'delay_details',
    }
    try:
        # Save Custom_Risk_Analysis__c record
        record = {
            'Sentiment_Score__c': data['sentiment_score'],
            'Risk_Score__c': data['risk_score'],
            'Risk_Level__c': data['risk_level'],
            'Record_Id__c': data['record_id'],
        }
        for field_name, data_key in long_text_fields.items():
            record[field_name] = data[data_key][:max_text_length]

        result = sf.Custom_Risk_Analysis__c.create(record)
        logger.info(f"Successfully created Salesforce record: {result['id']}")

        # Upload PDF if available
        download_url = upload_pdf_to_salesforce(sf, pdf_file, result['id']) if pdf_file else ""

        return {
            'record_id': result['id'],
            'download_url': download_url,
        }
    except Exception as e:
        logger.error(f"Failed to save to Salesforce: {str(e)}")
        # Chain the original error so its traceback is not lost to callers.
        raise Exception(f"Salesforce record or file creation failed: {str(e)}") from e
|
| 516 |
-
|
| 517 |
def extract_text_from_pdf(pdf_path: str) -> str:
|
| 518 |
"""Extract text from PDF using pdfplumber"""
|
| 519 |
try:
|
|
@@ -666,16 +567,11 @@ def format_clause_example(example: str, index: int) -> str:
|
|
| 666 |
def analyze_pdf(file_obj) -> List:
|
| 667 |
"""Main analysis function for Gradio interface"""
|
| 668 |
try:
|
| 669 |
-
import tempfile
|
| 670 |
-
temp_dir = tempfile.mkdtemp()
|
| 671 |
-
|
| 672 |
if not file_obj:
|
| 673 |
raise Exception("No PDF file uploaded. Please upload a valid PDF file.")
|
| 674 |
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
except Exception as e:
|
| 678 |
-
raise Exception(f"Salesforce authentication failed: {str(e)}")
|
| 679 |
|
| 680 |
try:
|
| 681 |
text = extract_text_from_pdf(file_obj.name)
|
|
@@ -758,29 +654,12 @@ def analyze_pdf(file_obj) -> List:
|
|
| 758 |
extracted_data = "\n".join([format_clause_example(sent, i+1) for i, sent in enumerate(penalty_sentences[:3])]) if penalty_sentences else "<div class='success-box'>✅ No penalty clauses found - Excellent contract terms!</div>"
|
| 759 |
|
| 760 |
record_id = str(uuid.uuid4())
|
| 761 |
-
sf_data = {
|
| 762 |
-
'sentiment_score': sentiment_score,
|
| 763 |
-
'risk_score': risk_score,
|
| 764 |
-
'risk_level': risk_level,
|
| 765 |
-
'record_id': record_id,
|
| 766 |
-
'penalty_counts': penalty_counts,
|
| 767 |
-
'penalty_details': "\n".join([f"{kw}: {count}" for kw, count in penalty_counts.items()]),
|
| 768 |
-
'penalty_amounts': "\n".join([f"${amt:,.2f}" for amt in penalty_values[:5]]) if penalty_values else "No specific penalty amounts found",
|
| 769 |
-
'obligation_counts': obligation_counts,
|
| 770 |
-
'obligation_details': "\n".join([f"{kw}: {count}" for kw, count in obligation_counts.items()]),
|
| 771 |
-
'delay_counts': delay_counts,
|
| 772 |
-
'delay_details': "\n".join([f"{kw}: {count}" for kw, count in delay_counts.items()]),
|
| 773 |
-
'penalty_examples': extracted_data
|
| 774 |
-
}
|
| 775 |
-
|
| 776 |
try:
|
| 777 |
-
pdf_path, pdf_url, pdf_file =
|
| 778 |
-
|
| 779 |
-
download_url = salesforce_result['download_url'] or pdf_url
|
| 780 |
-
logger.info(f"Saved to Salesforce with Record ID: {salesforce_result['record_id']}, PDF URL: {download_url}")
|
| 781 |
except Exception as e:
|
| 782 |
-
logger.error(f"
|
| 783 |
-
|
| 784 |
|
| 785 |
box_class = "success-box" if risk_level == "Low" else "warning-box" if risk_level == "Medium" else "danger-box"
|
| 786 |
risk_icon = "✅" if risk_level == "Low" else "⚠" if risk_level == "Medium" else "🚨"
|
|
@@ -800,8 +679,8 @@ def analyze_pdf(file_obj) -> List:
|
|
| 800 |
</div>
|
| 801 |
{sentiment_meter}
|
| 802 |
<div style='margin-top: 15px;'>
|
| 803 |
-
<strong>Download
|
| 804 |
-
|
| 805 |
</div>
|
| 806 |
</div>
|
| 807 |
"""
|
|
@@ -844,7 +723,7 @@ def analyze_pdf(file_obj) -> List:
|
|
| 844 |
"""
|
| 845 |
return [error_message] * 8
|
| 846 |
finally:
|
| 847 |
-
if os.path.exists(temp_dir):
|
| 848 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 849 |
logger.info(f"Cleaned up temporary directory: {temp_dir}")
|
| 850 |
|
|
|
|
| 12 |
import uuid
|
| 13 |
import logging
|
| 14 |
import textwrap
|
| 15 |
+
import tempfile
|
| 16 |
from reportlab.lib.pagesizes import letter
|
| 17 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
| 18 |
from reportlab.lib.styles import getSampleStyleSheet
|
| 19 |
+
import time
|
|
|
|
| 20 |
|
| 21 |
# Set up logging
|
| 22 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 353 |
}
|
| 354 |
"""
|
| 355 |
|
| 356 |
+
# Salesforce credentials (kept for potential future use, but not used in this update).
# Read from environment variables (for example, secrets configured on the
# hosting platform) so that no secret is hard-coded in source control.
# Each value falls back to an empty string when the variable is unset.
SF_USERNAME = os.environ.get("SF_USERNAME", "")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "")
SF_TOKEN = os.environ.get("SF_TOKEN", "")
|
| 360 |
|
|
|
|
| 361 |
def authenticate_salesforce() -> Salesforce:
|
| 362 |
+
"""Authenticate with Salesforce and return a Salesforce client (not used in this update)"""
|
| 363 |
try:
|
| 364 |
sf = Salesforce(
|
| 365 |
username=SF_USERNAME,
|
|
|
|
| 370 |
return sf
|
| 371 |
except Exception as e:
|
| 372 |
logger.error(f"Failed to authenticate with Salesforce: {str(e)}")
|
| 373 |
+
raise Exception(f"Salesforce authentication failed: {str(e)}")
|
| 374 |
|
| 375 |
def get_hugging_face_sentiment(text: str) -> float:
|
| 376 |
"""Get sentiment score using Hugging Face model"""
|
|
|
|
| 384 |
logger.error(f"Hugging Face sentiment analysis failed: {str(e)}. Using fallback score.")
|
| 385 |
return 0.5
|
| 386 |
|
| 387 |
+
def generate_sentiment_pdf(sentiment_score: float, output_dir: str) -> Tuple[str, str, BytesIO]:
    """Render a short sentiment report as a PDF and save it to ``output_dir``.

    The document is built in memory, then written to disk under a file name
    derived from the current Unix timestamp.

    Args:
        sentiment_score: Score to report; the report text describes the scale
            as 0.0 (negative) to 1.0 (positive).
        output_dir: Directory that receives the PDF; created if it is missing.

    Returns:
        ``(pdf_path, public_url, pdf_file)`` on success, where ``pdf_file`` is
        the in-memory buffer rewound to offset 0. On any failure the error is
        logged and ``("", "", None)`` is returned instead of raising.
    """
    try:
        pdf_filename = f"sentiment_analysis_{int(time.time())}.pdf"
        pdf_path = os.path.join(output_dir, pdf_filename)

        # Above 0.6 reads as positive, below 0.4 as negative, otherwise neutral.
        interpretation = "Positive" if sentiment_score > 0.6 else "Negative" if sentiment_score < 0.4 else "Neutral"
        sentiment_text = f"Sentiment Score: {sentiment_score:.2f} (Scale: 0.0 Negative to 1.0 Positive)"

        styles = getSampleStyleSheet()
        story = [
            Paragraph("Sentiment Analysis Report", styles['Title']),
            Spacer(1, 12),
            Paragraph(sentiment_text, styles['Normal']),
            Spacer(1, 12),
            Paragraph(f"Interpretation: {interpretation}", styles['Normal']),
        ]

        pdf_file = BytesIO()
        doc = SimpleDocTemplate(pdf_file, pagesize=letter)
        doc.build(story)

        # Make sure the target directory exists; an empty path means the
        # current working directory and needs no creation.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(pdf_path, "wb") as f:
            f.write(pdf_file.getvalue())

        # Rewind so that callers can read the buffer from the start.
        pdf_file.seek(0)

        public_url = f"http://localhost:7860/static/output/{pdf_filename}"  # Placeholder URL; adjust as needed
        logger.info(f"PDF generated: {public_url}")
        return pdf_path, public_url, pdf_file
    except Exception as e:
        # Best-effort generation: keep the traceback in the log but never raise.
        logger.exception(f"Error generating PDF: {e}")
        return "", "", None
|
| 417 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
def extract_text_from_pdf(pdf_path: str) -> str:
|
| 419 |
"""Extract text from PDF using pdfplumber"""
|
| 420 |
try:
|
|
|
|
| 567 |
def analyze_pdf(file_obj) -> List:
|
| 568 |
"""Main analysis function for Gradio interface"""
|
| 569 |
try:
|
|
|
|
|
|
|
|
|
|
| 570 |
if not file_obj:
|
| 571 |
raise Exception("No PDF file uploaded. Please upload a valid PDF file.")
|
| 572 |
|
| 573 |
+
temp_dir = tempfile.mkdtemp(prefix="contract_analysis_")
|
| 574 |
+
logger.info(f"Created temporary directory for PDF processing: {temp_dir}")
|
|
|
|
|
|
|
| 575 |
|
| 576 |
try:
|
| 577 |
text = extract_text_from_pdf(file_obj.name)
|
|
|
|
| 654 |
extracted_data = "\n".join([format_clause_example(sent, i+1) for i, sent in enumerate(penalty_sentences[:3])]) if penalty_sentences else "<div class='success-box'>✅ No penalty clauses found - Excellent contract terms!</div>"
|
| 655 |
|
| 656 |
record_id = str(uuid.uuid4())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 657 |
try:
|
| 658 |
+
pdf_path, pdf_url, pdf_file = generate_sentiment_pdf(sentiment_score, temp_dir)
|
| 659 |
+
logger.info(f"Generated PDF with URL: {pdf_url}")
|
|
|
|
|
|
|
| 660 |
except Exception as e:
|
| 661 |
+
logger.error(f"PDF generation failed: {e}")
|
| 662 |
+
pdf_url = "Failed to generate PDF download link"
|
| 663 |
|
| 664 |
box_class = "success-box" if risk_level == "Low" else "warning-box" if risk_level == "Medium" else "danger-box"
|
| 665 |
risk_icon = "✅" if risk_level == "Low" else "⚠" if risk_level == "Medium" else "🚨"
|
|
|
|
| 679 |
</div>
|
| 680 |
{sentiment_meter}
|
| 681 |
<div style='margin-top: 15px;'>
|
| 682 |
+
<strong>Download Sentiment Report:</strong>
|
| 683 |
+
<a href="{pdf_url}" target="_blank" style="color: var(--primary-color); text-decoration: underline;">Download PDF</a>
|
| 684 |
</div>
|
| 685 |
</div>
|
| 686 |
"""
|
|
|
|
| 723 |
"""
|
| 724 |
return [error_message] * 8
|
| 725 |
finally:
|
| 726 |
+
if 'temp_dir' in locals() and os.path.exists(temp_dir):
|
| 727 |
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 728 |
logger.info(f"Cleaned up temporary directory: {temp_dir}")
|
| 729 |
|