# DPR-YOLO8 / app.py
# Author: Rammohan0504 — "Update app.py" (commit cfcd8f7, verified)
import torch
from ultralytics import YOLO
import numpy as np
import random
from PIL import Image
import gradio as gr
from datetime import datetime
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as PDFImage
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from simple_salesforce import Salesforce
import os
from dotenv import load_dotenv
import base64
import concurrent.futures
# Load environment variables from a local .env file (if present)
load_dotenv()
# Salesforce credentials, read from the environment
SF_USERNAME = os.getenv('SF_USERNAME')
SF_PASSWORD = os.getenv('SF_PASSWORD')
SF_SECURITY_TOKEN = os.getenv('SF_SECURITY_TOKEN')
# Salesforce connection — best-effort: on failure `sf` is left as None and the
# app still runs, skipping the upload step later in generate_dpr().
try:
    sf = Salesforce(username=SF_USERNAME, password=SF_PASSWORD, security_token=SF_SECURITY_TOKEN)
except Exception as e:
    sf = None
    print(f"Failed to connect to Salesforce: {str(e)}")
# Load YOLOv8 pretrained model (nano variant; downloads weights on first use)
model = YOLO('yolov8n.pt')
def infer_activity_and_phrase(obj, box, img_height):
    """
    Infer a construction activity and a descriptive report phrase for a detection.

    Uses the object's class name, bounding-box size, and vertical position to
    pick an activity label and build a short sentence. Wording is randomized,
    so repeated calls for the same detection may produce different phrases.

    Args:
        obj (str): Detected COCO class name (e.g. 'truck', 'person').
        box (sequence): Detection row [x1, y1, x2, y2, conf, cls].
        img_height (int): Source image height in pixels, for position analysis.

    Returns:
        tuple: (activity, phrase), or (None, None) when the object is skipped.
    """
    # Relabel people as workers for construction-report wording.
    if obj.lower() == 'person':
        obj = 'worker'

    # Normalize the class name.
    obj_lower = obj.lower().replace('_', ' ')

    # Skip very short labels and common animal classes — not site-relevant.
    if len(obj_lower) <= 4 or any(animal in obj_lower for animal in ['cat', 'dog', 'bird', 'fish']):
        return None, None

    # Construction materials get a fixed "pouring" style activity at ground level.
    materials = ['cement', 'concrete', 'bricks', 'iron', 'steel', 'sand', 'mortar', 'wood', 'plaster']
    if any(material in obj_lower for material in materials):
        activity = f"{obj_lower.title()} pouring"
        verb = random.choice(["observed", "detected", "noted"])
        phrase = f"{activity} {verb} at the ground level."
        return activity, phrase

    # Bounding-box geometry: relative area and vertical centre of the box.
    x1, y1, x2, y2 = box[:4]
    box_area = (x2 - x1) * (y2 - y1)
    box_center_y = (y1 + y2) / 2
    # NOTE(review): assumes a 640x640 input frame for the area ratio — the
    # actual image may differ; confirm against the model's inference size.
    area_ratio = box_area / (640 * 640)

    base_name = obj_lower.title()
    # Synonym pools for variety ('Surveying' listed once — it was duplicated
    # before, which doubled its selection probability).
    action_terms = ['Operation', 'Handling', 'Setup', 'Installation', 'Preparation',
                    'Transport', 'Lifting', 'Surveying', 'Piling', 'Concrete Work',
                    'Flooring', 'Finishing', 'Marking']
    location_terms = ['site area', 'work zone', 'structural zone', 'foundation level',
                      'elevated area', 'safety zone', 'concrete pouring zone', 'flooring zone']

    # Size-based default activity.
    if area_ratio > 0.1:
        # Large objects (vehicles, heavy equipment): lifting if high in frame,
        # otherwise transport.
        if box_center_y < img_height / 3:
            activity = f"{base_name} Lifting"
        else:
            activity = f"{base_name} Transport"
    elif area_ratio > 0.01:
        # Medium objects (tools, equipment).
        activity = f"{base_name} Setup"
    else:
        # Small objects (safety items, materials).
        activity = f"{base_name} Marking"

    # Half the time, swap in a random synonym action for variety.
    if random.random() > 0.5:
        activity = f"{base_name} {random.choice(action_terms)}"

    # Position-based location bucket (top / bottom / middle third of frame).
    if box_center_y < img_height / 3:
        location = random.choice(['elevated area', 'structural zone'])
    elif box_center_y > 2 * img_height / 3:
        location = random.choice(['foundation level', 'ground level'])
    else:
        location = random.choice(location_terms)

    # Map the location bucket to a report "level".
    if location == 'elevated area':
        level = '3rd level'
    elif location == 'foundation level':
        level = 'ground level'
    else:
        level = '2nd level'

    verb = random.choice(["observed", "detected", "noted"])
    phrase = f"{activity} {verb} at the {level}."
    return activity, phrase
def generate_labels_from_image(image):
    """
    Run YOLO detection on a PIL image and return newline-joined activity phrases.

    Falls back to listing raw detections when no construction activity could be
    inferred, or an error string if processing fails entirely.
    """
    try:
        frame = np.array(image.convert("RGB"))
        height = frame.shape[0]
        print(f"Image shape: {frame.shape}")  # Debug: verify image input
        # Low confidence threshold so weak detections still contribute.
        results = model(frame, conf=0.1)

        raw_detections = []
        detected_objects = set()
        for result in results:
            for det in result.boxes.data.cpu().numpy():
                class_name = model.names.get(int(det[5]))
                raw_detections.append((class_name, float(det[4]), det))
                detected_objects.add(class_name)
        print(f"Raw detections: {[(label, conf) for label, conf, _ in raw_detections]}")
        print(f"Detected objects: {detected_objects}")

        # Map every detection to an activity + phrase, keeping unique activities.
        activity_labels = set()
        phrases = []
        for class_name, _conf, det in raw_detections:
            activity, phrase = infer_activity_and_phrase(class_name, det, height)
            if activity and phrase:
                activity_labels.add(activity)
                phrases.append(phrase)
        print(f"Inferred activities: {activity_labels}")

        if phrases:
            return "\n".join(sorted(phrases))
        if raw_detections:
            raw_labels = [f"{label} (conf: {conf:.2f})" for label, conf, _ in raw_detections]
            return f"No construction activities inferred. Raw detections: {', '.join(raw_labels)}."
        return "No objects detected in the image."
    except Exception as e:
        print(f"Error processing image: {str(e)}")
        return f"Error processing image: {str(e)}"
def save_dpr_to_pdf(dpr_text, image_paths, captions, filename):
    """
    Render the DPR text plus captioned images into a PDF via reportlab.

    Args:
        dpr_text (str): Full report text; blank lines become spacers.
        image_paths (list[str]): Paths of site images to embed.
        captions (list[str]): One caption per image (zipped with image_paths).
        filename (str): Output PDF path.

    Returns:
        tuple: (status message, filename) on success, (error message, None) on failure.
    """
    try:
        doc = SimpleDocTemplate(filename, pagesize=letter)
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            name='Title', fontSize=16, leading=20, alignment=1, spaceAfter=20, fontName='Helvetica-Bold'
        )
        body_style = ParagraphStyle(
            name='Body', fontSize=12, leading=14, spaceAfter=10, fontName='Helvetica'
        )
        flowables = []
        flowables.append(Paragraph("Daily Progress Report", title_style))
        for line in dpr_text.split('\n'):
            # Normalize curly quotes so the PDF font renders them cleanly.
            line = line.replace('\u2019', "'").replace('\u2018', "'")
            if line.strip():
                flowables.append(Paragraph(line, body_style))
            else:
                flowables.append(Spacer(1, 12))
        # Embed each image with its caption; a bad image becomes an inline
        # error paragraph instead of aborting the whole PDF.
        for img_path, caption in zip(image_paths, captions):
            try:
                img = PDFImage(img_path, width=200, height=150)
                flowables.append(img)
                flowables.append(Paragraph(f"Description: {caption}", body_style))
                flowables.append(Spacer(1, 12))
            except Exception as e:
                flowables.append(Paragraph(f"Error loading image: {str(e)}", body_style))
        doc.build(flowables)
        # Fix: the success message previously contained the literal placeholder
        # "(unknown)" instead of the actual output filename.
        return f"PDF saved successfully as {filename}", filename
    except Exception as e:
        return f"Error saving PDF: {str(e)}", None
def upload_file_to_salesforce(file_path, filename, sf_connection, file_type):
    """
    Upload a local file to Salesforce as a ContentVersion.

    Args:
        file_path (str): Local path of the file to upload.
        filename (str): Title/PathOnClient to record in Salesforce.
        sf_connection (Salesforce): Active simple_salesforce connection.
        file_type (str): "pdf" or anything else (treated as a site image);
            only affects the Description field.

    Returns:
        tuple: (ContentDocumentId, download URL, status message), with the
        first two None on failure.
    """
    try:
        # ContentVersion.VersionData must be base64-encoded file content.
        with open(file_path, 'rb') as f:
            file_content = f.read()
        file_content_b64 = base64.b64encode(file_content).decode('utf-8')
        description = "Daily Progress Report PDF" if file_type == "pdf" else "Site Image"
        content_version = sf_connection.ContentVersion.create({
            'Title': filename,
            'PathOnClient': filename,
            'VersionData': file_content_b64,
            'Description': description
        })
        content_version_id = content_version['id']
        # Resolve the parent ContentDocumentId for linking to records later.
        content_document = sf_connection.query(f"SELECT ContentDocumentId FROM ContentVersion WHERE Id = '{content_version_id}'")
        content_document_id = content_document['records'][0]['ContentDocumentId']
        content_document_url = f"https://{sf_connection.sf_instance}/sfc/servlet.shepherd/version/download/{content_version_id}"
        # Fix: both messages previously contained the literal placeholder
        # "(unknown)" instead of the uploaded filename.
        return content_document_id, content_document_url, f"File {filename} uploaded successfully"
    except Exception as e:
        return None, None, f"Error uploading {filename} to Salesforce: {str(e)}"
def _file_path(file):
    """Return the filesystem path for a Gradio upload.

    gr.Files(type="filepath") yields plain str paths, while older Gradio
    versions yield tempfile-like objects exposing `.name` — accept both.
    The original code called `file.name` unconditionally, which raises
    AttributeError on str paths.
    """
    return file if isinstance(file, str) else file.name


def generate_dpr(files):
    """
    Generate a Daily Progress Report from uploaded site photos.

    Runs detection on every image in parallel, assembles the report text,
    renders it to a PDF, and (when a connection is available) creates a
    Salesforce record and uploads/links the PDF and images to it.

    Args:
        files: list of uploads from gr.Files (str paths or tempfile-like).

    Returns:
        tuple: (report text + PDF/Salesforce status, PDF file path or None).
    """
    image_paths = [_file_path(f) for f in files]
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    dpr_text = [f"Daily Progress Report\nGenerated on: {current_time}\n"]

    # Run detection on all images concurrently (model inference + file I/O).
    with concurrent.futures.ThreadPoolExecutor() as executor:
        captions = list(executor.map(lambda p: generate_labels_from_image(Image.open(p)), image_paths))

    for path, caption in zip(image_paths, captions):
        dpr_text.append(f"\nImage: {path}\nProgress Summary:\n{caption}\n")

    dpr_output = "\n".join(dpr_text)
    pdf_filename = f"DPR_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.pdf"
    pdf_result, pdf_filepath = save_dpr_to_pdf(dpr_output, image_paths, captions, pdf_filename)

    salesforce_result = ""
    if sf and pdf_filepath:
        try:
            # Create the report record; truncate to Salesforce's 255-char
            # text-field limit.
            report_description = "; ".join(captions)[:255]
            dpr_record = sf.Daily_Progress_Reports__c.create({
                'Detected_Activities__c': report_description
            })
            dpr_record_id = dpr_record['id']
            salesforce_result += f"Created Daily_Progress_Reports__c record with ID: {dpr_record_id}\n"

            # Upload the PDF, link it to the record, and store its URL.
            pdf_content_document_id, pdf_url, pdf_upload_result = upload_file_to_salesforce(
                pdf_filepath, pdf_filename, sf, "pdf"
            )
            salesforce_result += pdf_upload_result + "\n"
            if pdf_content_document_id:
                sf.ContentDocumentLink.create({
                    'ContentDocumentId': pdf_content_document_id,
                    'LinkedEntityId': dpr_record_id,
                    'ShareType': 'V'
                })
            if pdf_url:
                sf.Daily_Progress_Reports__c.update(dpr_record_id, {
                    'PDF_URL__c': pdf_url
                })
                salesforce_result += f"Updated PDF URL for record ID {dpr_record_id}\n"

            # Upload each site image and link it to the record.
            for path in image_paths:
                image_filename = os.path.basename(path)
                image_content_document_id, image_url, image_upload_result = upload_file_to_salesforce(
                    path, image_filename, sf, "image"
                )
                if image_content_document_id:
                    sf.ContentDocumentLink.create({
                        'ContentDocumentId': image_content_document_id,
                        'LinkedEntityId': dpr_record_id,
                        'ShareType': 'V'
                    })
                    # NOTE(review): Site_Images__c is overwritten per image, so
                    # only the last image's ContentDocumentId is retained —
                    # confirm the field is meant to hold a single ID.
                    sf.Daily_Progress_Reports__c.update(dpr_record_id, {
                        'Site_Images__c': image_content_document_id
                    })
                salesforce_result += image_upload_result + "\n"
        except Exception as e:
            salesforce_result += f"Error interacting with Salesforce: {str(e)}\n"
    else:
        salesforce_result = "Salesforce connection not available or PDF generation failed.\n"

    return (
        dpr_output + f"\n\n{pdf_result}\n\nSalesforce Upload Status:\n{salesforce_result}",
        pdf_filepath
    )
# Gradio interface: multi-file image upload in, report text + PDF download out.
iface = gr.Interface(
    fn=generate_dpr,
    # type="filepath" delivers uploads as filesystem paths to generate_dpr.
    inputs=gr.Files(type="filepath", label="Upload Site Photos"),
    outputs=[
        gr.Textbox(label="Daily Progress Report"),
        gr.File(label="Download PDF")
    ],
    title="Daily Progress Report Generator",
    description="Upload up to 10 site photos. The AI model dynamically detects objects using YOLOv8 and infers construction activities like concrete work, flooring, piling, finishing, and more based on object names, sizes, and positions. The DPR is saved as a PDF and uploaded to Salesforce.",
    allow_flagging="never"
)
# Launch the app only when run as a script (Spaces/servers import-safe).
if __name__ == "__main__":
    iface.launch()