import torch
from ultralytics import YOLO
import numpy as np
import random
from PIL import Image
import gradio as gr
from datetime import datetime
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as PDFImage
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from simple_salesforce import Salesforce
import os
from dotenv import load_dotenv
import base64
import concurrent.futures

# Load environment variables
load_dotenv()

# Salesforce credentials
SF_USERNAME = os.getenv('SF_USERNAME')
SF_PASSWORD = os.getenv('SF_PASSWORD')
SF_SECURITY_TOKEN = os.getenv('SF_SECURITY_TOKEN')

# Salesforce connection — best effort: the app keeps working (without uploads)
# when credentials are missing or the login fails.
try:
    sf = Salesforce(username=SF_USERNAME, password=SF_PASSWORD, security_token=SF_SECURITY_TOKEN)
except Exception as e:
    sf = None
    print(f"Failed to connect to Salesforce: {str(e)}")

# Load YOLOv8 pretrained model
model = YOLO('yolov8n.pt')


def infer_activity_and_phrase(obj, box, img_height):
    """
    Dynamically infer a construction activity and generate a descriptive phrase
    based on the detected object, using object name, bounding box size, and position.

    Args:
        obj (str): Detected COCO object (e.g., 'truck', 'crane', 'person').
        box (list): Bounding box [x1, y1, x2, y2, conf, cls].
        img_height (int): Image height for position analysis.

    Returns:
        tuple: (activity, phrase) or (None, None) if unmapped.
    """
    # If the object is a person, label it as a worker
    if obj.lower() == 'person':
        obj = 'worker'  # Change label from person to worker

    # Normalize object name
    obj_lower = obj.lower().replace('_', ' ')

    # Skip irrelevant objects: very short names and common animals are
    # unlikely to be construction-related.
    if len(obj_lower) <= 4 or any(suffix in obj_lower for suffix in ['cat', 'dog', 'bird', 'fish']):
        return None, None

    # Define common construction materials
    materials = ['cement', 'concrete', 'bricks', 'iron', 'steel', 'sand', 'mortar', 'wood', 'plaster']

    # If object is a material, short-circuit to a material-specific activity.
    if any(material in obj_lower for material in materials):
        base_name = obj_lower.title()
        activity = f"{base_name} pouring"
        location = "work zone"  # You can be more specific based on detection
        verb = random.choice(["observed", "detected", "noted"])
        level = "ground level"  # Adjust the level based on detection position
        phrase = f"{activity} {verb} at the {level}."
        return activity, phrase

    # Extract bounding box info
    x1, y1, x2, y2, conf = box[:5]
    box_area = (x2 - x1) * (y2 - y1)  # Area of bounding box
    box_center_y = (y1 + y2) / 2  # Vertical center of box

    # Normalize area (relative to image size, assuming 640x640 for simplicity)
    img_area = 640 * 640  # Typical YOLO input size
    area_ratio = box_area / img_area

    # Base activity and location
    base_name = obj_lower.title()

    # Synonym lists for activity variation.
    # NOTE: the original list contained 'Surveying' twice, which doubled its
    # weight in random.choice — deduplicated here.
    action_terms = ['Operation', 'Handling', 'Setup', 'Installation', 'Preparation',
                    'Transport', 'Lifting', 'Surveying', 'Piling', 'Concrete Work',
                    'Flooring', 'Finishing', 'Marking']
    location_terms = ['site area', 'work zone', 'structural zone', 'foundation level',
                      'elevated area', 'safety zone', 'concrete pouring zone', 'flooring zone']

    # Categorize based on size and position
    if area_ratio > 0.1:  # Large objects (e.g., vehicles, heavy equipment)
        if box_center_y < img_height / 3:  # Top 1/3: elevated equipment
            activity = f"{base_name} Lifting"
            location = "elevated area"
        else:  # Ground-level: transport or heavy equipment
            activity = f"{base_name} Transport"
            location = "site perimeter"
    elif area_ratio > 0.01:  # Medium objects (e.g., tools, equipment)
        activity = f"{base_name} Setup"
        location = "work zone"
    else:  # Small objects (e.g., safety items, materials)
        activity = f"{base_name} Marking"
        location = "safety zone"

    # Vary activity with synonyms to ensure variety of activities
    action = random.choice(action_terms)
    activity = f"{base_name} {action}" if random.random() > 0.5 else activity

    # Adjust location based on vertical position in the frame
    if box_center_y < img_height / 3:
        location = random.choice(['elevated area', 'structural zone'])
    elif box_center_y > 2 * img_height / 3:
        location = random.choice(['foundation level', 'ground level'])
    else:
        location = random.choice(location_terms)

    # Generate phrase
    verbs = ["observed", "detected", "noted"]
    verb = random.choice(verbs)

    # Map the inferred location onto a floor/level for the phrase
    if location == 'elevated area':
        level = '3rd level'
    elif location == 'foundation level':
        level = 'ground level'
    else:
        level = '2nd level'

    phrase = f"{activity} {verb} at the {level}."
    return activity, phrase


def generate_labels_from_image(image):
    """
    Run YOLOv8 on a PIL image and return newline-joined activity phrases.

    Returns a human-readable message (never raises) describing inferred
    activities, raw detections, or the processing error.
    """
    try:
        img = np.array(image.convert("RGB"))
        img_height = img.shape[0]
        print(f"Image shape: {img.shape}")  # Debug: Verify image input

        results = model(img, conf=0.1)  # Lowered confidence threshold
        detected_objects = set()
        raw_detections = []
        for r in results:
            for box in r.boxes.data.cpu().numpy():
                cls = int(box[5])
                conf = float(box[4])
                label = model.names.get(cls)
                raw_detections.append((label, conf, box))
                detected_objects.add(label)
        print(f"Raw detections: {[(label, conf) for label, conf, _ in raw_detections]}")
        print(f"Detected objects: {detected_objects}")

        activity_labels = set()
        phrases = []
        for label, conf, box in raw_detections:
            activity, phrase = infer_activity_and_phrase(label, box, img_height)
            if activity and phrase:
                activity_labels.add(activity)
                phrases.append(phrase)
        print(f"Inferred activities: {activity_labels}")

        if not phrases:
            # Distinguish "saw objects but mapped none" from "saw nothing".
            if raw_detections:
                raw_labels = [f"{label} (conf: {conf:.2f})" for label, conf, _ in raw_detections]
                return f"No construction activities inferred. Raw detections: {', '.join(raw_labels)}."
            return "No objects detected in the image."
        return "\n".join(sorted(phrases))
    except Exception as e:
        print(f"Error processing image: {str(e)}")
        return f"Error processing image: {str(e)}"


def save_dpr_to_pdf(dpr_text, image_paths, captions, filename):
    """
    Render the DPR text plus captioned images into a PDF.

    Args:
        dpr_text (str): Full report text, one paragraph per line.
        image_paths (list): Paths of site images to embed.
        captions (list): One caption per image (zipped with image_paths).
        filename (str): Output PDF path.

    Returns:
        tuple: (status message, filename or None on failure).
    """
    try:
        doc = SimpleDocTemplate(filename, pagesize=letter)
        styles = getSampleStyleSheet()
        title_style = ParagraphStyle(
            name='Title',
            fontSize=16,
            leading=20,
            alignment=1,
            spaceAfter=20,
            fontName='Helvetica-Bold'
        )
        body_style = ParagraphStyle(
            name='Body',
            fontSize=12,
            leading=14,
            spaceAfter=10,
            fontName='Helvetica'
        )

        flowables = []
        flowables.append(Paragraph("Daily Progress Report", title_style))
        for line in dpr_text.split('\n'):
            # Normalize curly quotes that break some PDF font encodings
            line = line.replace('\u2019', "'").replace('\u2018', "'")
            if line.strip():
                flowables.append(Paragraph(line, body_style))
            else:
                flowables.append(Spacer(1, 12))

        for img_path, caption in zip(image_paths, captions):
            try:
                img = PDFImage(img_path, width=200, height=150)
                flowables.append(img)
                flowables.append(Paragraph(f"Description: {caption}", body_style))
                flowables.append(Spacer(1, 12))
            except Exception as e:
                # A bad image should not abort the whole report
                flowables.append(Paragraph(f"Error loading image: {str(e)}", body_style))

        doc.build(flowables)
        # FIX: the success message contained a literal "(unknown)" instead of
        # interpolating the output filename.
        return f"PDF saved successfully as {filename}", filename
    except Exception as e:
        return f"Error saving PDF: {str(e)}", None


def upload_file_to_salesforce(file_path, filename, sf_connection, file_type):
    """
    Upload a file to Salesforce as a ContentVersion and return its links.

    Args:
        file_path (str): Local path of the file to upload.
        filename (str): Title/PathOnClient to store in Salesforce.
        sf_connection (Salesforce): Live simple_salesforce connection.
        file_type (str): "pdf" or "image" — only affects the Description field.

    Returns:
        tuple: (ContentDocumentId, download URL, status message);
               (None, None, error message) on failure.
    """
    try:
        with open(file_path, 'rb') as f:
            file_content = f.read()
        # ContentVersion.VersionData must be base64-encoded
        file_content_b64 = base64.b64encode(file_content).decode('utf-8')
        description = "Daily Progress Report PDF" if file_type == "pdf" else "Site Image"
        content_version = sf_connection.ContentVersion.create({
            'Title': filename,
            'PathOnClient': filename,
            'VersionData': file_content_b64,
            'Description': description
        })
        content_version_id = content_version['id']
        # Look up the parent ContentDocument so the file can be linked to records
        content_document = sf_connection.query(
            f"SELECT ContentDocumentId FROM ContentVersion WHERE Id = '{content_version_id}'"
        )
        content_document_id = content_document['records'][0]['ContentDocumentId']
        content_document_url = f"https://{sf_connection.sf_instance}/sfc/servlet.shepherd/version/download/{content_version_id}"
        # FIX: status messages contained a literal "(unknown)" instead of the filename.
        return content_document_id, content_document_url, f"File {filename} uploaded successfully"
    except Exception as e:
        return None, None, f"Error uploading {filename} to Salesforce: {str(e)}"


def generate_dpr(files):
    """
    Build a Daily Progress Report from uploaded site photos.

    Runs detection on each image (in a thread pool, since inference releases
    the GIL during native work), writes the report to a PDF, and — when a
    Salesforce connection is available — creates a Daily_Progress_Reports__c
    record and attaches the PDF and images to it.

    Args:
        files: Gradio file objects; each exposes a .name path attribute.
               (NOTE(review): with gr.Files(type="filepath") Gradio passes
               NamedString values, which are str subclasses carrying .name.)

    Returns:
        tuple: (report text + upload status, PDF file path or None).
    """
    dpr_text = []
    captions = []
    image_paths = []
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    dpr_text.append(f"Daily Progress Report\nGenerated on: {current_time}\n")

    # Detect activities for all images concurrently
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(lambda file: generate_labels_from_image(Image.open(file.name)), files))

    for i, file in enumerate(files):
        caption = results[i]
        captions.append(caption)
        dpr_section = f"\nImage: {file.name}\nProgress Summary:\n{caption}\n"
        dpr_text.append(dpr_section)
        image_paths.append(file.name)

    dpr_output = "\n".join(dpr_text)
    pdf_filename = f"DPR_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.pdf"
    pdf_result, pdf_filepath = save_dpr_to_pdf(dpr_output, image_paths, captions, pdf_filename)

    salesforce_result = ""
    if sf and pdf_filepath:
        try:
            # Salesforce text fields cap at 255 characters
            report_description = "; ".join(captions)[:255]
            dpr_record = sf.Daily_Progress_Reports__c.create({
                'Detected_Activities__c': report_description
            })
            dpr_record_id = dpr_record['id']
            salesforce_result += f"Created Daily_Progress_Reports__c record with ID: {dpr_record_id}\n"

            pdf_content_document_id, pdf_url, pdf_upload_result = upload_file_to_salesforce(
                pdf_filepath, pdf_filename, sf, "pdf"
            )
            salesforce_result += pdf_upload_result + "\n"
            if pdf_content_document_id:
                # Link the uploaded PDF to the DPR record (ShareType 'V' = viewer)
                sf.ContentDocumentLink.create({
                    'ContentDocumentId': pdf_content_document_id,
                    'LinkedEntityId': dpr_record_id,
                    'ShareType': 'V'
                })
            if pdf_url:
                sf.Daily_Progress_Reports__c.update(dpr_record_id, {
                    'PDF_URL__c': pdf_url
                })
                salesforce_result += f"Updated PDF URL for record ID {dpr_record_id}\n"

            for file in files:
                image_filename = os.path.basename(file.name)
                image_content_document_id, image_url, image_upload_result = upload_file_to_salesforce(
                    file.name, image_filename, sf, "image"
                )
                if image_content_document_id:
                    sf.ContentDocumentLink.create({
                        'ContentDocumentId': image_content_document_id,
                        'LinkedEntityId': dpr_record_id,
                        'ShareType': 'V'
                    })
                    # NOTE(review): each upload overwrites Site_Images__c, so the
                    # field ends up holding only the last image's document id.
                    sf.Daily_Progress_Reports__c.update(dpr_record_id, {
                        'Site_Images__c': image_content_document_id
                    })
                salesforce_result += image_upload_result + "\n"
        except Exception as e:
            salesforce_result += f"Error interacting with Salesforce: {str(e)}\n"
    else:
        salesforce_result = "Salesforce connection not available or PDF generation failed.\n"

    return (
        dpr_output + f"\n\n{pdf_result}\n\nSalesforce Upload Status:\n{salesforce_result}",
        pdf_filepath
    )


# Gradio interface for uploading images
iface = gr.Interface(
    fn=generate_dpr,
    inputs=gr.Files(type="filepath", label="Upload Site Photos"),
    outputs=[
        gr.Textbox(label="Daily Progress Report"),
        gr.File(label="Download PDF")
    ],
    title="Daily Progress Report Generator",
    description="Upload up to 10 site photos. The AI model dynamically detects objects using YOLOv8 and infers construction activities like concrete work, flooring, piling, finishing, and more based on object names, sizes, and positions. The DPR is saved as a PDF and uploaded to Salesforce.",
    allow_flagging="never"
)

if __name__ == "__main__":
    iface.launch()