DPR-5gee

Sleeping

App Files Files Community

Rammohan0504 committed on May 19, 2025

Commit

1b5b162

verified ·

1 Parent(s): 28b77a1

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -116

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ from dotenv import load_dotenv
 import base64
 import io
 import concurrent.futures
 # Load environment variables from .env file
 load_dotenv()
@@ -36,222 +37,182 @@ model.eval()
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
-# Inference function to generate captions dynamically based on image content
-def generate_captions_from_image(image):
-    if image.mode != "RGB":
-        image = image.convert("RGB")
-    # Resize image for faster processing (use smaller resolution to speed up inference)
-    image = image.resize((320, 320))  # Reduced size for faster processing
-    # Preprocess the image and generate a caption
-    inputs = processor(image, return_tensors="pt").to(device, torch.float16)
-    output = model.generate(**inputs, max_new_tokens=50)
-    caption = processor.decode(output[0], skip_special_tokens=True)
-    return caption
 # Function to save DPR text to a PDF file
 def save_dpr_to_pdf(dpr_text, image_paths, captions, filename):
     try:
-        # Create a PDF document
         doc = SimpleDocTemplate(filename, pagesize=letter)
         styles = getSampleStyleSheet()
-        # Define custom styles
         title_style = ParagraphStyle(
-            name='Title',
-            fontSize=16,
-            leading=20,
-            alignment=1,  # Center
-            spaceAfter=20,
-            textColor=colors.black,
-            fontName='Helvetica-Bold'
         )
         body_style = ParagraphStyle(
-            name='Body',
-            fontSize=12,
-            leading=14,
-            spaceAfter=10,
-            textColor=colors.black,
-            fontName='Helvetica'
         )
-        # Build the PDF content
         flowables = []
-        # Add title
         flowables.append(Paragraph("Daily Progress Report", title_style))
-        # Split DPR text into lines and add as paragraphs (excluding descriptions for images)
         for line in dpr_text.split('\n'):
-            # Replace problematic characters for PDF
             line = line.replace('\u2019', "'").replace('\u2018', "'")
             if line.strip():
                 flowables.append(Paragraph(line, body_style))
             else:
                 flowables.append(Spacer(1, 12))
-        # Add images and captions in the correct order (no need to add description to dpr_text again)
         for img_path, caption in zip(image_paths, captions):
             try:
-                # Add image first
-                img = PDFImage(img_path, width=200, height=150)  # Adjust image size if needed
                 flowables.append(img)
-                # Add description below the image
                 description = f"Description: {caption}"
                 flowables.append(Paragraph(description, body_style))
-                flowables.append(Spacer(1, 12))  # Add some space between images
             except Exception as e:
                 flowables.append(Paragraph(f"Error loading image: {str(e)}", body_style))
-        # Build the PDF
         doc.build(flowables)
         return f"PDF saved successfully as {filename}", filename
     except Exception as e:
         return f"Error saving PDF: {str(e)}", None
-# Function to upload a file to Salesforce as ContentVersion
 def upload_file_to_salesforce(file_path, filename, sf_connection, file_type):
     try:
-        # Read file content and encode in base64
         with open(file_path, 'rb') as f:
             file_content = f.read()
         file_content_b64 = base64.b64encode(file_content).decode('utf-8')
-        # Set description based on file type
         description = "Daily Progress Report PDF" if file_type == "pdf" else "Site Image"
-        # Create ContentVersion
         content_version = sf_connection.ContentVersion.create({
             'Title': filename,
             'PathOnClient': filename,
             'VersionData': file_content_b64,
             'Description': description
         })
-        # Get ContentDocumentId
         content_version_id = content_version['id']
         content_document = sf_connection.query(
             f"SELECT ContentDocumentId FROM ContentVersion WHERE Id = '{content_version_id}'"
         )
         content_document_id = content_document['records'][0]['ContentDocumentId']
-        # Generate a valid Salesforce URL for the ContentDocument
         content_document_url = f"https://{sf_connection.sf_instance}/sfc/servlet.shepherd/version/download/{content_version_id}"
-        # Ensure the link is valid
         return content_document_id, content_document_url, f"File {filename} uploaded successfully"
     except Exception as e:
         return None, None, f"Error uploading {filename} to Salesforce: {str(e)}"
-# Function to generate the daily progress report (DPR), save as PDF, and upload to Salesforce
 def generate_dpr(files):
     dpr_text = []
     captions = []
-    image_paths = []
-    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    # Add header to the DPR
-    dpr_text.append(f"Daily Progress Report\nGenerated on: {current_time}\n")
-    # Process images in parallel for faster performance
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        results = list(executor.map(lambda file: generate_captions_from_image(Image.open(file.name)), files))
-    for i, file in enumerate(files):
-        caption = results[i]
-        captions.append(caption)
-        # Generate DPR section for this image with dynamic caption
-        dpr_section = f"\nImage: {file.name}\nDescription: {caption}\n"
-        dpr_text.append(dpr_section)
-        # Save image path for embedding in the report
-        image_paths.append(file.name)
-    # Combine DPR text
     dpr_output = "\n".join(dpr_text)
-    # Generate PDF filename with timestamp
     pdf_filename = f"DPR_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.pdf"
-    # Save DPR text to PDF
     pdf_result, pdf_filepath = save_dpr_to_pdf(dpr_output, image_paths, captions, pdf_filename)
     salesforce_result = ""
-    pdf_content_document_id = None
-    pdf_url = None
-    image_content_document_ids = []
     if sf and pdf_filepath:
         try:
-            # Create Daily_Progress_Reports__c record
-            report_description = "; ".join(captions)[:255]  # Concatenate captions, limit to 255 chars
             dpr_record = sf.Daily_Progress_Reports__c.create({
-                'Detected_Activities__c': report_description  # Store in Detected_Activities__c field
             })
             dpr_record_id = dpr_record['id']
             salesforce_result += f"Created Daily_Progress_Reports__c record with ID: {dpr_record_id}\n"
-            # Upload PDF to Salesforce
             pdf_content_document_id, pdf_url, pdf_upload_result = upload_file_to_salesforce(
                 pdf_filepath, pdf_filename, sf, "pdf"
             )
             salesforce_result += pdf_upload_result + "\n"
-            # Link PDF to DPR record
             if pdf_content_document_id:
                 sf.ContentDocumentLink.create({
                     'ContentDocumentId': pdf_content_document_id,
                     'LinkedEntityId': dpr_record_id,
                     'ShareType': 'V'
                 })
-            # Update the DPR record with the PDF URL
             if pdf_url:
                 sf.Daily_Progress_Reports__c.update(dpr_record_id, {
-                    'PDF_URL__c': pdf_url  # Storing the PDF URL correctly
                 })
                 salesforce_result += f"Updated PDF URL for record ID {dpr_record_id}\n"
-            # Upload images to Salesforce and link them to DPR record
             for file in files:
                 image_filename = os.path.basename(file.name)
                 image_content_document_id, image_url, image_upload_result = upload_file_to_salesforce(
                     file.name, image_filename, sf, "image"
                 )
                 if image_content_document_id:
-                    # Link image to the Daily Progress Report record (DPR) using ContentDocumentLink
                     sf.ContentDocumentLink.create({
                         'ContentDocumentId': image_content_document_id,
-                        'LinkedEntityId': dpr_record_id,  # Link image to DPR record
-                        'ShareType': 'V'  # 'V' means Viewer access
                     })
-                    # Now, update the DPR record with the ContentDocumentId in the Site_Images field (if it's a text or URL field)
                     sf.Daily_Progress_Reports__c.update(dpr_record_id, {
-                        'Site_Images__c': image_content_document_id  # Storing the ContentDocumentId directly
                     })
                     salesforce_result += image_upload_result + "\n"
         except Exception as e:
             salesforce_result += f"Error interacting with Salesforce: {str(e)}\n"
     else:
         salesforce_result = "Salesforce connection not available or PDF generation failed.\n"
-    # Return DPR text, PDF file, and Salesforce upload status
     return (
-        dpr_output + f"\n\n{pdf_result}\n\nSalesforce Upload Status:\n{salesforce_result}",
         pdf_filepath
     )
-# Gradio interface for uploading multiple files, displaying DPR, and downloading PDF
 iface = gr.Interface(
     fn=generate_dpr,
-    inputs=gr.Files(type="filepath", label="Upload Site Photos"),
     outputs=[
         gr.Textbox(label="Daily Progress Report"),
         gr.File(label="Download PDF")

 import base64
 import io
 import concurrent.futures
+import time
 # Load environment variables from .env file
 load_dotenv()
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
# Caption generation function for a single image file path
def generate_captions_from_image(image_path):
    """Generate a text caption for the image stored at ``image_path``.

    The image is opened, converted to RGB when necessary, downscaled to
    224x224 for speed, and passed through the module-level ``processor`` and
    ``model`` with at most 50 new tokens.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption, or a string starting with
        ``"Error processing image:"`` if any step fails. This function never
        raises, so it is safe to run inside an executor.
    """
    try:
        # The context manager closes the underlying file handle; resize()
        # returns an independent in-memory copy that stays valid afterwards.
        with Image.open(image_path) as source:
            rgb = source if source.mode == "RGB" else source.convert("RGB")
            resized = rgb.resize((224, 224))

        # Cast inputs to the dtype the model actually uses instead of always
        # forcing float16. NOTE(review): assumes ``model`` exposes ``dtype``
        # (as Hugging Face models do); otherwise the original float16 is kept.
        input_dtype = getattr(model, "dtype", torch.float16)
        inputs = processor(resized, return_tensors="pt").to(device, input_dtype)

        # Inference only: skip gradient bookkeeping.
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=50)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error processing image: {str(e)}"
 # Function to save DPR text to a PDF file
 def save_dpr_to_pdf(dpr_text, image_paths, captions, filename):
     try:
         doc = SimpleDocTemplate(filename, pagesize=letter)
         styles = getSampleStyleSheet()
         title_style = ParagraphStyle(
+            name='Title', fontSize=16, leading=20, alignment=1,
+            spaceAfter=20, textColor=colors.black, fontName='Helvetica-Bold'
         )
         body_style = ParagraphStyle(
+            name='Body', fontSize=12, leading=14,
+            spaceAfter=10, textColor=colors.black, fontName='Helvetica'
         )
         flowables = []
         flowables.append(Paragraph("Daily Progress Report", title_style))
         for line in dpr_text.split('\n'):
             line = line.replace('\u2019', "'").replace('\u2018', "'")
             if line.strip():
                 flowables.append(Paragraph(line, body_style))
             else:
                 flowables.append(Spacer(1, 12))
         for img_path, caption in zip(image_paths, captions):
             try:
+                img = PDFImage(img_path, width=200, height=150)
                 flowables.append(img)
                 description = f"Description: {caption}"
                 flowables.append(Paragraph(description, body_style))
+                flowables.append(Spacer(1, 12))
             except Exception as e:
                 flowables.append(Paragraph(f"Error loading image: {str(e)}", body_style))
         doc.build(flowables)
         return f"PDF saved successfully as {filename}", filename
     except Exception as e:
         return f"Error saving PDF: {str(e)}", None
# Function to upload file to Salesforce
def upload_file_to_salesforce(file_path, filename, sf_connection, file_type):
    """Upload a local file to Salesforce as a ContentVersion.

    Args:
        file_path: Path of the local file to read.
        filename: Name used for both ``Title`` and ``PathOnClient``.
        sf_connection: Salesforce connection used to create the
            ContentVersion and run the follow-up query.
        file_type: ``"pdf"`` selects the PDF description; any other value
            is described as a site image.

    Returns:
        ``(content_document_id, download_url, status_message)`` on success,
        or ``(None, None, error_message)`` if any step fails.
    """
    try:
        with open(file_path, 'rb') as handle:
            raw_bytes = handle.read()
        encoded_payload = base64.b64encode(raw_bytes).decode('utf-8')

        if file_type == "pdf":
            description = "Daily Progress Report PDF"
        else:
            description = "Site Image"

        version_fields = {
            'Title': filename,
            'PathOnClient': filename,
            'VersionData': encoded_payload,
            'Description': description
        }
        version_id = sf_connection.ContentVersion.create(version_fields)['id']

        # The create call only returns the version id; the parent document id
        # has to be looked up separately.
        lookup = sf_connection.query(
            f"SELECT ContentDocumentId FROM ContentVersion WHERE Id = '{version_id}'"
        )
        document_id = lookup['records'][0]['ContentDocumentId']

        download_url = f"https://{sf_connection.sf_instance}/sfc/servlet.shepherd/version/download/{version_id}"
        return document_id, download_url, f"File {filename} uploaded successfully"
    except Exception as e:
        return None, None, f"Error uploading {filename} to Salesforce: {str(e)}"
# Main DPR generation function with timeout and parallel caption generation
def _caption_images(image_paths, timeout=8):
    """Caption all images in parallel; return captions in input order.

    Images whose caption is not ready within ``timeout`` seconds keep the
    placeholder ``"Caption generation timed out"``.
    """
    captions = ["Caption generation timed out"] * len(image_paths)
    # Managed manually rather than via ``with``: the context manager's exit
    # waits for every worker, which would defeat the timeout below.
    executor = concurrent.futures.ProcessPoolExecutor()
    try:
        future_to_index = {
            executor.submit(generate_captions_from_image, path): index
            for index, path in enumerate(image_paths)
        }
        try:
            for future in concurrent.futures.as_completed(future_to_index, timeout=timeout):
                try:
                    caption = future.result()
                except Exception as e:
                    caption = f"Caption generation error: {str(e)}"
                # Store by original position so captions stay aligned with
                # image_paths regardless of completion order.
                captions[future_to_index[future]] = caption
        except concurrent.futures.TimeoutError:
            # Drop work that has not started; finished futures ignore cancel().
            for future in future_to_index:
                future.cancel()
    finally:
        executor.shutdown(wait=False)
    return captions


def generate_dpr(files):
    """Build the Daily Progress Report, save it as a PDF, and push it to Salesforce.

    Args:
        files: Uploaded images. Each item is either an object with a ``name``
            attribute holding the file path or a plain path string.

    Returns:
        A ``(report_text, pdf_path)`` tuple. ``pdf_path`` is ``None`` when
        nothing was uploaded, PDF creation failed, or processing took longer
        than 10 seconds.
    """
    # Nothing uploaded (None or an empty list): report it instead of crashing.
    if not files:
        return "No images were uploaded. Please upload at least one site photo.", None

    start_time = time.time()
    image_paths = [getattr(file, "name", file) for file in files]

    dpr_text = [f"Daily Progress Report\nGenerated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"]
    captions = _caption_images(image_paths)
    dpr_text.extend(
        f"\nImage: {path}\nDescription: {caption}\n"
        for path, caption in zip(image_paths, captions)
    )

    dpr_output = "\n".join(dpr_text)
    pdf_filename = f"DPR_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.pdf"
    pdf_result, pdf_filepath = save_dpr_to_pdf(dpr_output, image_paths, captions, pdf_filename)

    elapsed = time.time() - start_time
    if elapsed > 10:
        return "Processing exceeded 10 seconds timeout. Please try fewer images or smaller images.", None

    salesforce_result = ""
    if sf and pdf_filepath:
        try:
            # Create DPR record in Salesforce; the joined captions are capped
            # at 255 characters.
            report_description = "; ".join(captions)[:255]
            dpr_record = sf.Daily_Progress_Reports__c.create({
                'Detected_Activities__c': report_description
            })
            dpr_record_id = dpr_record['id']
            salesforce_result += f"Created Daily_Progress_Reports__c record with ID: {dpr_record_id}\n"

            # Upload PDF
            pdf_content_document_id, pdf_url, pdf_upload_result = upload_file_to_salesforce(
                pdf_filepath, pdf_filename, sf, "pdf"
            )
            salesforce_result += pdf_upload_result + "\n"
            if pdf_content_document_id:
                sf.ContentDocumentLink.create({
                    'ContentDocumentId': pdf_content_document_id,
                    'LinkedEntityId': dpr_record_id,
                    'ShareType': 'V'
                })
            if pdf_url:
                sf.Daily_Progress_Reports__c.update(dpr_record_id, {
                    'PDF_URL__c': pdf_url
                })
                salesforce_result += f"Updated PDF URL for record ID {dpr_record_id}\n"

            # Upload and link images
            for image_path in image_paths:
                image_filename = os.path.basename(image_path)
                image_content_document_id, _image_url, image_upload_result = upload_file_to_salesforce(
                    image_path, image_filename, sf, "image"
                )
                if image_content_document_id:
                    sf.ContentDocumentLink.create({
                        'ContentDocumentId': image_content_document_id,
                        'LinkedEntityId': dpr_record_id,
                        'ShareType': 'V'
                    })
                    # NOTE(review): every iteration overwrites Site_Images__c,
                    # so only the last image's id remains on the record.
                    sf.Daily_Progress_Reports__c.update(dpr_record_id, {
                        'Site_Images__c': image_content_document_id
                    })
                # Record the outcome even when the upload failed, so errors
                # are visible in the status text.
                salesforce_result += image_upload_result + "\n"
        except Exception as e:
            salesforce_result += f"Error interacting with Salesforce: {str(e)}\n"
    else:
        salesforce_result = "Salesforce connection not available or PDF generation failed.\n"

    total_elapsed = time.time() - start_time
    return (
        dpr_output + f"\n\n{pdf_result}\n\nSalesforce Upload Status:\n{salesforce_result}\n\nTotal processing time: {total_elapsed:.2f} seconds",
        pdf_filepath
    )
+# Gradio interface
 iface = gr.Interface(
     fn=generate_dpr,
+    inputs=gr.Files(type="file", label="Upload Site Photos"),
     outputs=[
         gr.Textbox(label="Daily Progress Report"),
         gr.File(label="Download PDF")