Rammohan0504 committed on
Commit
1b5b162
·
verified ·
1 Parent(s): 28b77a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -116
app.py CHANGED
@@ -13,6 +13,7 @@ from dotenv import load_dotenv
13
  import base64
14
  import io
15
  import concurrent.futures
 
16
 
17
  # Load environment variables from .env file
18
  load_dotenv()
@@ -36,222 +37,182 @@ model.eval()
36
  device = "cuda" if torch.cuda.is_available() else "cpu"
37
  model.to(device)
38
 
39
- # Inference function to generate captions dynamically based on image content
40
- def generate_captions_from_image(image):
41
- if image.mode != "RGB":
42
- image = image.convert("RGB")
43
-
44
- # Resize image for faster processing (use smaller resolution to speed up inference)
45
- image = image.resize((320, 320)) # Reduced size for faster processing
46
-
47
- # Preprocess the image and generate a caption
48
- inputs = processor(image, return_tensors="pt").to(device, torch.float16)
49
- output = model.generate(**inputs, max_new_tokens=50)
50
- caption = processor.decode(output[0], skip_special_tokens=True)
51
-
52
- return caption
 
53
 
54
  # Function to save DPR text to a PDF file
55
  def save_dpr_to_pdf(dpr_text, image_paths, captions, filename):
56
  try:
57
- # Create a PDF document
58
  doc = SimpleDocTemplate(filename, pagesize=letter)
59
  styles = getSampleStyleSheet()
60
-
61
- # Define custom styles
62
  title_style = ParagraphStyle(
63
- name='Title',
64
- fontSize=16,
65
- leading=20,
66
- alignment=1, # Center
67
- spaceAfter=20,
68
- textColor=colors.black,
69
- fontName='Helvetica-Bold'
70
  )
71
  body_style = ParagraphStyle(
72
- name='Body',
73
- fontSize=12,
74
- leading=14,
75
- spaceAfter=10,
76
- textColor=colors.black,
77
- fontName='Helvetica'
78
  )
79
-
80
- # Build the PDF content
81
  flowables = []
82
-
83
- # Add title
84
  flowables.append(Paragraph("Daily Progress Report", title_style))
85
-
86
- # Split DPR text into lines and add as paragraphs (excluding descriptions for images)
87
  for line in dpr_text.split('\n'):
88
- # Replace problematic characters for PDF
89
  line = line.replace('\u2019', "'").replace('\u2018', "'")
90
  if line.strip():
91
  flowables.append(Paragraph(line, body_style))
92
  else:
93
  flowables.append(Spacer(1, 12))
94
-
95
- # Add images and captions in the correct order (no need to add description to dpr_text again)
96
  for img_path, caption in zip(image_paths, captions):
97
  try:
98
- # Add image first
99
- img = PDFImage(img_path, width=200, height=150) # Adjust image size if needed
100
  flowables.append(img)
101
- # Add description below the image
102
  description = f"Description: {caption}"
103
  flowables.append(Paragraph(description, body_style))
104
- flowables.append(Spacer(1, 12)) # Add some space between images
105
  except Exception as e:
106
  flowables.append(Paragraph(f"Error loading image: {str(e)}", body_style))
107
-
108
- # Build the PDF
109
  doc.build(flowables)
110
  return f"PDF saved successfully as {filename}", filename
111
  except Exception as e:
112
  return f"Error saving PDF: {str(e)}", None
113
 
114
- # Function to upload a file to Salesforce as ContentVersion
115
  def upload_file_to_salesforce(file_path, filename, sf_connection, file_type):
116
  try:
117
- # Read file content and encode in base64
118
  with open(file_path, 'rb') as f:
119
  file_content = f.read()
120
  file_content_b64 = base64.b64encode(file_content).decode('utf-8')
121
-
122
- # Set description based on file type
123
  description = "Daily Progress Report PDF" if file_type == "pdf" else "Site Image"
124
-
125
- # Create ContentVersion
126
  content_version = sf_connection.ContentVersion.create({
127
  'Title': filename,
128
  'PathOnClient': filename,
129
  'VersionData': file_content_b64,
130
  'Description': description
131
  })
132
-
133
- # Get ContentDocumentId
134
  content_version_id = content_version['id']
135
  content_document = sf_connection.query(
136
  f"SELECT ContentDocumentId FROM ContentVersion WHERE Id = '{content_version_id}'"
137
  )
138
  content_document_id = content_document['records'][0]['ContentDocumentId']
139
-
140
- # Generate a valid Salesforce URL for the ContentDocument
141
  content_document_url = f"https://{sf_connection.sf_instance}/sfc/servlet.shepherd/version/download/{content_version_id}"
142
-
143
-
144
- # Ensure the link is valid
145
  return content_document_id, content_document_url, f"File {filename} uploaded successfully"
146
  except Exception as e:
147
  return None, None, f"Error uploading {filename} to Salesforce: {str(e)}"
148
 
149
- # Function to generate the daily progress report (DPR), save as PDF, and upload to Salesforce
150
  def generate_dpr(files):
 
151
  dpr_text = []
152
  captions = []
153
- image_paths = []
154
- current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
155
-
156
- # Add header to the DPR
157
- dpr_text.append(f"Daily Progress Report\nGenerated on: {current_time}\n")
158
-
159
- # Process images in parallel for faster performance
160
- with concurrent.futures.ThreadPoolExecutor() as executor:
161
- results = list(executor.map(lambda file: generate_captions_from_image(Image.open(file.name)), files))
162
-
163
- for i, file in enumerate(files):
164
- caption = results[i]
165
- captions.append(caption)
166
-
167
- # Generate DPR section for this image with dynamic caption
168
- dpr_section = f"\nImage: {file.name}\nDescription: {caption}\n"
169
- dpr_text.append(dpr_section)
170
-
171
- # Save image path for embedding in the report
172
- image_paths.append(file.name)
173
-
174
- # Combine DPR text
175
  dpr_output = "\n".join(dpr_text)
176
-
177
- # Generate PDF filename with timestamp
178
  pdf_filename = f"DPR_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.pdf"
179
-
180
- # Save DPR text to PDF
181
  pdf_result, pdf_filepath = save_dpr_to_pdf(dpr_output, image_paths, captions, pdf_filename)
182
-
 
 
 
 
183
  salesforce_result = ""
184
- pdf_content_document_id = None
185
- pdf_url = None
186
- image_content_document_ids = []
187
-
188
  if sf and pdf_filepath:
189
  try:
190
- # Create Daily_Progress_Reports__c record
191
- report_description = "; ".join(captions)[:255] # Concatenate captions, limit to 255 chars
192
  dpr_record = sf.Daily_Progress_Reports__c.create({
193
- 'Detected_Activities__c': report_description # Store in Detected_Activities__c field
194
  })
195
  dpr_record_id = dpr_record['id']
196
  salesforce_result += f"Created Daily_Progress_Reports__c record with ID: {dpr_record_id}\n"
197
-
198
- # Upload PDF to Salesforce
199
  pdf_content_document_id, pdf_url, pdf_upload_result = upload_file_to_salesforce(
200
  pdf_filepath, pdf_filename, sf, "pdf"
201
  )
202
  salesforce_result += pdf_upload_result + "\n"
203
-
204
- # Link PDF to DPR record
205
  if pdf_content_document_id:
206
  sf.ContentDocumentLink.create({
207
  'ContentDocumentId': pdf_content_document_id,
208
  'LinkedEntityId': dpr_record_id,
209
  'ShareType': 'V'
210
  })
211
-
212
- # Update the DPR record with the PDF URL
213
  if pdf_url:
214
  sf.Daily_Progress_Reports__c.update(dpr_record_id, {
215
- 'PDF_URL__c': pdf_url # Storing the PDF URL correctly
216
  })
217
  salesforce_result += f"Updated PDF URL for record ID {dpr_record_id}\n"
218
-
219
- # Upload images to Salesforce and link them to DPR record
220
  for file in files:
221
  image_filename = os.path.basename(file.name)
222
  image_content_document_id, image_url, image_upload_result = upload_file_to_salesforce(
223
  file.name, image_filename, sf, "image"
224
  )
225
-
226
  if image_content_document_id:
227
- # Link image to the Daily Progress Report record (DPR) using ContentDocumentLink
228
  sf.ContentDocumentLink.create({
229
  'ContentDocumentId': image_content_document_id,
230
- 'LinkedEntityId': dpr_record_id, # Link image to DPR record
231
- 'ShareType': 'V' # 'V' means Viewer access
232
  })
233
-
234
- # Now, update the DPR record with the ContentDocumentId in the Site_Images field (if it's a text or URL field)
235
  sf.Daily_Progress_Reports__c.update(dpr_record_id, {
236
- 'Site_Images__c': image_content_document_id # Storing the ContentDocumentId directly
237
  })
238
-
239
  salesforce_result += image_upload_result + "\n"
240
-
241
  except Exception as e:
242
  salesforce_result += f"Error interacting with Salesforce: {str(e)}\n"
243
  else:
244
  salesforce_result = "Salesforce connection not available or PDF generation failed.\n"
245
-
246
- # Return DPR text, PDF file, and Salesforce upload status
 
247
  return (
248
- dpr_output + f"\n\n{pdf_result}\n\nSalesforce Upload Status:\n{salesforce_result}",
249
  pdf_filepath
250
  )
251
- # Gradio interface for uploading multiple files, displaying DPR, and downloading PDF
 
252
  iface = gr.Interface(
253
  fn=generate_dpr,
254
- inputs=gr.Files(type="filepath", label="Upload Site Photos"),
255
  outputs=[
256
  gr.Textbox(label="Daily Progress Report"),
257
  gr.File(label="Download PDF")
 
13
  import base64
14
  import io
15
  import concurrent.futures
16
+ import time
17
 
18
  # Load environment variables from .env file
19
  load_dotenv()
 
37
  device = "cuda" if torch.cuda.is_available() else "cpu"
38
  model.to(device)
39
 
def generate_captions_from_image(image_path):
    """Generate a text caption for the image stored at ``image_path``.

    The image is opened, converted to RGB when it is in another mode,
    downscaled to 224x224, and passed through the module-level ``processor``
    and ``model`` with gradient tracking disabled.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded caption, or a string starting with
        ``"Error processing image: "`` when any step raises. Failures are
        reported in-band so one unreadable image does not abort the caller.
    """
    try:
        # Pillow keeps the underlying file open until the image is closed;
        # the context manager releases the handle as soon as resize() has
        # produced an independent copy of the pixel data.
        with Image.open(image_path) as source:
            rgb = source if source.mode == "RGB" else source.convert("RGB")
            # Resize smaller for speed
            resized = rgb.resize((224, 224))

        # NOTE(review): inputs are always cast to float16, even when ``device``
        # is "cpu" - confirm this matches the dtype the model was loaded with.
        inputs = processor(resized, return_tensors="pt").to(device, torch.float16)
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=50)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error processing image: {str(e)}"
55
 
def save_dpr_to_pdf(dpr_text, image_paths, captions, filename):
    """Render the report text and the captioned images into a PDF file.

    Args:
        dpr_text: Report text; every non-blank line becomes a paragraph and
            every blank line becomes vertical spacing.
        image_paths: Paths of the images to embed after the text.
        captions: Captions paired positionally with ``image_paths``.
        filename: Path of the PDF file to write.

    Returns:
        A ``(message, path)`` tuple. ``path`` is ``filename`` on success and
        ``None`` when building the PDF raised, in which case ``message``
        carries the error text.
    """
    # Function-scope import keeps this block self-contained.
    from xml.sax.saxutils import escape

    try:
        doc = SimpleDocTemplate(filename, pagesize=letter)
        title_style = ParagraphStyle(
            name='Title', fontSize=16, leading=20, alignment=1,  # 1 = centered
            spaceAfter=20, textColor=colors.black, fontName='Helvetica-Bold',
        )
        body_style = ParagraphStyle(
            name='Body', fontSize=12, leading=14,
            spaceAfter=10, textColor=colors.black, fontName='Helvetica',
        )

        flowables = [Paragraph("Daily Progress Report", title_style)]

        for line in dpr_text.split('\n'):
            # Curly apostrophes are replaced with plain ASCII ones.
            line = line.replace('\u2019', "'").replace('\u2018', "'")
            if line.strip():
                # Paragraph interprets its text as XML-like markup, so literal
                # '&', '<' and '>' coming from captions or file paths must be
                # escaped or they can make the whole PDF build fail.
                flowables.append(Paragraph(escape(line), body_style))
            else:
                flowables.append(Spacer(1, 12))

        for img_path, caption in zip(image_paths, captions):
            try:
                img = PDFImage(img_path, width=200, height=150)
                flowables.append(img)
                description = f"Description: {caption}"
                flowables.append(Paragraph(escape(description), body_style))
                flowables.append(Spacer(1, 12))
            except Exception as e:
                flowables.append(
                    Paragraph(escape(f"Error loading image: {str(e)}"), body_style)
                )

        doc.build(flowables)
        return f"PDF saved successfully as {filename}", filename
    except Exception as e:
        return f"Error saving PDF: {str(e)}", None
93
 
def upload_file_to_salesforce(file_path, filename, sf_connection, file_type):
    """Upload a local file to Salesforce as a ContentVersion record.

    Args:
        file_path: Path of the file to read and upload.
        filename: Name used for both ``Title`` and ``PathOnClient``.
        sf_connection: Salesforce client exposing ``ContentVersion.create``,
            ``query`` and ``sf_instance``.
        file_type: ``"pdf"`` selects the PDF description; any other value is
            described as a site image.

    Returns:
        ``(content_document_id, download_url, message)`` on success, or
        ``(None, None, error_message)`` when any step raises.
    """
    try:
        with open(file_path, 'rb') as handle:
            encoded = base64.b64encode(handle.read()).decode('utf-8')

        if file_type == "pdf":
            description = "Daily Progress Report PDF"
        else:
            description = "Site Image"

        payload = {
            'Title': filename,
            'PathOnClient': filename,
            'VersionData': encoded,
            'Description': description,
        }
        version_id = sf_connection.ContentVersion.create(payload)['id']

        # The ContentDocumentId is looked up with a follow-up query on the
        # ContentVersion that was just created.
        soql = f"SELECT ContentDocumentId FROM ContentVersion WHERE Id = '{version_id}'"
        document_id = sf_connection.query(soql)['records'][0]['ContentDocumentId']

        download_url = (
            f"https://{sf_connection.sf_instance}"
            f"/sfc/servlet.shepherd/version/download/{version_id}"
        )
    except Exception as e:
        return None, None, f"Error uploading {filename} to Salesforce: {str(e)}"

    return document_id, download_url, f"File {filename} uploaded successfully"
116
 
# Upper bound, in seconds, on waiting for all captions to be generated.
_CAPTION_TIMEOUT_SECONDS = 8
# Upper bound, in seconds, on captioning plus PDF generation.
_PROCESSING_TIMEOUT_SECONDS = 10
# Caption used for images whose captioning did not finish in time.
_TIMED_OUT_CAPTION = "Caption generation timed out"


def _caption_images(image_paths, timeout):
    """Return one caption per path, in the same order as ``image_paths``.

    Captions are generated concurrently. Any image whose caption is not ready
    within ``timeout`` seconds gets the timed-out placeholder instead.
    """
    # Threads rather than processes: the captioning model lives in this
    # process, and worker processes would have to fork or re-import it.
    executor = concurrent.futures.ThreadPoolExecutor()
    try:
        futures = [
            executor.submit(generate_captions_from_image, path)
            for path in image_paths
        ]
        done, not_done = concurrent.futures.wait(futures, timeout=timeout)
        for future in not_done:
            future.cancel()  # only prevents tasks that have not started yet
    finally:
        # Do not block on stragglers, otherwise the timeout would be moot.
        executor.shutdown(wait=False)

    captions = []
    for future in futures:
        if future not in done:
            captions.append(_TIMED_OUT_CAPTION)
            continue
        try:
            captions.append(future.result())
        except Exception as e:
            captions.append(f"Caption generation error: {str(e)}")
    return captions


def _publish_report_to_salesforce(image_paths, captions, pdf_filepath, pdf_filename):
    """Create the DPR record, attach the PDF and images, and return a status log."""
    if not (sf and pdf_filepath):
        return "Salesforce connection not available or PDF generation failed.\n"

    status = ""
    try:
        # Create DPR record in Salesforce; the joined captions are cut to
        # 255 characters before being stored.
        report_description = "; ".join(captions)[:255]
        dpr_record = sf.Daily_Progress_Reports__c.create({
            'Detected_Activities__c': report_description
        })
        dpr_record_id = dpr_record['id']
        status += f"Created Daily_Progress_Reports__c record with ID: {dpr_record_id}\n"

        # Upload PDF
        pdf_content_document_id, pdf_url, pdf_upload_result = upload_file_to_salesforce(
            pdf_filepath, pdf_filename, sf, "pdf"
        )
        status += pdf_upload_result + "\n"

        if pdf_content_document_id:
            sf.ContentDocumentLink.create({
                'ContentDocumentId': pdf_content_document_id,
                'LinkedEntityId': dpr_record_id,
                'ShareType': 'V'  # viewer access
            })

        if pdf_url:
            sf.Daily_Progress_Reports__c.update(dpr_record_id, {
                'PDF_URL__c': pdf_url
            })
            status += f"Updated PDF URL for record ID {dpr_record_id}\n"

        # Upload and link images
        for image_path in image_paths:
            image_filename = os.path.basename(image_path)
            image_content_document_id, _image_url, image_upload_result = upload_file_to_salesforce(
                image_path, image_filename, sf, "image"
            )
            if image_content_document_id:
                sf.ContentDocumentLink.create({
                    'ContentDocumentId': image_content_document_id,
                    'LinkedEntityId': dpr_record_id,
                    'ShareType': 'V'
                })
                # NOTE(review): this field is overwritten on every iteration,
                # so only the last image's id survives - confirm the intended
                # field type before storing several ids.
                sf.Daily_Progress_Reports__c.update(dpr_record_id, {
                    'Site_Images__c': image_content_document_id
                })
            # Report the outcome for every image, including failed uploads.
            status += image_upload_result + "\n"
    except Exception as e:
        status += f"Error interacting with Salesforce: {str(e)}\n"
    return status


def generate_dpr(files):
    """Build a Daily Progress Report from uploaded site photos.

    Captions every image, writes the report and images to a PDF, and, when a
    Salesforce connection is available, creates the report record and attaches
    the PDF and images to it.

    Args:
        files: Uploaded files; each item is either an object with a ``name``
            attribute holding its path or a plain path string.

    Returns:
        A ``(report_text, pdf_path)`` tuple. ``pdf_path`` is ``None`` when no
        files were given, when processing exceeded the time budget, or when
        the PDF could not be written.
    """
    start_time = time.time()
    if not files:
        return "No images were uploaded. Please upload at least one site photo.", None

    image_paths = [getattr(file, "name", file) for file in files]

    dpr_text = [
        f"Daily Progress Report\nGenerated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
    ]

    # Captions come back in upload order so they stay aligned with
    # image_paths when both lists are zipped for the PDF.
    captions = _caption_images(image_paths, _CAPTION_TIMEOUT_SECONDS)
    dpr_text.extend(
        f"\nImage: {path}\nDescription: {caption}\n"
        for path, caption in zip(image_paths, captions)
    )
    dpr_output = "\n".join(dpr_text)

    pdf_filename = f"DPR_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.pdf"
    pdf_result, pdf_filepath = save_dpr_to_pdf(dpr_output, image_paths, captions, pdf_filename)

    elapsed = time.time() - start_time
    if elapsed > _PROCESSING_TIMEOUT_SECONDS:
        return "Processing exceeded 10 seconds timeout. Please try fewer images or smaller images.", None

    salesforce_result = _publish_report_to_salesforce(
        image_paths, captions, pdf_filepath, pdf_filename
    )

    total_elapsed = time.time() - start_time

    return (
        dpr_output + f"\n\n{pdf_result}\n\nSalesforce Upload Status:\n{salesforce_result}\n\nTotal processing time: {total_elapsed:.2f} seconds",
        pdf_filepath
    )
211
+
212
+ # Gradio interface
213
  iface = gr.Interface(
214
  fn=generate_dpr,
215
+ inputs=gr.Files(type="file", label="Upload Site Photos"),
216
  outputs=[
217
  gr.Textbox(label="Daily Progress Report"),
218
  gr.File(label="Download PDF")