vidhi0405 committed on
Commit
e81f17b
·
1 Parent(s): e987372

only for Image to Text

Browse files
Files changed (1) hide show
  1. app.py +14 -19
app.py CHANGED
@@ -229,7 +229,7 @@ def _get_summarizer_runtime():
229
  if _summarizer_model is None or _summarizer_tokenizer is None:
230
  try:
231
  tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_MODEL_ID)
232
- model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_MODEL_ID)
233
  except Exception as exc:
234
  raise AppError("Failed to load summarization model.", 503) from exc
235
  model.eval()
@@ -257,11 +257,12 @@ def summarize_captions(captions: list[str]) -> str:
257
  truncation=True,
258
  return_tensors="pt",
259
  )
 
260
  with torch.no_grad():
261
  output_ids = model.generate(
262
  **inputs,
263
  max_length=150,
264
- min_length=40,
265
  length_penalty=2.0,
266
  num_beams=4,
267
  early_stopping=True,
@@ -407,25 +408,19 @@ async def generate_caption(request: Request):
407
  if not caption:
408
  raise AppError("Caption summarization produced empty text.", 500)
409
 
410
- audio_file_id = insert_record(
411
- caption_collection,
412
- {
413
- "caption": caption,
414
- "source_filenames": [item["filename"] for item in image_captions],
415
- "image_captions": image_captions,
416
- "images_count": len(image_captions),
417
- "is_summarized": len(image_captions) > 1,
418
- "created_at": datetime.now(timezone.utc),
419
- },
420
- )
421
-
422
- response_data = {
423
- "audio_file_id": audio_file_id,
424
  "caption": caption,
 
 
425
  "images_count": len(image_captions),
 
 
426
  }
427
- if len(image_captions) > 1:
428
- response_data["individual_captions"] = image_captions
429
- response_data["summarized_caption"] = caption
 
 
 
430
 
431
  return ok("Caption generated successfully.", response_data)
 
229
  if _summarizer_model is None or _summarizer_tokenizer is None:
230
  try:
231
  tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_MODEL_ID)
232
+ model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_MODEL_ID, torch_dtype=DTYPE).to(DEVICE)
233
  except Exception as exc:
234
  raise AppError("Failed to load summarization model.", 503) from exc
235
  model.eval()
 
257
  truncation=True,
258
  return_tensors="pt",
259
  )
260
+ inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
261
  with torch.no_grad():
262
  output_ids = model.generate(
263
  **inputs,
264
  max_length=150,
265
+ min_length=20,
266
  length_penalty=2.0,
267
  num_beams=4,
268
  early_stopping=True,
 
408
  if not caption:
409
  raise AppError("Caption summarization produced empty text.", 500)
410
 
411
+ mongo_payload = {
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  "caption": caption,
413
+ "source_filenames": [item["filename"] for item in image_captions],
414
+ "image_captions": image_captions,
415
  "images_count": len(image_captions),
416
+ "is_summarized": len(image_captions) > 1,
417
+ "created_at": datetime.now(timezone.utc),
418
  }
419
+
420
+ audio_file_id = insert_record(caption_collection, mongo_payload)
421
+
422
+ response_data = {**mongo_payload, "audio_file_id": audio_file_id}
423
+ response_data.pop("_id", None) # Remove ObjectId as it is not JSON serializable
424
+ response_data["created_at"] = response_data["created_at"].isoformat()
425
 
426
  return ok("Caption generated successfully.", response_data)