Spaces:

vidhi0405
/

ImageToText

Sleeping

App Files Files Community

vidhi0405 committed 10 days ago

Commit

e81f17b

1 Parent(s): e987372

only for Image to Text

Browse files

Files changed (1) hide show

app.py +14 -19

app.py CHANGED Viewed

@@ -229,7 +229,7 @@ def _get_summarizer_runtime():
         if _summarizer_model is None or _summarizer_tokenizer is None:
             try:
                 tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_MODEL_ID)
-                model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_MODEL_ID)
             except Exception as exc:
                 raise AppError("Failed to load summarization model.", 503) from exc
             model.eval()
@@ -257,11 +257,12 @@ def summarize_captions(captions: list[str]) -> str:
             truncation=True,
             return_tensors="pt",
         )
         with torch.no_grad():
             output_ids = model.generate(
                 **inputs,
                 max_length=150,
-                min_length=40,
                 length_penalty=2.0,
                 num_beams=4,
                 early_stopping=True,
@@ -407,25 +408,19 @@ async def generate_caption(request: Request):
     if not caption:
         raise AppError("Caption summarization produced empty text.", 500)
-    audio_file_id = insert_record(
-        caption_collection,
-        {
-            "caption": caption,
-            "source_filenames": [item["filename"] for item in image_captions],
-            "image_captions": image_captions,
-            "images_count": len(image_captions),
-            "is_summarized": len(image_captions) > 1,
-            "created_at": datetime.now(timezone.utc),
-        },
-    )
-    response_data = {
-        "audio_file_id": audio_file_id,
         "caption": caption,
         "images_count": len(image_captions),
     }
-    if len(image_captions) > 1:
-        response_data["individual_captions"] = image_captions
-        response_data["summarized_caption"] = caption
     return ok("Caption generated successfully.", response_data)

         if _summarizer_model is None or _summarizer_tokenizer is None:
             try:
                 tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_MODEL_ID)
+                model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_MODEL_ID, torch_dtype=DTYPE).to(DEVICE)
             except Exception as exc:
                 raise AppError("Failed to load summarization model.", 503) from exc
             model.eval()
             truncation=True,
             return_tensors="pt",
         )
+        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
         with torch.no_grad():
             output_ids = model.generate(
                 **inputs,
                 max_length=150,
+                min_length=20,
                 length_penalty=2.0,
                 num_beams=4,
                 early_stopping=True,
     if not caption:
         raise AppError("Caption summarization produced empty text.", 500)
+    mongo_payload = {
         "caption": caption,
+        "source_filenames": [item["filename"] for item in image_captions],
+        "image_captions": image_captions,
         "images_count": len(image_captions),
+        "is_summarized": len(image_captions) > 1,
+        "created_at": datetime.now(timezone.utc),
     }
+    audio_file_id = insert_record(caption_collection, mongo_payload)
+    response_data = {**mongo_payload, "audio_file_id": audio_file_id}
+    response_data.pop("_id", None)  # Remove ObjectId as it is not JSON serializable
+    response_data["created_at"] = response_data["created_at"].isoformat()
     return ok("Caption generated successfully.", response_data)