Spaces:

vidhi0405
/

VideoToText

Sleeping

vidhi0405 committed 8 days ago

Commit

a18a676

1 Parent(s): c3d87b5

commit 3

Files changed (2) hide show

app.py CHANGED Viewed

@@ -64,7 +64,6 @@ if MODEL_DEVICE not in {"auto", "cpu", "cuda", "mps"}:
 class AnalysisResponse(BaseModel):
     success: bool
     message: str
-    job_id: str
     video_description: str
     highlights: str
     analysis_file: str
@@ -263,6 +262,12 @@ async def upload_video(
         if not enriched_description:
             enriched_description = base_desc
         results["video_description"] = enriched_description
         with open(analysis_path, "w") as f:
             json.dump(results, f, indent=2)
@@ -270,7 +275,6 @@ async def upload_video(
         return AnalysisResponse(
             success=True,
             message="Video description generated successfully",
-            job_id=job_id,
             video_description=enriched_description,
             highlights=f"/tmp/outputs/{output_filename}",
             analysis_file=f"/tmp/outputs/{analysis_filename}",

 class AnalysisResponse(BaseModel):
     success: bool
     message: str
     video_description: str
     highlights: str
     analysis_file: str
         if not enriched_description:
             enriched_description = base_desc
+        logger.info(
+            "API response selected_set=%s video_description=%s",
+            selected_set or "fallback",
+            enriched_description,
+        )
         results["video_description"] = enriched_description
         with open(analysis_path, "w") as f:
             json.dump(results, f, indent=2)
         return AnalysisResponse(
             success=True,
             message="Video description generated successfully",
             video_description=enriched_description,
             highlights=f"/tmp/outputs/{output_filename}",
             analysis_file=f"/tmp/outputs/{analysis_filename}",

huggingface_exact_approach.py CHANGED Viewed

@@ -165,17 +165,19 @@ class VideoHighlightDetector:
                 "content": [{
                     "type": "text",
                     "text": (
-                        "Describe the video in 4-5 clear, complete sentences.\n"
-                        "Focus only on what is visually happening on screen.\n\n"
                         "Include:\n"
-                        "- The main subjects and their actions\n"
-                        "- The setting or environment\n"
-                        "- Any visible emotions, gestures, or interactions\n"
-                        "- Important changes or events during the clip\n\n"
-                        "Do NOT add assumptions, opinions, or unseen context.\n"
-                        "Do NOT mention the camera, audio, or that this is a video.\n"
-                        "Write in simple, factual, neutral language.\n\n"
-                        f"Use this draft as source facts only:\n{draft}"
                     )
                 }]
             }
@@ -202,7 +204,7 @@ class VideoHighlightDetector:
                 "role": "user",
                 "content": [
                     {"type": "video", "path": clip_path},
-                    {"type": "text", "text": "Write exactly one factual sentence about what is visually happening."}
                 ]
             }
         ]

                 "content": [{
                     "type": "text",
                     "text": (
+                        "You are reporting live from the scene.\n\n"
+                        "In 4–5 complete sentences, describe exactly what is happening.\n"
+                        "Capture the key visible moments as they unfold.\n\n"
                         "Include:\n"
+                        "- Who is present and what they are doing\n"
+                        "- The environment or location\n"
+                        "- Visible reactions, gestures, or interactions\n"
+                        "- Any significant action or change during the clip\n\n"
+                        "Keep the tone natural, clear, and factual.\n"
+                        "Do not speculate.\n"
+                        "Do not mention the recording device.\n\n"
+                        "Use only these observed details:\n"
+                        f"{draft}"
                     )
                 }]
             }
                 "role": "user",
                 "content": [
                     {"type": "video", "path": clip_path},
+                    {"type": "text", "text": "Write exactly 3-4 factual sentences about what is visually happening."}
                 ]
             }
         ]