Spaces:

Rammohan0504
/

DPR

Sleeping

Rammohan0504 committed on May 7, 2025

Commit

2e55271

verified ·

1 Parent(s): 9d2cac1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,15 +10,32 @@ model.eval()
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 # Inference function
 def generate_caption(image):
     if image.mode != "RGB":
         image = image.convert("RGB")
     inputs = processor(image, return_tensors="pt").to(device, torch.float16)
     output = model.generate(**inputs, max_new_tokens=50)
     caption = processor.decode(output[0], skip_special_tokens=True)
-    return caption
 # Gradio interface
 iface = gr.Interface(
@@ -26,7 +43,7 @@ iface = gr.Interface(
     inputs=gr.Image(type="pil"),
     outputs="text",
     title="Construction Site Image-to-Text Generator",
-    description="Upload a site photo. The model will detect and describe construction activities."
 )
 iface.launch()

 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
+# List of construction-related terms
+construction_terms = [
+    "concrete", "scaffolding", "steel rods", "piling", "excavation",
+    "mixer", "cement", "brickwork", "crane", "rebar", "construction",
+    "foundation", "building", "formwork", "drywall", "steel beams",
+    "hammer", "saw", "nails", "jackhammer"
+]
 # Inference function
 def generate_caption(image):
     """Caption a site photo and keep only the construction-related terms.

     The image is converted to RGB when needed, captioned with the
     module-level ``processor``/``model`` pair, and the caption is then
     scanned for the entries of ``construction_terms``.

     Matching is case-insensitive and works on whole words. Punctuation
     attached to a word does not prevent a match ("crane," still matches
     "crane"), and multi-word entries such as "steel rods" are recognised,
     which a plain whitespace split of the caption can never produce.

     Args:
         image: Image object exposing ``mode`` and ``convert`` (the Gradio
             input is declared with ``type="pil"``).

     Returns:
         The matched terms in caption order, spelled as they appear in the
         caption and separated by single spaces, or the message
         "No construction-related activities detected." when nothing matches.
     """
     # Imported locally because the file's import block is outside this view.
     import re
     if image.mode != "RGB":
         image = image.convert("RGB")
     # Preprocess the image and generate a caption.
     # NOTE(review): inputs are always cast to float16, even when device is
     # "cpu" - confirm this matches the dtype the model was loaded with.
     inputs = processor(image, return_tensors="pt").to(device, torch.float16)
     output = model.generate(**inputs, max_new_tokens=50)
     caption = processor.decode(output[0], skip_special_tokens=True)
     # One regex alternative per term, longest first so a longer entry wins
     # over a shorter one it contains. Spaces inside a term match any run of
     # whitespace in the caption.
     alternatives = [
         r"\s+".join(re.escape(part) for part in term.split())
         for term in sorted(construction_terms, key=len, reverse=True)
         if term.strip()
     ]
     matches = []
     # An empty alternation would match the empty string, so skip the scan
     # entirely when there are no usable terms.
     if alternatives:
         # Lookarounds enforce whole-word matches without relying on \b,
         # which misbehaves for terms that start or end with punctuation.
         pattern = re.compile(
             r"(?<!\w)(?:" + "|".join(alternatives) + r")(?!\w)",
             re.IGNORECASE,
         )
         matches = [
             " ".join(found.group(0).split())
             for found in pattern.finditer(caption)
         ]
     # If no construction-related terms are found, return a default message.
     if not matches:
         return "No construction-related activities detected."
     return " ".join(matches)
 # Gradio interface
 iface = gr.Interface(
     inputs=gr.Image(type="pil"),
     outputs="text",
     title="Construction Site Image-to-Text Generator",
+    description="Upload a site photo. The model will detect and describe construction activities and materials (e.g., concrete pouring, scaffolding, steel rods)."
 )
 iface.launch()