ek-5 commited on
Commit
3e82d99
·
verified ·
1 Parent(s): eca969d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
import tempfile
from io import BytesIO

import torch
from fastapi import FastAPI, UploadFile, File
from PIL import Image
from ultralytics import YOLO
from huggingface_hub import hf_hub_download
from transformers import AutoProcessor, AutoModelForCausalLM
9
+
10
# --- 1. FastAPI application setup ---
app = FastAPI(title="Object Detection & Captioning API")

# --- 2. Model loading (runs once, at process startup) ---
# Download the YOLO object-detection weights from the Hugging Face Hub
# (cached locally by hf_hub_download on subsequent starts).
model_path = hf_hub_download(
    repo_id="GradTeam/yolov26-objectDetection",
    filename="best.pt"
)
yolo_model = YOLO(model_path)

# Load the captioning model (Microsoft GIT) on GPU when available.
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("microsoft/git-large")
git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-large").to(device)
25
+
26
# --- 3. Helper functions ---
27
+
28
def get_yolo_detections(image_path):
    """Run YOLO object detection on the image at *image_path*.

    Returns a list of dicts, one per detected object:
    ``{"name": <class name>, "box": [x1, y1, x2, y2]}`` with the box
    coordinates rounded to two decimal places.
    """
    detections = []
    for result in yolo_model(image_path):
        coord_rows = result.boxes.xyxy.tolist()
        class_ids = result.boxes.cls.tolist()
        detections.extend(
            {
                "name": yolo_model.names[int(class_id)],
                # Round coordinates for a compact JSON payload.
                "box": [round(c, 2) for c in row],
            }
            for row, class_id in zip(coord_rows, class_ids)
        )
    return detections
41
+
42
def get_image_caption(image_path, objects):
    """Generate a caption for the image, steered by the detected objects.

    *objects* is the list produced by ``get_yolo_detections``; their names
    are folded into the text prompt given to the GIT captioning model.
    """
    image = Image.open(image_path).convert("RGB")
    labels = [item["name"] for item in objects]

    # Build the prompt from the detections, or fall back to a generic one.
    if labels:
        prompt = "Objects detected: " + ", ".join(labels)
    else:
        prompt = "Describe this image."

    model_inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
    token_ids = git_model.generate(**model_inputs, max_length=50)
    return processor.batch_decode(token_ids, skip_special_tokens=True)[0]
53
+
54
# --- 4. API endpoint ---
55
+
56
@app.post("/analyze")
async def analyze_image(file: UploadFile = File(...)):
    """Detect objects in an uploaded image and generate a caption.

    Returns a JSON payload with a status, the detected objects
    (name + bounding box), their count, and a text description.
    """
    # Persist the upload to a UNIQUE temporary file: the models expect a
    # file path, and a unique name avoids concurrent requests clobbering
    # each other (the original used a fixed "temp_image.jpg").
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as buffer:
        temp_path = buffer.name
        shutil.copyfileobj(file.file, buffer)

    try:
        # 1. Run object detection
        detected_objects = get_yolo_detections(temp_path)

        # 2. Caption the image, conditioned on the detected objects
        description = get_image_caption(temp_path, detected_objects)

        return {
            "status": "success",
            "detected_objects_count": len(detected_objects),
            "objects": detected_objects,
            "description": description
        }
    except Exception as e:
        # Surface the failure to the client instead of a bare 500.
        return {"status": "error", "message": str(e)}
    finally:
        # Always remove the temp file (the original leaked one per request).
        try:
            os.remove(temp_path)
        except OSError:
            pass
78
+
79
# --- 5. Local entry point (optional) ---
# BUG FIX: the original tested `name` (an undefined variable), which raised
# NameError whenever the module was imported or run; the standard guard
# variable is `__name__`.
if __name__ == "__main__":
    import uvicorn

    # Port 7860 is the conventional port for Hugging Face Spaces.
    uvicorn.run(app, host="0.0.0.0", port=7860)