Mustafa-albakkar committed on
Commit
fa1a396
·
verified ·
1 Parent(s): f62f292

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -18
app.py CHANGED
@@ -1,12 +1,29 @@
1
  import gradio as gr
2
  import intel_extension_for_pytorch as ipex
3
  import torch
4
- import torch, os, tempfile, requests, cv2
 
 
 
5
  from transformers import AutoProcessor, AutoModelForVision2Seq
6
  from PIL import Image
7
  from faster_whisper import WhisperModel
8
  import torch.nn as nn
9
  import transformers.activations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # إصلاح مؤقت لمشكلة PytorchGELUTanh المحذوفة
12
  if not hasattr(transformers.activations, "PytorchGELUTanh"):
@@ -14,15 +31,19 @@ if not hasattr(transformers.activations, "PytorchGELUTanh"):
14
  def forward(self, x):
15
  return 0.5 * x * (1 + torch.tanh(torch.sqrt(torch.tensor(2.0 / torch.pi)) * (x + 0.044715 * x**3)))
16
  transformers.activations.PytorchGELUTanh = PytorchGELUTanh
 
17
  # ==============================
18
  # إعدادات الجهاز والنماذج
19
  # ==============================
20
  device = "cpu"
21
 
22
  VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
 
23
  processor = AutoProcessor.from_pretrained(VL_MODEL_ID, trust_remote_code=True)
24
  vl_model = AutoModelForVision2Seq.from_pretrained(VL_MODEL_ID, trust_remote_code=True).to(device)
 
25
  whisper = WhisperModel("base", device=device)
 
26
 
27
  # ==============================
28
  # الدالة الرئيسية لتحليل الوسائط
@@ -32,16 +53,21 @@ def analyze_media(input_data: str) -> str:
32
  يستقبل إما رابط صورة / صوت / فيديو أو مسار ملف محلي.
33
  ويُرجع وصف الصورة أو تفريغ النص من الصوت.
34
  """
 
35
  try:
36
  # --- تحديد نوع الإدخال ---
37
- url_or_path = input_data.strip()
38
  if not url_or_path:
39
- return "No input provided."
 
 
40
 
41
  # --- تحليل الصورة ---
42
  if url_or_path.endswith((".jpg", ".jpeg", ".png")):
 
43
  # تحميل الصورة من الإنترنت أو المسار المحلي
44
  if url_or_path.startswith("http"):
 
45
  response = requests.get(url_or_path, stream=True, timeout=15)
46
  response.raise_for_status()
47
  image = Image.open(response.raw).convert("RGB")
@@ -51,14 +77,18 @@ def analyze_media(input_data: str) -> str:
51
  inputs = processor(text="Describe the image in detail.", images=image, return_tensors="pt").to(device)
52
  with torch.no_grad():
53
  out = vl_model.generate(**inputs, max_new_tokens=256)
54
- result = processor.batch_decode(out, skip_special_tokens=True)[0]
55
- return result.strip()
 
56
 
57
  # --- تحليل الصوت ---
58
  elif url_or_path.endswith((".mp3", ".wav", ".m4a", ".flac")):
 
59
  # تحميل الملف مؤقتًا إذا كان من رابط
 
60
  if url_or_path.startswith("http"):
61
  temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
 
62
  data = requests.get(url_or_path, timeout=30).content
63
  with open(temp_path, "wb") as f:
64
  f.write(data)
@@ -66,15 +96,23 @@ def analyze_media(input_data: str) -> str:
66
  temp_path = url_or_path
67
 
68
  segments, _ = whisper.transcribe(temp_path)
69
- text = " ".join([seg.text for seg in segments])
70
- if os.path.exists(temp_path) and url_or_path.startswith("http"):
71
- os.remove(temp_path)
72
- return text.strip()
 
 
 
 
 
73
 
74
  # --- تحليل الفيديو (وصف الإطار الأول) ---
75
  elif url_or_path.endswith((".mp4", ".avi", ".mov", ".mkv")):
 
 
76
  if url_or_path.startswith("http"):
77
  temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
 
78
  data = requests.get(url_or_path, timeout=30).content
79
  with open(temp_video, "wb") as f:
80
  f.write(data)
@@ -85,7 +123,13 @@ def analyze_media(input_data: str) -> str:
85
  ret, frame = cap.read()
86
  cap.release()
87
  if not ret:
88
- return "Could not read video."
 
 
 
 
 
 
89
  frame_path = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg").name
90
  cv2.imwrite(frame_path, frame)
91
  image = Image.open(frame_path).convert("RGB")
@@ -93,18 +137,30 @@ def analyze_media(input_data: str) -> str:
93
  inputs = processor(text="Describe the video frame.", images=image, return_tensors="pt").to(device)
94
  with torch.no_grad():
95
  out = vl_model.generate(**inputs, max_new_tokens=256)
96
- result = processor.batch_decode(out, skip_special_tokens=True)[0]
97
- os.remove(frame_path)
98
- if url_or_path.startswith("http") and os.path.exists(temp_video):
99
- os.remove(temp_video)
100
- return result.strip()
 
 
 
 
 
 
 
101
 
102
  else:
103
- return "Unsupported format. Please provide an image, audio, or video file."
 
 
104
 
105
  except Exception as e:
106
- return f"❌ Error: {str(e)}"
107
-
 
 
 
108
 
109
  # ==============================
110
  # واجهة Gradio
@@ -121,4 +177,5 @@ iface = gr.Interface(
121
  # تشغيل الواجهة فقط (بدون FastAPI)
122
  # ==============================
123
  if __name__ == "__main__":
 
124
  iface.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
 
1
  import gradio as gr
2
  import intel_extension_for_pytorch as ipex
3
  import torch
4
+ import os
5
+ import tempfile
6
+ import requests
7
+ import cv2
8
  from transformers import AutoProcessor, AutoModelForVision2Seq
9
  from PIL import Image
10
  from faster_whisper import WhisperModel
11
  import torch.nn as nn
12
  import transformers.activations
13
+ import logging
14
+ import sys
15
+ import traceback
16
+
17
+ # ==============================
18
+ # Logging configuration
19
+ # ==============================
20
+ LOG_LEVEL = os.getenv("MEDIA_AGENT_LOG_LEVEL", "INFO").upper()
21
+ logging.basicConfig(
22
+ level=LOG_LEVEL,
23
+ format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
24
+ handlers=[logging.StreamHandler(stream=sys.stdout)]
25
+ )
26
+ logger = logging.getLogger("MediaAgent")
27
 
28
  # إصلاح مؤقت لمشكلة PytorchGELUTanh المحذوفة
29
  if not hasattr(transformers.activations, "PytorchGELUTanh"):
 
31
  def forward(self, x):
32
  return 0.5 * x * (1 + torch.tanh(torch.sqrt(torch.tensor(2.0 / torch.pi)) * (x + 0.044715 * x**3)))
33
  transformers.activations.PytorchGELUTanh = PytorchGELUTanh
34
+
35
  # ==============================
36
  # إعدادات الجهاز والنماذج
37
  # ==============================
38
  device = "cpu"
39
 
40
  VL_MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
41
+ logger.info("Loading processor and VL model (%s)...", VL_MODEL_ID)
42
  processor = AutoProcessor.from_pretrained(VL_MODEL_ID, trust_remote_code=True)
43
  vl_model = AutoModelForVision2Seq.from_pretrained(VL_MODEL_ID, trust_remote_code=True).to(device)
44
+ logger.info("VL model loaded.")
45
  whisper = WhisperModel("base", device=device)
46
+ logger.info("Whisper model loaded.")
47
 
48
  # ==============================
49
  # الدالة الرئيسية لتحليل الوسائط
 
53
  يستقبل إما رابط صورة / صوت / فيديو أو مسار ملف محلي.
54
  ويُرجع وصف الصورة أو تفريغ النص من الصوت.
55
  """
56
+ logger.info("analyze_media called. input (first 300 chars): %s", (input_data or "")[:300])
57
  try:
58
  # --- تحديد نوع الإدخال ---
59
+ url_or_path = (input_data or "").strip()
60
  if not url_or_path:
61
+ result = "No input provided."
62
+ logger.info("result: %s", result)
63
+ return result
64
 
65
  # --- تحليل الصورة ---
66
  if url_or_path.endswith((".jpg", ".jpeg", ".png")):
67
+ logger.info("Detected image input: %s", url_or_path)
68
  # تحميل الصورة من الإنترنت أو المسار المحلي
69
  if url_or_path.startswith("http"):
70
+ logger.info("Downloading image from URL...")
71
  response = requests.get(url_or_path, stream=True, timeout=15)
72
  response.raise_for_status()
73
  image = Image.open(response.raw).convert("RGB")
 
77
  inputs = processor(text="Describe the image in detail.", images=image, return_tensors="pt").to(device)
78
  with torch.no_grad():
79
  out = vl_model.generate(**inputs, max_new_tokens=256)
80
+ result = processor.batch_decode(out, skip_special_tokens=True)[0].strip()
81
+ logger.info("image analysis result (first 500 chars): %s", result[:500])
82
+ return result
83
 
84
  # --- تحليل الصوت ---
85
  elif url_or_path.endswith((".mp3", ".wav", ".m4a", ".flac")):
86
+ logger.info("Detected audio input: %s", url_or_path)
87
  # تحميل الملف مؤقتًا إذا كان من رابط
88
+ temp_path = None
89
  if url_or_path.startswith("http"):
90
  temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
91
+ logger.info("Downloading audio to temporary path: %s", temp_path)
92
  data = requests.get(url_or_path, timeout=30).content
93
  with open(temp_path, "wb") as f:
94
  f.write(data)
 
96
  temp_path = url_or_path
97
 
98
  segments, _ = whisper.transcribe(temp_path)
99
+ text = " ".join([seg.text for seg in segments]).strip()
100
+ if url_or_path.startswith("http") and os.path.exists(temp_path):
101
+ try:
102
+ os.remove(temp_path)
103
+ logger.debug("Temporary audio file removed: %s", temp_path)
104
+ except Exception:
105
+ logger.warning("Failed to remove temp audio: %s", temp_path)
106
+ logger.info("audio transcription result (first 500 chars): %s", text[:500])
107
+ return text
108
 
109
  # --- تحليل الفيديو (وصف الإطار الأول) ---
110
  elif url_or_path.endswith((".mp4", ".avi", ".mov", ".mkv")):
111
+ logger.info("Detected video input: %s", url_or_path)
112
+ temp_video = None
113
  if url_or_path.startswith("http"):
114
  temp_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
115
+ logger.info("Downloading video to temporary path: %s", temp_video)
116
  data = requests.get(url_or_path, timeout=30).content
117
  with open(temp_video, "wb") as f:
118
  f.write(data)
 
123
  ret, frame = cap.read()
124
  cap.release()
125
  if not ret:
126
+ result = "Could not read video."
127
+ logger.error(result + " input: %s", url_or_path)
128
+ if temp_video and os.path.exists(temp_video):
129
+ try: os.remove(temp_video)
130
+ except: pass
131
+ return result
132
+
133
  frame_path = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg").name
134
  cv2.imwrite(frame_path, frame)
135
  image = Image.open(frame_path).convert("RGB")
 
137
  inputs = processor(text="Describe the video frame.", images=image, return_tensors="pt").to(device)
138
  with torch.no_grad():
139
  out = vl_model.generate(**inputs, max_new_tokens=256)
140
+ result = processor.batch_decode(out, skip_special_tokens=True)[0].strip()
141
+ logger.info("video frame analysis result (first 500 chars): %s", result[:500])
142
+ try:
143
+ os.remove(frame_path)
144
+ except Exception:
145
+ logger.debug("Could not remove frame file: %s", frame_path)
146
+ if temp_video and os.path.exists(temp_video):
147
+ try:
148
+ os.remove(temp_video)
149
+ except Exception:
150
+ logger.debug("Could not remove temp video: %s", temp_video)
151
+ return result
152
 
153
  else:
154
+ result = "Unsupported format. Please provide an image, audio, or video file."
155
+ logger.warning("Unsupported format for input: %s", url_or_path)
156
+ return result
157
 
158
  except Exception as e:
159
+ # سجل الاستثناء مع traceback كامل
160
+ logger.exception("Exception in analyze_media: %s", e)
161
+ tb = traceback.format_exc()
162
+ # أعد رسالة أكثر ودية للواجهة مع تضمين سطر الخطأ الأول (تفصيل كامل في اللوغ)
163
+ return f"❌ Error: {str(e)} (see server log for traceback)"
164
 
165
  # ==============================
166
  # واجهة Gradio
 
177
  # تشغيل الواجهة فقط (بدون FastAPI)
178
  # ==============================
179
  if __name__ == "__main__":
180
+ logger.info("Launching Gradio app on %s:%s", "0.0.0.0", os.getenv("PORT", 7860))
181
  iface.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))