Mr-HASSAN committed on
Commit
f6e4978
·
verified ·
1 Parent(s): 46d999c

Upload 4 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ NotoNaskhArabic-VariableFont_wght[[:space:]](1).ttf filter=lfs diff=lfs merge=lfs -text
NotoNaskhArabic-VariableFont_wght (1).ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6614f7a5a98f4397f10149d6c199a5ee7996c48c609c604de30cd9049c5c00c3
3
+ size 305080
app.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import os
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import gradio as gr
7
+ from ultralytics import YOLO
8
+ from PIL import Image, ImageDraw, ImageFont
9
+ import arabic_reshaper
10
+ from bidi.algorithm import get_display
11
+
12
+ import google.generativeai as genai
13
+
14
# ==========================
# Gemini API credentials
# ==========================

# SECURITY: an API key must never be committed to the repository. The key is
# read from the GEMINI_API_KEY environment variable instead (on a hosted
# deployment, define it as a secret). Any key that was previously committed in
# this file is public and should be revoked and regenerated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

if not GEMINI_API_KEY:
    # Do not abort start-up: sign detection still works without Gemini, and
    # call_gemini_on_word() reports request failures in the chat.
    print("⚠️ GEMINI_API_KEY is not set; Gemini requests will fail.")

# Configure the Gemini client with the key taken from the environment.
genai.configure(api_key=GEMINI_API_KEY)
22
+
23
# ==========================
# YOLO settings + constants
# ==========================

# Weights file handed to YOLO() when the model is loaded.
WEIGHTS_PATH = "best.pt"
# Passed to model.predict() as `imgsz`.
IMG_SIZE = 256
# Passed to model.predict() as `conf`.
CONF_THRESHOLD = 0.5

# Consecutive frames that must show the same label before a letter is
# appended to the current word.
MIN_STABLE_FRAMES = 3
# Seconds since the last appended letter after which the status line warns
# that the word is about to be finalized.
WARN_BEFORE_RESET = 1.5
# Seconds since the last appended letter after which the word is finalized
# and sent to the chat.
RESET_DELAY = 2.5

# Model class name -> Arabic text shown on screen and appended to the word.
arabic_map = {
    "aleff": "ا",
    "bb": "ب",
    "ta": "ت",
    "thaa": "ث",
    "jeem": "ج",
    "haa": "ح",
    "khaa": "خ",
    "dal": "د",
    "thal": "ذ",
    "ra": "ر",
    "zay": "ز",
    "seen": "س",
    "sheen": "ش",
    "saad": "ص",
    "dhad": "ض",
    "taa": "ط",
    "dha": "ظ",
    "ain": "ع",
    "ghain": "غ",
    "fa": "ف",
    "gaaf": "ق",
    "kaaf": "ك",
    "laam": "ل",
    "la": "لا",
    "meem": "م",
    "nun": "ن",
    "ha": "ه",
    "waw": "و",
    "ya": "ي",
    "yaa": "ي",
    "toot": "ة",
    "al": "ال",
}

# ==========================
# Gemini instruction prompt
# ==========================

# Prepended to every request built in call_gemini_on_word().
SYSTEM_PROMPT = (
    "أنت مساعد ذكي يستقبل كلمات أو جمل قصيرة قادمة من مترجم لغة "
    "الإشارة العربية، ودورك أن تعيد صياغتها كنص عربي واضح ومفهوم، "
    "أو تشرح معناها باختصار إذا كانت كلمة واحدة."
)
54
+
55
# ==========================
# Arabic text drawing helpers
# ==========================

# The font file was uploaded to this repository under this name, which differs
# from the default `font_path` below; it is tried when the requested path
# cannot be loaded.
_FALLBACK_FONT_PATHS = ("NotoNaskhArabic-VariableFont_wght (1).ttf",)

# (font_path, font_size) -> loaded font, so the file is not re-read for every
# label on every frame.
_FONT_CACHE = {}


def _load_font(font_path, font_size):
    """Return a cached font for (font_path, font_size), with fallbacks."""
    key = (font_path, font_size)
    font = _FONT_CACHE.get(key)
    if font is not None:
        return font

    for candidate in (font_path, *_FALLBACK_FONT_PATHS):
        try:
            font = ImageFont.truetype(candidate, font_size)
            break
        except Exception:
            # Best effort, as before: an unusable font must never break the
            # video stream. Try the next candidate instead.
            continue
    else:
        # Make the degradation visible instead of silently drawing with a
        # font that may lack Arabic glyphs. Printed once per key (cached).
        print(f"⚠️ Could not load font {font_path!r}; using PIL default font.")
        font = ImageFont.load_default()

    _FONT_CACHE[key] = font
    return font


def draw_arabic_text(img, text, x, y,
                     font_path="NotoNaskhArabic-VariableFont_wght.ttf",
                     font_size=24):
    """Draw Arabic `text` on `img` in black and return the result.

    The text is reshaped with arabic_reshaper and reordered with
    python-bidi's get_display before being drawn with Pillow.

    Args:
        img: Image array accepted by ``PIL.Image.fromarray``.
        text: Text to draw.
        x, y: Position passed to ``ImageDraw.text``.
        font_path: TrueType font file to use. If it cannot be loaded, the
            file name the font was uploaded under is tried, then PIL's
            default font.
        font_size: Font size passed to ``ImageFont.truetype``.

    Returns:
        A new numpy array containing the image with the text drawn on it.
    """
    reshaped = arabic_reshaper.reshape(text)
    bidi_text = get_display(reshaped)

    img_pil = Image.fromarray(img)
    draw = ImageDraw.Draw(img_pil)

    font = _load_font(font_path, font_size)

    draw.text((x, y), bidi_text, font=font, fill=(0, 0, 0))
    return np.array(img_pil)
76
+
77
+
78
def draw_detections(result, frame, names):
    """Draw every detection of `result` on `frame` and collect the labels.

    Args:
        result: Object exposing a `boxes` attribute; each box provides
            `xyxy[0]` (corner coordinates) and `cls[0]` (class id).
        frame: Image the rectangles and labels are drawn on.
        names: Class-id -> name mapping, either a dict or a sequence.

    Returns:
        (frame, labels): the annotated frame and the label of each box in
        iteration order, translated through `arabic_map` when the class name
        has an entry there. When there are no boxes, `frame` is returned
        untouched with an empty list.
    """
    detected_labels = []
    boxes = result.boxes

    if boxes is None or len(boxes) == 0:
        return frame, detected_labels

    green = (0, 255, 0)

    for det in boxes:
        left, top, right, bottom = (int(v) for v in det.xyxy[0])
        class_index = int(det.cls[0])

        # Resolve the class name; unknown ids fall back to the id as text.
        if isinstance(names, dict):
            english = names.get(class_index, str(class_index))
        elif class_index < len(names):
            english = names[class_index]
        else:
            english = str(class_index)

        arabic = arabic_map.get(english, english)
        detected_labels.append(arabic)

        # Bounding box outline.
        cv2.rectangle(frame, (left, top), (right, bottom), green, 2)

        # Filled label banner above the box, clamped to the top image edge.
        banner_top = max(0, top - 35)
        cv2.rectangle(frame, (left, banner_top), (left + 140, top), green, -1)

        frame = draw_arabic_text(frame, arabic, left + 5, banner_top + 5)

    return frame, detected_labels
107
+
108
+
109
# ==========================
# Load YOLO
# ==========================

print("🔹 Loading YOLO model...")

# The weights were uploaded to this repository as "best (2).pt", which does
# not match WEIGHTS_PATH. Prefer WEIGHTS_PATH when it exists; otherwise fall
# back to the uploaded file name so the app can still start.
_weights_file = WEIGHTS_PATH
if not os.path.exists(_weights_file) and os.path.exists("best (2).pt"):
    _weights_file = "best (2).pt"

model = YOLO(_weights_file)
print("📚 Classes:", model.names)
116
+
117
+
118
+ # ==========================
119
+ # Gemini API Call
120
+ # ==========================
121
+
122
def call_gemini_on_word(word: str) -> str:
    """Ask Gemini to rephrase or briefly explain the recognized text.

    Args:
        word: Text assembled from the recognized signs.

    Returns:
        The reply text, or an empty string when `word` is empty or the reply
        has no text. Any exception raised while calling the API is returned
        as an error message string instead of being propagated.
    """
    if not word:
        return ""

    try:
        gemini = genai.GenerativeModel("gemini-1.5-flash")

        # System instructions followed by the recognized text and the task.
        request_text = "".join((
            SYSTEM_PROMPT,
            f"\n\nالنص القادم من مترجم لغة الإشارة هو: «{word}».\n",
            "اكتب جملة قصيرة أو شرحًا بسيطًا بالعربية اعتمادًا على هذا النص.",
        ))

        reply = gemini.generate_content(request_text)
        reply_text = reply.text
        return reply_text if reply_text else ""
    except Exception as e:
        return f"خطأ Gemini: {e}"
139
+
140
+
141
# ==========================
# Frame processing
# ==========================

def process_frame(frame,
                  current_word="",
                  last_label=None,
                  stable_count=0,
                  last_letter_time=None,
                  chat_history=None):
    """Process one streamed webcam frame and advance the recognition state.

    Args:
        frame: RGB image array from the webcam, or None when no image is
            available.
        current_word: Letters accumulated so far for the word in progress.
        last_label: Label seen on the previous detection, or None.
        stable_count: Number of consecutive frames showing `last_label`.
        last_letter_time: `time.time()` of the last appended letter, or None.
        chat_history: List of [speaker, text] pairs; a new list is created
            when None.

    Returns:
        An 8-tuple matching the Gradio outputs: (annotated RGB frame, status
        text, current_word, last_label, stable_count, last_letter_time,
        chat_history, chat_history). The history appears twice because it
        feeds both the state and the chatbot component.
    """
    if chat_history is None:
        chat_history = []

    # The callback may be invoked without an image (NOTE(review): e.g. when
    # the webcam input is cleared - confirm against the Gradio version used).
    # cv2.cvtColor would raise on None, so return the state unchanged.
    if frame is None:
        status_text = f"الكلمة الحالية: {current_word}" if current_word else ""
        return (
            None,
            status_text,
            current_word,
            last_label,
            stable_count,
            last_letter_time,
            chat_history,
            chat_history,
        )

    # OpenCV drawing works on BGR; the frame is also mirrored horizontally.
    frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    frame_bgr = cv2.flip(frame_bgr, 1)

    results = model.predict(frame_bgr, conf=CONF_THRESHOLD,
                            imgsz=IMG_SIZE, verbose=False)[0]

    annotated, labels = draw_detections(results, frame_bgr, model.names)

    if labels:
        # Only the first detection drives the word being spelled.
        current_label = labels[0]
        if current_label == last_label:
            stable_count += 1
        else:
            last_label = current_label
            stable_count = 1

        # Accept the letter once it has been stable for enough frames, then
        # restart the count so holding the sign can repeat the letter.
        if stable_count >= MIN_STABLE_FRAMES:
            current_word += current_label
            last_letter_time = time.time()
            stable_count = 0

    status_text = ""

    if current_word and last_letter_time is not None:
        elapsed = time.time() - last_letter_time

        if elapsed > RESET_DELAY:
            # No new letter for RESET_DELAY seconds: finalize the word, post
            # it to the chat together with Gemini's reply, and reset.
            final_text = current_word

            chat_history.append(["🖐️ من الإشارات", final_text])

            gpt_reply = call_gemini_on_word(final_text)

            if gpt_reply:
                chat_history.append(["🤖 المساعد", gpt_reply])

            current_word = ""
            last_label = None
            stable_count = 0
            last_letter_time = None
        elif elapsed > WARN_BEFORE_RESET:
            status_text = f"الكلمة الحالية: {current_word} (سيتم إنهاؤها قريبًا)"
        else:
            status_text = f"الكلمة الحالية: {current_word}"

    annotated_rgb = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)

    return (
        annotated_rgb,
        status_text,
        current_word,
        last_label,
        stable_count,
        last_letter_time,
        chat_history,
        chat_history,
    )
212
+
213
+
214
# ==========================
# Gradio interface
# ==========================

with gr.Blocks() as demo:
    gr.Markdown("## ASL → Arabic Chat (YOLO + Gemini)")

    # Webcam input on the left, annotated result on the right.
    with gr.Row():
        cam = gr.Image(sources=["webcam"], streaming=True,
                       type="numpy", label="الكاميرا")
        video_out = gr.Image(label="النتيجة")

    word_status = gr.Markdown()
    chatbox = gr.Chatbot(label="الشات (إشارة → نص)")

    # Per-session recognition state threaded through process_frame.
    state_current_word = gr.State("")
    state_last_label = gr.State(None)
    state_stable_count = gr.State(0)
    state_last_letter_time = gr.State(None)
    state_chat_history = gr.State([])

    # Same order as the state parameters and return values of process_frame.
    _recognition_state = [
        state_current_word,
        state_last_label,
        state_stable_count,
        state_last_letter_time,
        state_chat_history,
    ]

    cam.stream(
        fn=process_frame,
        inputs=[cam, *_recognition_state],
        outputs=[video_out, word_status, *_recognition_state, chatbox],
    )


if __name__ == "__main__":
    demo.launch()
best (2).pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1126bf72b6b69eb9e608ad6132a9a9411c37854e1b08f5bb6ccbe8f6f0418c0
3
+ size 52045963
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ultralytics
2
+ opencv-python-headless
3
+ Pillow
4
+ arabic-reshaper
5
+ python-bidi
6
+ gradio
7
+ google-generativeai