Spaces:

KIMOSSINO
/

hashtags

Sleeping

KIMOSSINO committed on Dec 10, 2024

Commit

d2a4de1

verified ·

1 Parent(s): 130fa7f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -31,26 +31,38 @@ def extract_titles_and_hashtags(file):
     # معالجة كل حاوية
     for container in desc_containers:
-        title = container.get('aria-label', 'بدون عنوان').strip()
-        hashtags = []
         # استخراج الهاشتاغات
         for tag in container.find_all('a'):
             tag_text = tag.get_text(strip=True)
             if tag_text.startswith('#'):
                 hashtags.append(tag_text)
         if hashtags:
             hashtags_counter.update(hashtags)
-            data.append({
-                "Title": title,
-                "Hashtags": ", ".join(hashtags)
-            })
     # إعداد النصوص النهائية
     titles_text = "\n".join(
         f"{i+1}. {row['Title']}"
         for i, row in enumerate(data)
     )
     hashtags_text = "\n".join(

     # معالجة كل حاوية
     for container in desc_containers:
+        # البحث عن العنوان في عدة أماكن محتملة
+        title = (
+            container.find('h2', class_='title')  # البحث عن عنصر h2 مع class='title'
+            or container.find('h1', class_='title')  # أو h1
+            or container.find('div', class_='title')  # أو div
+            or container.find(class_='title')  # أو أي عنصر يحتوي على class='title'
+        )
+        if title:
+            title = title.get_text(strip=True)
+        else:
+            title = container.get('aria-label', 'بدون عنوان').strip()
         # استخراج الهاشتاغات
+        hashtags = []
         for tag in container.find_all('a'):
             tag_text = tag.get_text(strip=True)
             if tag_text.startswith('#'):
                 hashtags.append(tag_text)
+        data.append({
+            "Title": title,
+            "Hashtags": ", ".join(hashtags)
+        })
         if hashtags:
             hashtags_counter.update(hashtags)
     # إعداد النصوص النهائية
     titles_text = "\n".join(
         f"{i+1}. {row['Title']}"
         for i, row in enumerate(data)
+        if row['Title'] and row['Title'] != 'بدون عنوان'  # تجاهل العناوين الفارغة
     )
     hashtags_text = "\n".join(