| import gradio as gr |
| from collections import Counter |
| from bs4 import BeautifulSoup |
|
|
|
|
def _read_uploaded_text(file):
    """Return the content of an uploaded file.

    Args:
        file: One of
            * a file-like object exposing ``read()`` (its result is returned
              unchanged, so it may be ``str`` or ``bytes``),
            * a filesystem path (``str`` or any object implementing
              ``__fspath__``),
            * an object whose ``name`` attribute holds the path on disk.

    Raises:
        OSError, UnicodeDecodeError, AttributeError: propagated to the caller,
        which turns them into a user-facing message.
    """
    if hasattr(file, 'read'):
        return file.read()

    if hasattr(file, '__fspath__'):
        # pathlib.Path also has a ``name`` attribute, but that is only the
        # final path component, so the object itself must be used as the path.
        path = file
    else:
        # Objects carrying the path in ``.name`` keep working as before;
        # a plain ``str`` has no ``.name`` and is used as the path directly.
        path = getattr(file, 'name', file)

    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def _extract_title(container):
    """Return the title text for one description container.

    The first element with class ``title`` is used, preferring ``h2``, then
    ``h1``, then ``div``, then any tag. When none exists, the container's
    ``aria-label`` attribute is used, falling back to the placeholder
    'بدون عنوان'.
    """
    for tag_name in ('h2', 'h1', 'div', None):  # None matches any tag name
        node = container.find(tag_name, class_='title')
        if node:
            return node.get_text(strip=True)
    return container.get('aria-label', 'بدون عنوان').strip()


def _extract_hashtags(container):
    """Return the text of every ``<a>`` in *container* that starts with '#'."""
    link_texts = (link.get_text(strip=True) for link in container.find_all('a'))
    return [text for text in link_texts if text.startswith('#')]


def extract_titles_and_hashtags(file):
    """Extract titles and hashtags from an uploaded HTML document.

    The document is parsed with BeautifulSoup and every
    ``div.css-vi46v1-DivDesContainer`` element is treated as one entry.

    Args:
        file: The uploaded file; see ``_read_uploaded_text`` for the accepted
            forms. ``None`` (nothing uploaded) is reported as an error message.

    Returns:
        A 3-tuple of strings ``(titles, hashtag_counts, unique_hashtags)``:

        * ``titles`` - one numbered title per line. The number is the entry's
          position among all containers, so numbers may skip when an entry has
          an empty or placeholder title.
        * ``hashtag_counts`` - ``"<hashtag>: <count>"`` lines, most frequent
          first, ties broken alphabetically.
        * ``unique_hashtags`` - each distinct hashtag once, sorted.

        On failure the first element carries an error message and the other
        two are empty strings. Errors are reported this way, not raised,
        because the tuple feeds three output textboxes directly.
    """
    if file is None:
        # Triggered when the button is pressed before any file is uploaded.
        return "لم يتم رفع أي ملف.", "", ""

    # Broad catch is deliberate: this is the UI boundary and any failure
    # should surface as a message in the first textbox.
    try:
        content = _read_uploaded_text(file)
    except Exception as e:
        return f"خطأ أثناء قراءة الملف: {e}", "", ""

    try:
        soup = BeautifulSoup(content, 'html.parser')
    except Exception as e:
        return f"خطأ في تحليل محتوى HTML: {e}", "", ""

    desc_containers = soup.find_all('div', class_="css-vi46v1-DivDesContainer")
    if not desc_containers:
        return "لم يتم العثور على أي بيانات مطابقة.", "", ""

    titles = []
    hashtags_counter = Counter()
    for container in desc_containers:
        titles.append(_extract_title(container))
        hashtags_counter.update(_extract_hashtags(container))

    # Numbering follows the container position (not the filtered position),
    # matching the established output format.
    titles_text = "\n".join(
        f"{position}. {title}"
        for position, title in enumerate(titles, start=1)
        if title and title != 'بدون عنوان'
    )

    hashtags_text = "\n".join(
        f"{hashtag}: {count}"
        for hashtag, count in sorted(
            hashtags_counter.items(), key=lambda item: (-item[1], item[0])
        )
    )

    unique_hashtags_text = "\n".join(sorted(hashtags_counter))

    return (
        titles_text or "لا توجد عناوين مستخرجة.",
        hashtags_text or "لا توجد هاشتاغات مستخرجة.",
        unique_hashtags_text or "لا توجد هاشتاغات فريدة.",
    )
| |
def gradio_interface():
    """Build and return the Gradio Blocks UI for the analyzer.

    Layout, top to bottom: a heading, a file-upload row restricted to
    ``.txt`` files, a primary action button, and a row of three read-only
    textboxes (titles, hashtag counts, unique hashtags). Clicking the button
    runs ``extract_titles_and_hashtags`` on the uploaded file and writes its
    three return values into the textboxes in that order.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    # (label, placeholder) for each output box, in the order the callback
    # returns its values.
    output_specs = (
        ("📜 العناوين المستخرجة", "ستظهر العناوين هنا"),
        ("🏷️ الهاشتاغات المستخرجة (مع التكرار)", "ستظهر الهاشتاغات هنا"),
        ("🏷️ الهاشتاغات الفريدة (غير المكررة)", "ستظهر الهاشتاغات الفريدة هنا"),
    )

    with gr.Blocks() as app:
        gr.Markdown("## 📝 محلل النصوص المتقدم")

        with gr.Row():
            upload = gr.File(label="📂 رفع ملف TXT", file_types=[".txt"])

        with gr.Row():
            run_button = gr.Button("تحليل البيانات", variant="primary")

        with gr.Row():
            result_boxes = [
                gr.Textbox(
                    label=box_label,
                    lines=10,
                    interactive=False,
                    placeholder=box_hint,
                )
                for box_label, box_hint in output_specs
            ]

        run_button.click(
            fn=extract_titles_and_hashtags,
            inputs=[upload],
            outputs=result_boxes,
        )

    return app
|
|
|
|
| |
if __name__ == "__main__":
    # Script entry point: build the UI and start the Gradio server.
    # Nothing is launched when this module is merely imported.
    demo = gradio_interface()
    demo.launch()
|
|