Spaces:

anaspro
/

chatbox

Runtime error

anaspro committed on Oct 30, 2025

Commit

df14f5f

1 Parent(s): dbcf08f

🔧 إصلاحات مهمة للـ deployment

✅ إصلاحات:
- إصلاح خطأ Gradio stop_btn (من True إلى string)
- تحسين قراءة ملف Markdown مع معالجة أفضل للأخطاء
- إضافة بيانات احتياطية في حالة عدم وجود الملف
- رفع الحد الأدنى لإصدار Gradio إلى 4.44.1 (gradio>=4.44.1)
- تحسين رسائل التشخيص والـ debugging

📁 الملفات المحدثة:
- app.py: إصلاح stop_btn وتحسين RAG
- simple_rag.py: تحسين قراءة Markdown
- requirements.txt: تحديث Gradio

Files changed (3) hide show

app.py +26 -5
requirements.txt +1 -1
simple_rag.py +39 -30

app.py CHANGED Viewed

@@ -45,13 +45,34 @@ try:
         # تحميل ملف شركة NBTEL
         nbtel_file = "./data/nbtel_company_profile.md"
         if os.path.exists(nbtel_file):
             documents = rag_system.load_markdown_file(nbtel_file)
-            rag_system.add_documents(documents)
-            rag_system.build_index()
-            rag_system.save_index()
-            print("✅ تم إنشاء فهرس RAG بنجاح")
         else:
             print(f"⚠️ لم يتم العثور على ملف البيانات: {nbtel_file}")
     print(f"✅ نظام RAG جاهز - {len(rag_system.documents)} مستند")
     RAG_ENABLED = True
@@ -399,7 +420,7 @@ demo = gr.ChatInterface(
     **للدعم المباشر**: 📞 6337 | 📱 واتساب: 0773 633 7777
     """,
     examples=examples,
-    stop_btn=True,
     css="""
     .gradio-container, .chatbot, .chatbot * {
         direction: rtl !important;

         # تحميل ملف شركة NBTEL
         nbtel_file = "./data/nbtel_company_profile.md"
         if os.path.exists(nbtel_file):
+            print(f"📁 وجد ملف البيانات: {nbtel_file}")
             documents = rag_system.load_markdown_file(nbtel_file)
+            if documents:
+                rag_system.add_documents(documents)
+                rag_system.build_index()
+                rag_system.save_index()
+                print("✅ تم إنشاء فهرس RAG بنجاح")
+            else:
+                print("⚠️ لم يتم استخراج أي مستندات من الملف")
         else:
             print(f"⚠️ لم يتم العثور على ملف البيانات: {nbtel_file}")
+            # إنشاء بيانات تجريبية بسيطة
+            sample_docs = [
+                {
+                    'title': 'معلومات أساسية عن NBTEL',
+                    'content': 'شركة NBTEL عراقية متخصصة في خدمات الإنترنت والاتصالات. نقدم خدمات WiFi و FTTX في محافظات نينوى وكركوك وصلاح الدين.',
+                    'source': 'fallback'
+                },
+                {
+                    'title': 'معلومات التواصل',
+                    'content': 'للدعم الفني: 6337، واتساب: 0773 633 7777، إيميل: Info@nbtel.iq',
+                    'source': 'fallback'
+                }
+            ]
+            rag_system.add_documents(sample_docs)
+            rag_system.build_index()
+            rag_system.save_index()
+            print("✅ تم إنشاء فهرس تجريبي")
     print(f"✅ نظام RAG جاهز - {len(rag_system.documents)} مستند")
     RAG_ENABLED = True
     **للدعم المباشر**: 📞 6337 | 📱 واتساب: 0773 633 7777
     """,
     examples=examples,
+    stop_btn="إيقاف",
     css="""
     .gradio-container, .chatbot, .chatbot * {
         direction: rtl !important;

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-gradio>=4.0.0
 spaces[huggingface]>=0.28.0
 transformers>=4.35.0
 torch>=2.1.0

+gradio>=4.44.1
 spaces[huggingface]>=0.28.0
 transformers>=4.35.0
 torch>=2.1.0

simple_rag.py CHANGED Viewed

@@ -49,56 +49,65 @@ class SimpleRAG:
             with open(file_path, 'r', encoding='utf-8') as f:
                 content = f.read()
-            # تقسيم حسب العناوين
-            sections = re.split(r'\n(#{1,6})\s+', content)
-            current_section = ""
-            current_title = ""
-            for i, part in enumerate(sections):
-                if part.startswith('#'):
-                    # هذا عنوان
-                    if current_section and current_title:
                         documents.append({
                             'title': current_title,
-                            'content': current_section.strip(),
                             'source': 'nbtel_profile',
                             'section_type': 'main'
                         })
-                    current_title = ""
-                    current_section = ""
-                elif part.strip().startswith('#'):
-                    # عنوان فرعي
-                    current_title = part.strip()
                 else:
-                    # محتوى
-                    if i + 1 < len(sections) and not sections[i + 1].startswith('#'):
-                        current_title = part.strip().split('\n')[0] if part.strip() else current_title
-                        current_section = part
             # إضافة القسم الأخير
-            if current_section and current_title:
                 documents.append({
                     'title': current_title,
-                    'content': current_section.strip(),
                     'source': 'nbtel_profile',
                     'section_type': 'main'
                 })
-            # تقسيم إضافي للأقسام الطويلة
             final_docs = []
             for doc in documents:
-                if len(doc['content']) > 1000:
-                    chunks = self._split_long_text(doc['content'])
                     for i, chunk in enumerate(chunks):
-                        final_docs.append({
-                            'title': f"{doc['title']} - جزء {i+1}",
-                            'content': chunk,
-                            'source': doc['source'],
-                            'section_type': 'chunk'
-                        })
                 else:
-                    final_docs.append(doc)
             print(f"✅ تم تحميل {len(final_docs)} قسم من الملف")
             return final_docs

             with open(file_path, 'r', encoding='utf-8') as f:
                 content = f.read()
+            print(f"📄 قراءة الملف، الحجم: {len(content)} حرف")
+            # تقسيم بسيط حسب العناوين
+            sections = re.split(r'\n(#{1,3}\s+.*?)\n', content)
+            current_title = "مقدمة"
+            current_content = ""
+            for i, section in enumerate(sections):
+                section = section.strip()
+                if not section:
+                    continue
+                # إذا كان عنوان (يبدأ بـ #)
+                if section.startswith('#'):
+                    # حفظ القسم السابق
+                    if current_content.strip():
                         documents.append({
                             'title': current_title,
+                            'content': current_content.strip(),
                             'source': 'nbtel_profile',
                             'section_type': 'main'
                         })
+                    # بدء قسم جديد
+                    current_title = section.replace('#', '').strip()
+                    current_content = ""
                 else:
+                    # إضافة المحتوى للقسم الحالي
+                    current_content += section + "\n"
             # إضافة القسم الأخير
+            if current_content.strip():
                 documents.append({
                     'title': current_title,
+                    'content': current_content.strip(),
                     'source': 'nbtel_profile',
                     'section_type': 'main'
                 })
+            # تقسيم إضافي للأقسام الطويلة جداً
             final_docs = []
             for doc in documents:
+                if len(doc['content']) > 1500:
+                    chunks = self._split_long_text(doc['content'], max_length=1200)
                     for i, chunk in enumerate(chunks):
+                        if chunk.strip():  # تأكد من وجود محتوى
+                            final_docs.append({
+                                'title': f"{doc['title']} - جزء {i+1}",
+                                'content': chunk,
+                                'source': doc['source'],
+                                'section_type': 'chunk'
+                            })
                 else:
+                    if doc['content'].strip():  # تأكد من وجود محتوى
+                        final_docs.append(doc)
+            # إزالة المستندات الفارغة
+            final_docs = [doc for doc in final_docs if len(doc['content'].strip()) > 50]
             print(f"✅ تم تحميل {len(final_docs)} قسم من الملف")
             return final_docs