Spaces:

M17idd
/

army

Sleeping

App Files Files Community

M17idd committed on May 4, 2025

Commit

e4ed771

verified ·

1 Parent(s): a44f0ea

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -49

app.py CHANGED Viewed

@@ -502,11 +502,22 @@ import concurrent.futures
 from hazm import Normalizer
 from rapidfuzz import fuzz
 from langchain.schema import SystemMessage, HumanMessage
 folder_path = '46'
 normalizer = Normalizer()
-@st.cache_data(show_spinner="در حال پردازش اسناد... لطفاً صبور باشید.")
 def load_and_process_documents(path):
     def process_docx(filename):
         try:
@@ -516,12 +527,10 @@ def load_and_process_documents(path):
             normalized = normalizer.normalize(text)
             return filename, normalized
         except Exception as e:
-            print(f"Error processing {filename}: {e}")
             return filename, ""
     filenames = [f for f in os.listdir(path) if f.endswith(".docx")]
     doc_texts = {}
     with concurrent.futures.ThreadPoolExecutor() as executor:
         for filename, content in executor.map(process_docx, filenames):
             doc_texts[filename] = content
@@ -530,57 +539,43 @@ def load_and_process_documents(path):
 doc_texts = load_and_process_documents(folder_path)
-# لیست کلمات توقف
 stop_words = [
     "است", "و", "با", "که", "در", "از", "برای", "به", "بر", "تا", "این", "آن", "یک", "کدام", "کجا", "هم", "همه",
-    "یا", "از", "بر", "همچنین", "می", "باید", "شود", "شد", "گفت", "گویا", "داشت", "داشتن", "کنند", "کنیم",
-    "کرد", "کردن", "نیز", "یا", "اگر", "ای", "اینکه", "نه", "باشید", "باشم", "باشی", "در حالی که", "مگر", "چرا"
 ]
-# تابعی برای پاکسازی کلمات اضافی از سوال
 def remove_stop_words(text, stop_words):
     """Drop every whitespace-separated token of ``text`` found in ``stop_words``.
     The surviving tokens keep their original order and are re-joined with
     single spaces, so runs of whitespace in ``text`` are collapsed.
     """
     kept = []
     for token in text.split():
         if token in stop_words:
             continue
         kept.append(token)
     return " ".join(kept)
-# تابعی برای استخراج کلمات از متن
 def extract_keywords_from_text(text, query_words):
     """Return the lines of ``text`` that contain at least one query word.
     ``text`` is split on newline characters; a line is kept when any item of
     ``query_words`` occurs in it as a substring. Kept lines are returned in
     their original order.
     """
     matched_lines = []
     lines = text.split("\n")
-    # Check every line against every query word
     for line in lines:
         if any(query_word in line for query_word in query_words):
             matched_lines.append(line)
     return matched_lines
-# تابعی برای پاکسازی متن
 def clean_text(text):
     """Strip characters that are not Persian text, digits or basic punctuation.
     Kept: letters in the range آ-ی, Persian digits ۰-۹, ASCII digits, the
     punctuation ، . ؟ ! ؛ + - *, the space, and the zero-width non-joiner
     (U+200C). Everything else (Latin letters, newlines, markup, ...) is
     removed. Returns the filtered string.
     """
     # U+200C (ZWNJ, the Persian "half-space") is part of standard spelling,
     # e.g. in "می‌تونم"; removing it fuses the two halves of the word, so it
     # is whitelisted alongside the regular space.
     return re.sub(r'[^آ-ی۰-۹0-9،.؟!؛+\-*\u200c ]+', '', text)
-# تابعی برای پیدا کردن نزدیک‌ترین خطوط به سوال
 def find_closest_lines(query, doc_texts, stop_words, top_n=10):
     """Return up to ``top_n`` document lines that best match ``query``.
     Candidate lines are those containing at least one token of the query
     after stop-word removal. Each candidate is scored with
     ``fuzz.partial_ratio`` against the original (un-cleaned) ``query`` and
     the highest-scoring lines are returned, best first. ``doc_texts`` maps
     filename to text; the filename itself is not used.
     """
-    # Remove stop words from the question
     cleaned_query = remove_stop_words(query, stop_words)
     query_words = cleaned_query.split()
     all_matched_lines = []
-    # Scan the contents of every file
     for filename, text in doc_texts.items():
         matched_lines = extract_keywords_from_text(text, query_words)
         for line in matched_lines:
-            similarity = fuzz.partial_ratio(query, line)  # similarity between the line and the question
             all_matched_lines.append((line, similarity))
-    # Sort by similarity, highest first
-    all_matched_lines.sort(key=lambda x: x[1], reverse=True)
-    # Keep the top_n closest lines
-    closest_lines = [line for line, _ in all_matched_lines[:top_n]]
-    return closest_lines
-# تابعی برای حذف کلمات توقف از یک لیست از خطوط
 def remove_stop_words_from_lines(lines, stop_words):
     cleaned_lines = []
     for line in lines:
@@ -589,25 +584,23 @@ def remove_stop_words_from_lines(lines, stop_words):
         cleaned_lines.append(" ".join(cleaned_words))
     return cleaned_lines
-if "chat_history" not in st.session_state:
-    st.session_state.chat_history = []
 query = st.chat_input("چطور می‌تونم کمک کنم؟")
 if query:
-        st.markdown(f'<div class="chat-message">{query}</div>', unsafe_allow_html=True)
-    think = st.markdown("""
-    <div class="thinking-message">
-        <p>در حال فکر کردن...</p>
-        <div class="spinner"></div>
-    </div>
     """, unsafe_allow_html=True)
-    # Find the 3 lines closest to the question (top_n=3)
-    closest_lines = find_closest_lines(query, doc_texts, stop_words, top_n=3)
-    # حذف کلمات توقف از خطوط نزدیک
     cleaned_closest_lines = remove_stop_words_from_lines(closest_lines, stop_words)
     if cleaned_closest_lines:
@@ -624,18 +617,18 @@ if query:
             SystemMessage(content="You are a helpful assistant."),
             HumanMessage(content=prompt)
         ])
-        rewritten = clean_text(response.content.strip())
-        # ذخیره پیام‌های چت
-        st.session_state.chat_history.append(("🧑", query))
-        st.session_state.chat_history.append(("🤖", rewritten))
-        think.empty()
-    else:
-        st.warning("هیچ خط مرتبطی پیدا نشد.")
-        think.empty()
 # نمایش تاریخچه گفتگو
-for sender, msg in st.session_state.chat_history:
-    st.markdown(f"{sender} {msg}")

 from hazm import Normalizer
 from rapidfuzz import fuzz
 from langchain.schema import SystemMessage, HumanMessage
+from langchain.chat_models import ChatOpenAI
+# Settings
 # Directory that is scanned for .docx documents.
 folder_path = '46'
 normalizer = Normalizer()
+llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
 # NOTE(review): Streamlit has documented set_page_config as having to run
 # before other st.* calls (version dependent); this section sits roughly 500
 # lines into app.py - confirm nothing earlier in the file calls st.*.
+st.set_page_config(page_title="پرسش از اسناد", layout="wide")
+st.title("📄 دستیار هوشمند پرسش از اسناد")
+# Conversation memory
 # The history lives in session_state and is created only when the key is absent.
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# بارگذاری اسناد
+@st.cache_data(show_spinner="در حال بارگذاری اسناد...")
 def load_and_process_documents(path):
     """Read and normalize every ``.docx`` file found directly inside ``path``.
     Files are processed concurrently on a thread pool and the results are
     collected in a dict keyed by filename (value: normalized text, or "" when
     processing that file raised).
     NOTE(review): the lines that read a document into ``text`` and the final
     ``return`` (presumably ``doc_texts``) are not among the lines visible
     here - confirm against the full file.
     """
     def process_docx(filename):
         # Worker for a single file; returns a (filename, normalized_text) pair.
         try:
             # NOTE(review): ``text`` is not assigned in the visible lines.
             normalized = normalizer.normalize(text)
             return filename, normalized
         except Exception as e:
             # Best effort: any failure yields an empty text for this file; the
             # exception ``e`` is neither logged nor re-raised.
             return filename, ""
     # Only names ending in ".docx" are considered (case-sensitive suffix match).
     filenames = [f for f in os.listdir(path) if f.endswith(".docx")]
     doc_texts = {}
     with concurrent.futures.ThreadPoolExecutor() as executor:
         # executor.map yields results in the same order as ``filenames``.
         for filename, content in executor.map(process_docx, filenames):
             doc_texts[filename] = content
 # Load all documents once at import time; maps filename -> normalized text.
 doc_texts = load_and_process_documents(folder_path)
+# Stop words
 # NOTE(review): remove_stop_words compares whitespace-split tokens against
 # this list, so the multi-word entry "در حالی که" can never equal a single
 # token there and has no effect in that function.
 stop_words = [
     "است", "و", "با", "که", "در", "از", "برای", "به", "بر", "تا", "این", "آن", "یک", "کدام", "کجا", "هم", "همه",
+    "یا", "همچنین", "می", "باید", "شود", "شد", "گفت", "گویا", "داشت", "داشتن", "کنند", "کنیم",
+    "کرد", "کردن", "نیز", "اگر", "ای", "اینکه", "نه", "باشید", "باشم", "باشی", "در حالی که", "مگر", "چرا"
 ]
+# Helper functions
 def remove_stop_words(text, stop_words):
     """Drop every whitespace-separated token of ``text`` found in ``stop_words``.
     The surviving tokens keep their original order and are re-joined with
     single spaces, so runs of whitespace in ``text`` are collapsed.
     """
     kept = []
     for token in text.split():
         if token in stop_words:
             continue
         kept.append(token)
     return " ".join(kept)
 def extract_keywords_from_text(text, query_words):
     """Return the lines of ``text`` that contain at least one query word.
     ``text`` is split on newline characters; a line qualifies when any item
     of ``query_words`` occurs in it as a substring. Qualifying lines are
     returned in their original order.
     """
     def mentions_query_word(candidate):
         # True as soon as one query word is found inside the candidate line.
         for term in query_words:
             if term in candidate:
                 return True
         return False
     return [candidate for candidate in text.split("\n") if mentions_query_word(candidate)]
 def clean_text(text):
     """Strip characters that are not Persian text, digits or basic punctuation.
     Kept: letters in the range آ-ی, Persian digits ۰-۹, ASCII digits, the
     punctuation ، . ؟ ! ؛ + - *, the space, and the zero-width non-joiner
     (U+200C). Everything else (Latin letters, newlines, markup, ...) is
     removed. Returns the filtered string.
     """
     # U+200C (ZWNJ, the Persian "half-space") is part of standard spelling,
     # e.g. in "می‌تونم"; removing it fuses the two halves of the word, so it
     # is whitelisted alongside the regular space.
     return re.sub(r'[^آ-ی۰-۹0-9،.؟!؛+\-*\u200c ]+', '', text)
 def find_closest_lines(query, doc_texts, stop_words, top_n=10):
     """Return up to ``top_n`` document lines that best match ``query``.
     Candidate lines are those containing at least one token of the query
     after stop-word removal. Each candidate is scored with
     ``fuzz.partial_ratio`` against the original (un-cleaned) ``query`` and
     the highest-scoring lines are returned, best first. ``doc_texts`` maps
     filename to text; the filename itself is not used.
     """
     # Only the non-stop-word tokens of the question are used to pick candidates.
     cleaned_query = remove_stop_words(query, stop_words)
     query_words = cleaned_query.split()
     all_matched_lines = []
     for filename, text in doc_texts.items():
         matched_lines = extract_keywords_from_text(text, query_words)
         for line in matched_lines:
             # Scoring uses the full question, stop words included.
+            similarity = fuzz.partial_ratio(query, line)
             all_matched_lines.append((line, similarity))
     # Highest similarity first; list.sort is stable, so ties keep scan order.
+    all_matched_lines.sort(key=lambda x: x[1], reverse=True)
+    return [line for line, _ in all_matched_lines[:top_n]]
 def remove_stop_words_from_lines(lines, stop_words):
     """Return a new list holding one space-joined string per entry of ``lines``.
     NOTE(review): ``cleaned_words`` is not assigned in the lines visible here;
     presumably it is the current line's tokens with ``stop_words`` filtered
     out - confirm against the full file.
     """
     cleaned_lines = []
     for line in lines:
         cleaned_lines.append(" ".join(cleaned_words))
     return cleaned_lines
+# User input via chat_input
 query = st.chat_input("چطور می‌تونم کمک کنم؟")
 if query:
+    # Show the user's message
     # NOTE(review): ``query`` is interpolated into the HTML unescaped while
     # unsafe_allow_html=True, so markup typed by the user is rendered as HTML;
     # consider html.escape(query).
+    st.markdown(f'<div style="background-color:#eef;padding:10px;border-radius:10px;margin:10px 0;"><strong>🧑:</strong> {query}</div>', unsafe_allow_html=True)
+    # Show the "thinking" status
+    thinking = st.empty()
+    thinking.markdown("""
+        <div style="background-color:#f9f9f9;padding:10px;border-radius:10px;">
+        ⏳ در حال فکر کردن...
+        </div>
     """, unsafe_allow_html=True)
+    # Search the document texts
+    closest_lines = find_closest_lines(query, doc_texts, stop_words, top_n=3)
     cleaned_closest_lines = remove_stop_words_from_lines(closest_lines, stop_words)
     if cleaned_closest_lines:
         # NOTE(review): the lines that build ``prompt`` and open the model call
         # producing ``response`` are not among the lines visible here.
             SystemMessage(content="You are a helpful assistant."),
             HumanMessage(content=prompt)
         ])
+        final_answer = clean_text(response.content.strip())
+    else:
+        final_answer = "❗ هیچ خط مرتبطی با سؤال پیدا نشد."
+    # Clear the "thinking" status
+    thinking.empty()
+    # Store the exchange; it is rendered by the history loop that follows
     # NOTE(review): the user's message was already rendered above and is also
     # appended here, so on this run it appears a second time in the history.
+    st.session_state.chat_history.append(("🧑", query))
+    st.session_state.chat_history.append(("🤖", final_answer))
 # نمایش تاریخچه گفتگو
+st.markdown("---")
+for sender, message in st.session_state.chat_history:
+    st.markdown(f'<div style="background-color:#f0f0f0;padding:10px;border-radius:10px;margin-bottom:5px;"><strong>{sender}</strong>: {message}</div>', unsafe_allow_html=True)