Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,6 +49,7 @@ def track_sources(question, answer, sources):
|
|
| 49 |
"sources": [
|
| 50 |
{
|
| 51 |
"file": doc.metadata['source'],
|
|
|
|
| 52 |
"content": doc.page_content
|
| 53 |
}
|
| 54 |
for doc in sources
|
|
@@ -71,6 +72,7 @@ def track_sources(question, answer, sources):
|
|
| 71 |
for i, doc in enumerate(sources, 1):
|
| 72 |
f.write(f"\nالمصدر #{i}:\n")
|
| 73 |
f.write(f"- الملف: {os.path.basename(doc.metadata['source'])}\n")
|
|
|
|
| 74 |
f.write(f"- المحتوى الكامل:\n{doc.page_content}\n")
|
| 75 |
f.write("-"*80 + "\n")
|
| 76 |
|
|
@@ -102,7 +104,7 @@ def setup_chains(vectorstore: FAISS):
|
|
| 102 |
llm = ChatOpenAI(
|
| 103 |
model="meta-llama/llama-3-70b-instruct",
|
| 104 |
base_url="https://openrouter.ai/api/v1",
|
| 105 |
-
api_key="sk-or-v1-
|
| 106 |
temperature=0.4
|
| 107 |
)
|
| 108 |
|
|
@@ -173,7 +175,7 @@ def setup_chains(vectorstore: FAISS):
|
|
| 173 |
llm=llm,
|
| 174 |
retriever=vectorstore.as_retriever(
|
| 175 |
search_type="mmr",
|
| 176 |
-
search_kwargs={'k': 20, 'fetch_k': 100, 'lambda_mult': 0.
|
| 177 |
),
|
| 178 |
return_source_documents=True,
|
| 179 |
chain_type_kwargs={"prompt": qa_prompt}
|
|
@@ -181,6 +183,47 @@ def setup_chains(vectorstore: FAISS):
|
|
| 181 |
|
| 182 |
return rephrase_chain, qa_chain
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
def create_gradio_interface(rephrase_chain, qa_chain):
|
| 185 |
"""إنشاء واجهة Gradio مع عرض المصادر"""
|
| 186 |
with gr.Blocks(title="المساعد الذكي للتأمين الصحي") as demo:
|
|
@@ -223,6 +266,7 @@ def create_gradio_interface(rephrase_chain, qa_chain):
|
|
| 223 |
write_to_log(
|
| 224 |
f"المصدر #{i}:",
|
| 225 |
f"الملف: {os.path.basename(doc.metadata['source'])}",
|
|
|
|
| 226 |
f"المحتوى: {doc.page_content[:300]}..." if len(doc.page_content) > 300 else f"المحتوى: {doc.page_content}",
|
| 227 |
"-"*50
|
| 228 |
)
|
|
@@ -283,4 +327,4 @@ if __name__ == "__main__":
|
|
| 283 |
vectorstore = load_embeddings()
|
| 284 |
rephrase_chain, qa_chain = setup_chains(vectorstore)
|
| 285 |
demo = create_gradio_interface(rephrase_chain, qa_chain)
|
| 286 |
-
demo.launch(
|
|
|
|
| 49 |
"sources": [
|
| 50 |
{
|
| 51 |
"file": doc.metadata['source'],
|
| 52 |
+
"section": doc.metadata.get('section', 'غير محدد'),
|
| 53 |
"content": doc.page_content
|
| 54 |
}
|
| 55 |
for doc in sources
|
|
|
|
| 72 |
for i, doc in enumerate(sources, 1):
|
| 73 |
f.write(f"\nالمصدر #{i}:\n")
|
| 74 |
f.write(f"- الملف: {os.path.basename(doc.metadata['source'])}\n")
|
| 75 |
+
f.write(f"- العنوان: {doc.metadata.get('section', 'غير محدد')}\n")
|
| 76 |
f.write(f"- المحتوى الكامل:\n{doc.page_content}\n")
|
| 77 |
f.write("-"*80 + "\n")
|
| 78 |
|
|
|
|
| 104 |
llm = ChatOpenAI(
|
| 105 |
model="meta-llama/llama-3-70b-instruct",
|
| 106 |
base_url="https://openrouter.ai/api/v1",
|
| 107 |
+
api_key="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",
|
| 108 |
temperature=0.4
|
| 109 |
)
|
| 110 |
|
|
|
|
| 175 |
llm=llm,
|
| 176 |
retriever=vectorstore.as_retriever(
|
| 177 |
search_type="mmr",
|
| 178 |
+
search_kwargs={'k': 20, 'fetch_k': 100, 'lambda_mult': 0.9}
|
| 179 |
),
|
| 180 |
return_source_documents=True,
|
| 181 |
chain_type_kwargs={"prompt": qa_prompt}
|
|
|
|
| 183 |
|
| 184 |
return rephrase_chain, qa_chain
|
| 185 |
|
| 186 |
+
def process_question(question: str, rephrase_chain, qa_chain, chat_history: list) -> tuple:
    """Answer a user question and record a full trace of how it was answered.

    The question is first passed to ``rephrase_chain.invoke`` (as
    ``{"question": question}``); the stripped ``.content`` of that result is
    then sent to ``qa_chain.invoke``. The QA result is read through its
    ``"result"`` and ``"source_documents"`` keys.

    Args:
        question: The question exactly as typed by the user.
        rephrase_chain: Object whose ``invoke`` returns something with a
            ``.content`` string (the rewritten question).
        qa_chain: Object whose ``invoke`` returns a mapping with ``"result"``
            and ``"source_documents"``.
        chat_history: List of ``(question, answer)`` pairs; it is appended to
            in place.

    Returns:
        A 3-tuple ``("", chat_history, sources_data)`` where ``sources_data``
        holds one dict per source document (file name, text preview, answer).
        The leading empty string is presumably used by the caller to clear
        the input box -- confirm against the Gradio wiring.
    """
    preview_limit = 300

    def _preview(text):
        # Long passages are cut to the first 300 characters plus an ellipsis.
        if len(text) > preview_limit:
            return text[:preview_limit] + "..."
        return text

    # Step 1: rewrite the question, then run retrieval + answering on it.
    fusha_question = rephrase_chain.invoke({"question": question}).content.strip()
    qa_output = qa_chain.invoke(fusha_question)
    answer = qa_output["result"]
    retrieved_docs = qa_output["source_documents"]

    # Step 2: log the exchange header before the per-source entries.
    write_to_log(
        f"السؤال: {question}",
        f"السؤال المحول: {fusha_question}",
        f"الإجابة: {answer}",
        "المصادر المستخدمة:",
    )

    # Step 3: one log entry and one display row per retrieved document.
    sources_data = []
    for position, document in enumerate(retrieved_docs, start=1):
        file_name = os.path.basename(document.metadata['source'])
        section_name = document.metadata.get('section', 'غير محدد')
        snippet = _preview(document.page_content)

        write_to_log(
            f"المصدر #{position}:",
            f"الملف: {file_name}",
            f"القسم: {section_name}",
            f"المحتوى: {snippet}",
            "-" * 50,
        )

        sources_data.append(
            {
                "الملف": file_name,
                "النص": snippet,
                "الإجابة": answer,
            }
        )

    # Step 4: persist the sources via the separate tracking routine.
    track_sources(question, answer, retrieved_docs)

    # Step 5: extend the running conversation and hand everything back.
    chat_history.append((question, answer))
    return "", chat_history, sources_data
|
| 226 |
+
|
| 227 |
def create_gradio_interface(rephrase_chain, qa_chain):
|
| 228 |
"""إنشاء واجهة Gradio مع عرض المصادر"""
|
| 229 |
with gr.Blocks(title="المساعد الذكي للتأمين الصحي") as demo:
|
|
|
|
| 266 |
write_to_log(
|
| 267 |
f"المصدر #{i}:",
|
| 268 |
f"الملف: {os.path.basename(doc.metadata['source'])}",
|
| 269 |
+
f"القسم: {doc.metadata.get('section', 'غير محدد')}",
|
| 270 |
f"المحتوى: {doc.page_content[:300]}..." if len(doc.page_content) > 300 else f"المحتوى: {doc.page_content}",
|
| 271 |
"-"*50
|
| 272 |
)
|
|
|
|
| 327 |
vectorstore = load_embeddings()
|
| 328 |
rephrase_chain, qa_chain = setup_chains(vectorstore)
|
| 329 |
demo = create_gradio_interface(rephrase_chain, qa_chain)
|
| 330 |
+
demo.launch(share=False)
|