Spaces:
Sleeping
Sleeping
mayar-waleed committed on
Commit ·
075bf66
1
Parent(s): ebe7495
My updates
Browse files- License +21 -0
- app/deps.py +14 -15
- app/history.py +51 -0
- app/main.py +28 -2
- app/rag_pipeline.py +17 -2
- app/schemas.py +7 -0
- test_dataset_5_questions.json +0 -22
License
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2026 Mayar Waleed Salah
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
app/deps.py
CHANGED
|
@@ -4,28 +4,27 @@
|
|
| 4 |
# ============================================
|
| 5 |
from __future__ import annotations
|
| 6 |
|
| 7 |
-
import
|
| 8 |
-
from typing import Any, Optional
|
| 9 |
|
| 10 |
from .config import settings
|
| 11 |
from .rag_pipeline import build_qa_chain
|
| 12 |
|
| 13 |
-
_lock = threading.RLock()
|
| 14 |
-
_chain: Optional[Any] = None
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def reload_chain():
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
_chain = build_qa_chain(settings)
|
| 29 |
-
return _chain
|
| 30 |
|
| 31 |
|
|
|
|
| 4 |
# ============================================
|
| 5 |
from __future__ import annotations
|
| 6 |
|
| 7 |
+
from typing import Any, List, Optional
|
|
|
|
| 8 |
|
| 9 |
from .config import settings
|
| 10 |
from .rag_pipeline import build_qa_chain
|
| 11 |
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
def get_chain(conversation_history: Optional[List[dict]] = None):
    """
    Build the QA chain with conversation history.

    Nothing is cached here: build_qa_chain is invoked on every call so that
    the returned chain reflects the conversation history passed in.

    Args:
        conversation_history: Previous messages to use as context, or None
            (the default) to build a chain without any history.

    Returns:
        Whatever build_qa_chain returns for the current settings and history.
    """
    # The history is forwarded untouched; build_qa_chain decides how it is used.
    return build_qa_chain(settings, conversation_history=conversation_history)
|
| 24 |
|
| 25 |
|
| 26 |
def reload_chain():
    """Build a fresh QA chain from the current settings, with no conversation history."""
    fresh_chain = build_qa_chain(settings)
    return fresh_chain
|
|
|
|
|
|
|
| 29 |
|
| 30 |
|
app/history.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================
|
| 2 |
+
# file: app/history.py
|
| 3 |
+
# ============================================
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import threading
|
| 7 |
+
from typing import Dict, List
|
| 8 |
+
|
| 9 |
+
from .schemas import Message
|
| 10 |
+
|
| 11 |
+
_lock = threading.RLock()
|
| 12 |
+
_conversations: Dict[str, List[Message]] = {}
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_history(session_id: str) -> List[Message]:
    """Return a snapshot of the conversation history for a session.

    Args:
        session_id: Key of the session to look up.

    Returns:
        A new list holding the session's messages in insertion order, or an
        empty list when the session has no stored history.
    """
    with _lock:
        # Copy while the lock is held: callers iterate the result after the
        # lock is released, and handing out the stored list itself would let
        # a concurrent add_to_history (or a careless caller) mutate it under
        # them.
        return list(_conversations.get(session_id, ()))
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def add_to_history(session_id: str, user_msg: str, assistant_msg: str):
    """Record one question/answer exchange at the end of a session's history.

    Args:
        session_id: Key of the session to append to; created on first use.
        user_msg: Text stored as a message with role "user".
        assistant_msg: Text stored as a message with role "assistant".
    """
    with _lock:
        # setdefault creates the per-session list the first time it is needed.
        thread = _conversations.setdefault(session_id, [])
        thread.append(Message(role="user", content=user_msg))
        thread.append(Message(role="assistant", content=assistant_msg))
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def clear_history(session_id: str):
    """Drop every stored message for a session; unknown sessions are ignored.

    Args:
        session_id: Key of the session to remove.
    """
    with _lock:
        # pop with a default is a no-op when the session was never stored.
        _conversations.pop(session_id, None)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_history_text(session_id: str, max_messages: int = 6) -> str:
    """Convert conversation history to formatted text for LLM context.

    Args:
        session_id: Session whose history is rendered.
        max_messages: Upper bound on how many trailing messages are included.
            Zero or a negative value yields no history text at all.

    Returns:
        An empty string when there is nothing to show; otherwise a header
        line, one "<role label>: <content>" line per message, and a closing
        "---" separator followed by a blank line.
    """
    # Guard before slicing: history[-0:] is the *entire* list, so a zero
    # limit would otherwise include every message instead of none, and a
    # negative limit would slice from the front.
    if max_messages <= 0:
        return ""

    history = get_history(session_id)
    if not history:
        return ""

    # Collect the pieces and join once instead of growing a string with +=.
    lines = ["السجل السابق للمحادثة:"]
    for msg in history[-max_messages:]:
        role_label = "المستخدم" if msg.role == "user" else "المستشار"
        lines.append(f"{role_label}: {msg.content}")

    return "\n".join(lines) + "\n\n---\n\n"
|
app/main.py
CHANGED
|
@@ -10,6 +10,7 @@ from fastapi import FastAPI, HTTPException
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
|
| 12 |
from .deps import get_chain, reload_chain
|
|
|
|
| 13 |
from .schemas import AskRequest, AskResponse, SourceDoc
|
| 14 |
from .utils import convert_to_eastern_arabic
|
| 15 |
|
|
@@ -42,6 +43,23 @@ def reload():
|
|
| 42 |
return {"status": "reloaded"}
|
| 43 |
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
def _dedupe_sources(docs) -> List[SourceDoc]:
|
| 46 |
if not docs:
|
| 47 |
return []
|
|
@@ -70,7 +88,12 @@ def _dedupe_sources(docs) -> List[SourceDoc]:
|
|
| 70 |
|
| 71 |
@app.post("/ask", response_model=AskResponse)
|
| 72 |
async def ask(payload: AskRequest):
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
try:
|
| 76 |
# LangChain invoke is sync; run in worker thread
|
|
@@ -90,7 +113,10 @@ async def ask(payload: AskRequest):
|
|
| 90 |
if s.article_number:
|
| 91 |
s.article_number = convert_to_eastern_arabic(s.article_number)
|
| 92 |
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
|
| 96 |
|
|
|
|
| 10 |
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
|
| 12 |
from .deps import get_chain, reload_chain
|
| 13 |
+
from .history import get_history, add_to_history, clear_history
|
| 14 |
from .schemas import AskRequest, AskResponse, SourceDoc
|
| 15 |
from .utils import convert_to_eastern_arabic
|
| 16 |
|
|
|
|
| 43 |
return {"status": "reloaded"}
|
| 44 |
|
| 45 |
|
| 46 |
+
@app.post("/clear-history")
def clear_session(session_id: str = "default"):
    """Clear the stored conversation history for the given session."""
    clear_history(session_id)
    # Echo the session id back so the caller can confirm what was cleared.
    response = {"status": "cleared", "session_id": session_id}
    return response
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
@app.get("/history")
def get_session_history(session_id: str = "default"):
    """Return the stored conversation history for the given session."""
    # Flatten each stored message into a plain dict for the response body.
    serialized = []
    for entry in get_history(session_id):
        serialized.append({"role": entry.role, "content": entry.content})
    return {"session_id": session_id, "messages": serialized}
|
| 61 |
+
|
| 62 |
+
|
| 63 |
def _dedupe_sources(docs) -> List[SourceDoc]:
|
| 64 |
if not docs:
|
| 65 |
return []
|
|
|
|
| 88 |
|
| 89 |
@app.post("/ask", response_model=AskResponse)
|
| 90 |
async def ask(payload: AskRequest):
|
| 91 |
+
# Retrieve conversation history for this session
|
| 92 |
+
history = get_history(payload.session_id)
|
| 93 |
+
history_dicts = [{"role": msg.role, "content": msg.content} for msg in history]
|
| 94 |
+
|
| 95 |
+
# Get chain with conversation history context
|
| 96 |
+
chain = get_chain(conversation_history=history_dicts)
|
| 97 |
|
| 98 |
try:
|
| 99 |
# LangChain invoke is sync; run in worker thread
|
|
|
|
| 113 |
if s.article_number:
|
| 114 |
s.article_number = convert_to_eastern_arabic(s.article_number)
|
| 115 |
|
| 116 |
+
# Save this exchange to history
|
| 117 |
+
add_to_history(payload.session_id, payload.query, answer)
|
| 118 |
+
|
| 119 |
+
return AskResponse(answer=answer, sources=sources, session_id=payload.session_id, raw=result)
|
| 120 |
|
| 121 |
|
| 122 |
|
app/rag_pipeline.py
CHANGED
|
@@ -86,10 +86,14 @@ def _load_json_folder(folder_path: str) -> List[dict]:
|
|
| 86 |
return all_items
|
| 87 |
|
| 88 |
|
| 89 |
-
def build_qa_chain(settings: Settings):
|
| 90 |
"""
|
| 91 |
Builds and returns:
|
| 92 |
qa_chain: Runnable that returns {"context": [Document...], "input": str, "answer": str}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
"""
|
| 94 |
if not os.path.exists(settings.data_dir):
|
| 95 |
raise FileNotFoundError(f"Data folder not found: {settings.data_dir}")
|
|
@@ -345,6 +349,8 @@ def build_qa_chain(settings: Settings):
|
|
| 345 |
|
| 346 |
مهمتك الأساسية: الإجابة بدقة استناداً إلى "السياق التشريعي" المرفق أدناه.
|
| 347 |
عند وجود نص قانوني في السياق، هو مصدرك الأول والأهم.
|
|
|
|
|
|
|
| 348 |
</role>
|
| 349 |
|
| 350 |
<decision_logic>
|
|
@@ -407,10 +413,19 @@ def build_qa_chain(settings: Settings):
|
|
| 407 |
</formatting_rules>
|
| 408 |
"""
|
| 409 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
prompt = ChatPromptTemplate.from_messages(
|
| 411 |
[
|
| 412 |
("system", system_instructions),
|
| 413 |
-
("system", "السياق التشريعي المتاح (المصدر الأساسي):\n{context}"),
|
| 414 |
("human", "سؤال المستفيد:\n{input}"),
|
| 415 |
]
|
| 416 |
)
|
|
|
|
| 86 |
return all_items
|
| 87 |
|
| 88 |
|
| 89 |
+
def build_qa_chain(settings: Settings, conversation_history: List[dict] = None):
|
| 90 |
"""
|
| 91 |
Builds and returns:
|
| 92 |
qa_chain: Runnable that returns {"context": [Document...], "input": str, "answer": str}
|
| 93 |
+
|
| 94 |
+
Args:
|
| 95 |
+
settings: Configuration settings
|
| 96 |
+
conversation_history: Optional list of previous messages for context
|
| 97 |
"""
|
| 98 |
if not os.path.exists(settings.data_dir):
|
| 99 |
raise FileNotFoundError(f"Data folder not found: {settings.data_dir}")
|
|
|
|
| 349 |
|
| 350 |
مهمتك الأساسية: الإجابة بدقة استناداً إلى "السياق التشريعي" المرفق أدناه.
|
| 351 |
عند وجود نص قانوني في السياق، هو مصدرك الأول والأهم.
|
| 352 |
+
|
| 353 |
+
استخدم سجل المحادثة السابقة (إن وجد) لفهم السياق والإجابة بتسلسل منطقي.
|
| 354 |
</role>
|
| 355 |
|
| 356 |
<decision_logic>
|
|
|
|
| 413 |
</formatting_rules>
|
| 414 |
"""
|
| 415 |
|
| 416 |
+
# Build conversation history text for context
|
| 417 |
+
history_text = ""
|
| 418 |
+
if conversation_history:
|
| 419 |
+
history_text = "السجل السابق للمحادثة:\n"
|
| 420 |
+
for msg in conversation_history[-6:]: # Keep last 6 messages
|
| 421 |
+
role_label = "المستخدم" if msg.get("role") == "user" else "المستشار"
|
| 422 |
+
history_text += f"{role_label}: {msg.get('content', '')}\n"
|
| 423 |
+
history_text += "\n---\n\n"
|
| 424 |
+
|
| 425 |
prompt = ChatPromptTemplate.from_messages(
|
| 426 |
[
|
| 427 |
("system", system_instructions),
|
| 428 |
+
("system", f"{history_text}السياق التشريعي المتاح (المصدر الأساسي):\n{{context}}"),
|
| 429 |
("human", "سؤال المستفيد:\n{input}"),
|
| 430 |
]
|
| 431 |
)
|
app/schemas.py
CHANGED
|
@@ -4,8 +4,14 @@ from typing import Any, Dict, List, Optional
|
|
| 4 |
from pydantic import BaseModel, Field
|
| 5 |
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
class AskRequest(BaseModel):
|
| 8 |
query: str = Field(..., min_length=1, description="User question in Arabic")
|
|
|
|
| 9 |
include_sources: bool = Field(default=True, description="Return retrieved source docs")
|
| 10 |
eastern_arabic_numerals: bool = Field(
|
| 11 |
default=False, description="Convert digits 0-9 to Eastern Arabic numerals"
|
|
@@ -26,4 +32,5 @@ class SourceDoc(BaseModel):
|
|
| 26 |
class AskResponse(BaseModel):
|
| 27 |
answer: str
|
| 28 |
sources: List[SourceDoc] = Field(default_factory=list)
|
|
|
|
| 29 |
raw: Dict[str, Any] = Field(default_factory=dict)
|
|
|
|
| 4 |
from pydantic import BaseModel, Field
|
| 5 |
|
| 6 |
|
| 7 |
+
class Message(BaseModel):
    # One turn of a conversation. Both fields are required (Ellipsis default).
    # role: who produced the message.
    role: str = Field(default=..., description="'user' or 'assistant'")
    # content: the message text.
    content: str = Field(default=..., description="Message content")
|
| 10 |
+
|
| 11 |
+
|
| 12 |
class AskRequest(BaseModel):
|
| 13 |
query: str = Field(..., min_length=1, description="User question in Arabic")
|
| 14 |
+
session_id: str = Field(default="default", description="Unique session identifier")
|
| 15 |
include_sources: bool = Field(default=True, description="Return retrieved source docs")
|
| 16 |
eastern_arabic_numerals: bool = Field(
|
| 17 |
default=False, description="Convert digits 0-9 to Eastern Arabic numerals"
|
|
|
|
| 32 |
class AskResponse(BaseModel):
|
| 33 |
answer: str
|
| 34 |
sources: List[SourceDoc] = Field(default_factory=list)
|
| 35 |
+
session_id: str
|
| 36 |
raw: Dict[str, Any] = Field(default_factory=dict)
|
test_dataset_5_questions.json
DELETED
|
@@ -1,22 +0,0 @@
|
|
| 1 |
-
[
|
| 2 |
-
{
|
| 3 |
-
"question": "ما الطبيعة القانونية لحق العمل في الدستور المصري؟",
|
| 4 |
-
"ground_truth": "حق أساسي/حرية: العمل حق وواجب تكفله الدولة. يُمنع العمل الجبري إلا بقانون ولخدمة عامة وبمقابل عادل."
|
| 5 |
-
},
|
| 6 |
-
{
|
| 7 |
-
"question": "ما حكم التحرش أو التنمر أو العنف ضد العامل في مكان العمل وفق قانون العمل؟",
|
| 8 |
-
"ground_truth": "حظر السخرة والعمل الجبري والتحرش والتنمر والعنف بكافة أشكاله (اللفظي والجسدي والنفسي) ضد العمال، مع تحديد جزاءات تأديبية في لوائح المنشأة."
|
| 9 |
-
},
|
| 10 |
-
{
|
| 11 |
-
"question": "ما المقصود بالتلبس وما أثره الإجرائي بشكل عام؟",
|
| 12 |
-
"ground_truth": "لمأمور الضبط القضائي في التلبس منع الحاضرين من المغادرة حتى تحرير المحضر واستدعاء من يفيد في التحقيق."
|
| 13 |
-
},
|
| 14 |
-
{
|
| 15 |
-
"question": "ما حكم نشر صور أو معلومات تنتهك خصوصية شخص دون رضاه عبر الإنترنت؟",
|
| 16 |
-
"ground_truth": "تجرم المادة الاعتداء على القيم الأسرية أو الخصوصية عبر الرسائل الكثيفة دون موافقة، أو تسليم بيانات للترويج دون موافقة، أو نشر محتوى ينتهك الخصوصية سواء كان صحيحًا أو غير صحيح."
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"question": "ما الشروط العامة لاستحقاق الزوجة النفقة وفق قانون الأحوال الشخصية؟",
|
| 20 |
-
"ground_truth": "تجب النفقة للزوجة من تاريخ العقد الصحيح وتشمل الغذاء والكسوة والمسكن والعلاج. لا تجب النفقة إذا ارتدت أو امتنعت عن تسليم نفسها أو خرجت بدون إذن. نفقة الزوجة دين على الزوج ولها امتياز على أمواله."
|
| 21 |
-
}
|
| 22 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|