# ContiAI-v4 / rag/automation.py
# ziadsameh32's picture
# Initial FastAPI CrewAI setup
# bf25b00
from typing import List
import time
# =========================
# Query Bundle Builder
# =========================
def build_query_bundle(sub: dict, chapter_title: str, section_title: str) -> List[str]:
    """Collect the de-duplicated search queries for one subsection.

    Candidates are gathered in this order: the planning "core_idea", the
    string entries among the first three planning "key_points", the first
    two "suggested_queries", the subsection "title", and finally a
    "<chapter_title> - <section_title>" query.

    Fields that are missing or ``None`` and entries that are not strings are
    skipped instead of raising, so a partially filled plan still yields a
    usable bundle.

    Args:
        sub: Subsection dict; reads "planning", "suggested_queries", "title".
        chapter_title: Title of the enclosing chapter.
        section_title: Title of the enclosing section.

    Returns:
        Stripped, non-empty queries with duplicates removed; the order of
        first occurrence is preserved.
    """
    # `or {}` / `or []` also cover keys that are present but set to None,
    # which a plain .get(key, default) would pass straight through.
    planning = sub.get("planning") or {}

    candidates = [planning.get("core_idea")]
    candidates.extend((planning.get("key_points") or [])[:3])
    candidates.extend((sub.get("suggested_queries") or [])[:2])
    candidates.append(sub.get("title"))
    candidates.append(f"{chapter_title} - {section_title}")

    seen = set()
    bundle = []
    for candidate in candidates:
        # Only strings can be stripped and used as a query.
        if not isinstance(candidate, str):
            continue
        query = candidate.strip()
        if query and query not in seen:
            seen.add(query)
            bundle.append(query)
    return bundle
# =========================
# Citations
# =========================
def make_in_text_citation(author, year, page_start):
    """Format an in-text citation from author, year and starting page.

    A falsy author, a year that is not an ``int``, or a page that is not an
    ``int`` is replaced by a fixed Arabic placeholder string.
    """
    if isinstance(year, int):
        year_part = year
    else:
        year_part = "د.ت"

    if isinstance(page_start, int):
        page_part = page_start
    else:
        page_part = "؟"

    author_part = author if author else "مؤلف"
    return f"({author_part}، {year_part}، ص {page_part})"
def make_reference_apa(author, year, title):
    """Format a reference-list entry as "<author>. (<year>). <title>.".

    A falsy author or title, or a year that is not an ``int``, is replaced
    by a fixed Arabic placeholder string.
    """
    if isinstance(year, int):
        year_part = year
    else:
        year_part = "د.ت"

    author_part = author if author else "مؤلف"
    title_part = title if title else "مصدر بدون عنوان"
    return f"{author_part}. ({year_part}). {title_part}."
# =========================
# RAG for Subsection (MULTI-BOOK)
# =========================
def build_rag_context_for_subsection(
    rag_engines: List,
    sub: dict,
    chapter_title: str,
    section_title: str,
    top_k: int = 5,
) -> dict:
    """Retrieve the best-scoring chunks for one subsection across all engines.

    The query bundle built from the subsection is sent to every engine's
    ``retrieve(queries=...)``. The hits are pooled, ordered by ``score``
    descending, and cut to ``top_k``.

    Args:
        rag_engines: Objects exposing ``retrieve(queries=...)``; each hit is
            read through its ``id``, ``score`` and ``payload`` attributes.
        sub: Subsection dict passed on to ``build_query_bundle``.
        chapter_title: Title of the enclosing chapter.
        section_title: Title of the enclosing section.
        top_k: Maximum number of chunks to keep.

    Returns:
        Dict with "query_bundle", "selected_k", "chunks" and "coverage_note".
    """
    query_bundle = build_query_bundle(sub, chapter_title, section_title)

    # Pool the hits from every source engine before ranking.
    pooled = []
    for engine in rag_engines:
        pooled.extend(engine.retrieve(queries=query_bundle))

    if not pooled:
        return {
            "query_bundle": query_bundle,
            "selected_k": 0,
            "chunks": [],
            "coverage_note": "لم يتم العثور على مراجع مناسبة لهذا المحور.",
        }

    def as_chunk(hit):
        # A hit without a payload still yields a chunk, with None metadata.
        meta = hit.payload or {}
        author = meta.get("author")
        year = meta.get("year")
        title = meta.get("title")
        page_start = meta.get("page_start")
        return {
            "chunk_id": hit.id,
            "score": hit.score,
            "doc_id": meta.get("doc_id"),
            "page_start": page_start,
            "page_end": meta.get("page_end"),
            "title": title,
            "author": author,
            "year": year,
            "text": meta.get("text"),
            "in_text_citation": make_in_text_citation(author, year, page_start),
            "reference_apa": make_reference_apa(author, year, title),
        }

    # Rank globally (not per engine), then keep only the best top_k.
    pooled.sort(key=lambda hit: hit.score, reverse=True)
    selected = [as_chunk(hit) for hit in pooled[:top_k]]

    return {
        "query_bundle": query_bundle,
        "selected_k": len(selected),
        "chunks": selected,
        "coverage_note": "تم اختيار أفضل المراجع المتوافقة مع فكرة المحور وحدوده.",
    }
# =========================
# Book-Level Automation
# =========================
def run_rag_automation_on_book(
    planned_book: dict,
    rag_engines: List,
    top_k: int = 5,
    sleep_s: float = 0.1,
) -> dict:
    """Attach a "rag_context" entry to every subsection of a planned book.

    Walks "chapters" -> "sections" -> "subsections", builds the RAG context
    for each subsection, and stores it on the subsection dict itself.

    Args:
        planned_book: Book plan dict; it is modified in place.
        rag_engines: Engines forwarded to ``build_rag_context_for_subsection``.
        top_k: Maximum number of chunks kept per subsection.
        sleep_s: Seconds to pause after each subsection is processed.

    Returns:
        The same ``planned_book`` object that was passed in.
    """
    for chapter in planned_book.get("chapters", []):
        chapter_name = chapter.get("chapter_title", "")
        for section in chapter.get("sections", []):
            section_name = section.get("title", "")
            for subsection in section.get("subsections", []):
                subsection["rag_context"] = build_rag_context_for_subsection(
                    rag_engines=rag_engines,
                    sub=subsection,
                    chapter_title=chapter_name,
                    section_title=section_name,
                    top_k=top_k,
                )
                # Short pause between subsections.
                time.sleep(sleep_s)
    return planned_book