Spaces:
Sleeping
Sleeping
| from typing import List | |
| import time | |
| # ========================= | |
| # Query Bundle Builder | |
| # ========================= | |
def build_query_bundle(sub: dict, chapter_title: str, section_title: str) -> List[str]:
    """Collect the search queries used to retrieve sources for one subsection.

    Candidates are gathered in priority order: the planning "core_idea", the
    first three planning "key_points", the first two "suggested_queries", the
    subsection "title", and finally a "<chapter> - <section>" fallback.
    Candidates that are not strings are ignored; the rest are stripped, and
    blanks and repeats are dropped while keeping first-seen order.

    Args:
        sub: Subsection mapping. "planning", "suggested_queries" and "title"
            are read from it; each may be missing or None.
        chapter_title: Title of the enclosing chapter.
        section_title: Title of the enclosing section.

    Returns:
        De-duplicated, non-empty query strings in priority order.
    """
    # `or {}` / `or []` also cover keys that are present but hold None,
    # which `.get(key, default)` alone does not.
    planning = sub.get("planning") or {}
    if not isinstance(planning, dict):
        planning = {}

    candidates = [planning.get("core_idea")]
    candidates.extend((planning.get("key_points") or [])[:3])
    candidates.extend((sub.get("suggested_queries") or [])[:2])
    candidates.append(sub.get("title"))
    candidates.append(f"{chapter_title} - {section_title}")

    seen = set()
    final = []
    for candidate in candidates:
        # Every source gets the same type check, so a stray None or number
        # in the plan is skipped instead of crashing on `.strip()`.
        if not isinstance(candidate, str):
            continue
        query = candidate.strip()
        if query and query not in seen:
            seen.add(query)
            final.append(query)
    return final
| # ========================= | |
| # Citations | |
| # ========================= | |
def make_in_text_citation(author, year, page_start):
    """Format a parenthesised in-text citation: author, year, page.

    A falsy author, a non-int year, or a non-int page is replaced by its
    Arabic placeholder string.
    """
    author_part = author if author else "مؤلف"

    year_part = "د.ت"
    if isinstance(year, int):
        year_part = year

    page_part = "؟"
    if isinstance(page_start, int):
        page_part = page_start

    return f"({author_part}، {year_part}، ص {page_part})"
def make_reference_apa(author, year, title):
    """Format a reference-list entry as "<author>. (<year>). <title>.".

    A falsy author or title, or a non-int year, is replaced by its Arabic
    placeholder string.
    """
    if not author:
        author = "مؤلف"
    if not isinstance(year, int):
        year = "د.ت"
    if not title:
        title = "مصدر بدون عنوان"
    return f"{author}. ({year}). {title}."
| # ========================= | |
| # RAG for Subsection (MULTI-BOOK) | |
| # ========================= | |
def _hit_to_chunk(hit) -> dict:
    """Flatten one retrieval hit into the chunk dict stored in the RAG context."""
    payload = hit.payload or {}
    author = payload.get("author")
    year = payload.get("year")
    title = payload.get("title")
    page_start = payload.get("page_start")
    return {
        "chunk_id": hit.id,
        "score": hit.score,
        "doc_id": payload.get("doc_id"),
        "page_start": page_start,
        "page_end": payload.get("page_end"),
        "title": title,
        "author": author,
        "year": year,
        "text": payload.get("text"),
        "in_text_citation": make_in_text_citation(author, year, page_start),
        "reference_apa": make_reference_apa(author, year, title),
    }


def build_rag_context_for_subsection(
    rag_engines: List,
    sub: dict,
    chapter_title: str,
    section_title: str,
    top_k: int = 5,
) -> dict:
    """Retrieve the best-scoring source chunks for one subsection.

    The query bundle built for the subsection is sent to every engine in
    `rag_engines`; the hits from all engines are pooled, ranked by score in
    descending order, and the top `top_k` are converted to chunk dicts with
    ready-made citation strings.

    Args:
        rag_engines: Objects exposing `retrieve(queries=...)`. Each returned
            hit is read through its `id`, `score` and `payload` attributes.
        sub: Subsection mapping passed to `build_query_bundle`.
        chapter_title: Title of the enclosing chapter.
        section_title: Title of the enclosing section.
        top_k: Maximum number of chunks to keep; values below zero are
            treated as zero.

    Returns:
        A dict with "query_bundle", "selected_k", "chunks" and
        "coverage_note". When nothing is selected, "chunks" is empty,
        "selected_k" is 0 and the note reports that no suitable references
        were found.
    """
    queries = build_query_bundle(sub, chapter_title, section_title)

    # Pool hits from all source books; an engine that returns None
    # contributes nothing instead of raising.
    all_hits = []
    for rag in rag_engines:
        all_hits.extend(rag.retrieve(queries=queries) or [])

    # A negative top_k would make the slice drop the lowest-ranked hits
    # rather than cap the result, so clamp it at zero.
    limit = max(top_k, 0)
    best_hits = sorted(all_hits, key=lambda hit: hit.score, reverse=True)[:limit]

    # Covers both "no hits at all" and "top_k left nothing", so the note
    # always agrees with selected_k.
    if not best_hits:
        return {
            "query_bundle": queries,
            "selected_k": 0,
            "chunks": [],
            "coverage_note": "لم يتم العثور على مراجع مناسبة لهذا المحور.",
        }

    chunks = [_hit_to_chunk(hit) for hit in best_hits]
    return {
        "query_bundle": queries,
        "selected_k": len(chunks),
        "chunks": chunks,
        "coverage_note": "تم اختيار أفضل المراجع المتوافقة مع فكرة المحور وحدوده.",
    }
| # ========================= | |
| # Book-Level Automation | |
| # ========================= | |
def run_rag_automation_on_book(
    planned_book: dict,
    rag_engines: List,
    top_k: int = 5,
    sleep_s: float = 0.1,
) -> dict:
    """Attach a RAG context to every subsection of a planned book.

    Walks chapters -> sections -> subsections, stores the result of
    `build_rag_context_for_subsection` under each subsection's "rag_context"
    key, and sleeps `sleep_s` seconds after each subsection.

    The book is modified in place; the same object is returned.
    """
    for chapter_node in planned_book.get("chapters", []):
        chapter_heading = chapter_node.get("chapter_title", "")
        for section_node in chapter_node.get("sections", []):
            section_heading = section_node.get("title", "")
            for subsection in section_node.get("subsections", []):
                subsection["rag_context"] = build_rag_context_for_subsection(
                    rag_engines=rag_engines,
                    sub=subsection,
                    chapter_title=chapter_heading,
                    section_title=section_heading,
                    top_k=top_k,
                )
                # Pause between subsections before issuing the next retrieval.
                time.sleep(sleep_s)
    return planned_book