# load_documents.py – Supabase + PDF + paragraph viewer

import os
import tempfile

import requests
from supabase import create_client
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader

# ===== ENV =====
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY")

# Fail fast at import time: nothing below works without both settings.
if not SUPABASE_URL or not SUPABASE_ANON_KEY:
    raise RuntimeError("Missing SUPABASE_URL / SUPABASE_ANON_KEY")

supabase = create_client(SUPABASE_URL, SUPABASE_ANON_KEY)

# ===== PDF (examination regulations) in storage =====
PDF_FILE = "f10_bpo_ifb_tei_mif_wii_2021-01-04.pdf"
PDF_URL = f"{SUPABASE_URL}/storage/v1/object/public/File%20PDF/{PDF_FILE}"

# ===== Paragraph viewer (hg_clean.html) in the bucket "hg_viewer" =====
HG_HTML_URL = f"{SUPABASE_URL}/storage/v1/object/public/hg_viewer/hg_clean.html"


def load_hg_nrw():
    """Load the Hochschulgesetz NRW paragraphs from the table ``hg_nrw``.

    All columns of every row are fetched, ordered by ``order_index``.
    Each row must provide the keys ``abs_id``, ``title`` and ``content``;
    a missing key raises ``KeyError``.
    """
    print(">>> Lade Hochschulgesetz NRW (§) aus Tabelle hg_nrw …")

    rows = (
        supabase.table("hg_nrw")
        .select("*")
        .order("order_index")
        .execute()
    ).data

    docs = []
    for r in rows:
        abs_id = r["abs_id"]    # e.g. para_1
        title = r["title"]      # e.g. § 1 (Fn 44) Geltungsbereich
        content = r["content"]  # full text incl. footnotes
        # HTML viewer: