Spaces:

FabIndy
/

code-education-rag

Running

App Files Files Community

code-education-rag / src /config.py

FabIndy

Stabilize RAG core: add SUMMARY_AI, speed up LIST, clean resources and config

feddcd9 about 2 months ago

raw

history blame contribute delete

3.72 kB

	# src/config.py
	from __future__ import annotations

	import os
	import re
	from pathlib import Path


	# =========================
	# Paths (HF / local)
	# =========================

	# Repository root: the parent of the directory that contains this file.
	BASE_DIR = Path(__file__).resolve().parents[1]

	# Each setting below can be overridden by an environment variable of the
	# same name. A variable that is set but empty is treated as unset (`or`
	# falls through to the default): Path("") would otherwise collapse to "."
	# and silently point the setting at the current working directory.

	# Data
	CHUNKS_PATH = str(Path(os.environ.get("CHUNKS_PATH") or BASE_DIR / "data" / "chunks_articles.jsonl"))

	# Vectorstore (FAISS)
	DB_DIR = str(Path(os.environ.get("DB_DIR") or BASE_DIR / "db" / "faiss_code_edu_by_article"))

	# Embeddings model for FAISS queries (used in QA mode)
	EMBED_MODEL = os.environ.get("EMBED_MODEL") or "sentence-transformers/all-MiniLM-L6-v2"

	# LLM (GGUF) path
	LLM_MODEL_PATH = str(Path(os.environ.get("LLM_MODEL_PATH") or BASE_DIR / "models" / "model.gguf"))


	# =========================
	# Article ID regex
	# =========================

	# French legal-code article identifiers: one letter among L / D / R
	# (case-insensitive), an optional single whitespace character, 1-4 digits,
	# then one to four "-<digits>" groups. Examples: L111-1, R421-10, D521-5.
	# Group 1 captures the whole identifier; a bare "L111" (no dash group)
	# does not match.
	_ARTICLE_ID_PATTERN = r"\b([LDR]\s?\d{1,4}(?:-\d+){1,4})\b"
	ARTICLE_ID_RE = re.compile(_ARTICLE_ID_PATTERN, flags=re.IGNORECASE)


	# =========================
	# Triggers (routing)
	# =========================

	# French phrases associated with LIST mode. How they are matched against
	# a query is decided outside this file (presumably by the router).
	LIST_TRIGGERS = [
	    "quels articles", "quels sont les articles",
	    "articles sur", "articles parlant", "articles qui parlent",
	    "trouve des articles", "trouver des articles",
	    "liste des articles", "liste",
	]

	# French phrases associated with FULLTEXT mode. Accented and unaccented
	# spellings are both listed, as are both apostrophe styles (’ and ').
	FULLTEXT_TRIGGERS = [
	    "intégralité", "integralite",
	    "texte officiel",
	    "texte intégral", "texte integral",
	    "donne l’intégralité", "donne l'integralite",
	    "donne le texte",
	    "affiche l'article",
	]

	# French phrases associated with the explanation / synthesis mode, listed
	# in accented and unaccented spellings.
	EXPLAIN_TRIGGERS = [
	    "explique", "expliquer",
	    "synthèse", "synthese",
	    "points clés", "points cles",
	]


	# =========================
	# User-facing messages
	# =========================

	# Generic refusal: asks for a theme (LIST mode) or an article identifier.
	REFUSAL = "".join([
	    "Je ne peux pas répondre à cette demande telle quelle.\n",
	    "Indique un thème (mode LIST) ou un identifiant d’article (mode FULLTEXT / Résumé / Synthèse).",
	])

	# Refusal for a synthesis request that lacks an article identifier.
	SYNTHESIS_REFUSAL = "Pour faire une synthèse, j’ai besoin d’un identifiant d’article (ex : D521-5)."

	# Disclaimer text for AI-generated answers.
	QA_WARNING = "".join([
	    "Réponse IA : cette réponse peut contenir des erreurs. ",
	    "Vérifie toujours sur le texte officiel et, en cas de doute, demande un avis juridique.",
	])


	# =========================
	# QA settings (speed / safety)
	# =========================

	# QA tunables, each overridable by an environment variable of the same
	# name. A variable that is set but empty is treated as unset (`or` falls
	# through to the default) instead of aborting the import with the
	# ValueError that int("") / float("") would raise. A non-empty value that
	# is not a valid number still raises ValueError.
	QA_TOP_K_FINAL = int(os.environ.get("QA_TOP_K_FINAL") or "2")
	QA_DOC_MAX_CHARS = int(os.environ.get("QA_DOC_MAX_CHARS") or "700")
	QA_MAX_TOKENS = int(os.environ.get("QA_MAX_TOKENS") or "160")
	QA_TEMPERATURE = float(os.environ.get("QA_TEMPERATURE") or "0.2")


	# =========================
	# SUMMARY_AI settings (future move out of rag_core)
	# =========================

	# Phrases associated with the AI summary mode, listed in accented and
	# unaccented spellings.
	SUMMARY_TRIGGERS = [
	    "résumé ia",
	    "resume ia",
	    "résumé",
	    "resume",
	    "résumer",
	    "resumer",
	    "summary",
	]

	# Disclaimer text for AI-generated summaries.
	SUMMARY_WARNING = "".join([
	    "Résumé IA : reformulation automatique (peut contenir des erreurs ou omissions). ",
	    "Vérifie toujours sur le texte officiel.",
	])

	# SUMMARY_AI tunables, each overridable by an environment variable of the
	# same name. A variable that is set but empty is treated as unset (`or`
	# falls through to the default) instead of aborting the import with the
	# ValueError that int("") / float("") would raise. A non-empty value that
	# is not a valid number still raises ValueError.
	SUMMARY_DOC_MAX_CHARS = int(os.environ.get("SUMMARY_DOC_MAX_CHARS") or "1200")
	SUMMARY_MAX_TOKENS = int(os.environ.get("SUMMARY_MAX_TOKENS") or "180")
	SUMMARY_TEMPERATURE = float(os.environ.get("SUMMARY_TEMPERATURE") or "0.2")


	# =========================
	# Llama.cpp settings
	# =========================

	# Note: on Hugging Face CPU hardware, too many threads can sometimes
	# degrade performance, so the thread count stays configurable. The default
	# is cautious: one less than the reported CPU count (2 is assumed when the
	# count is unavailable), never below 1.
	#
	# A variable that is set but empty is treated as unset (`or` falls through
	# to the default) instead of aborting the import with the ValueError that
	# int("") would raise. A non-empty, non-numeric value still raises.
	LLM_N_CTX = int(os.environ.get("LLM_N_CTX") or "1024")
	LLM_N_THREADS = int(os.environ.get("LLM_N_THREADS") or max(1, (os.cpu_count() or 2) - 1))
	LLM_N_BATCH = int(os.environ.get("LLM_N_BATCH") or "128")