Spaces:

Loren
/

api_search_articles

Sleeping

Loren committed on Nov 17, 2025

Commit

ba3281e

verified ·

1 Parent(s): e1182e0

Upload 5 files

Files changed (2) hide show

app/main.py CHANGED Viewed

@@ -5,10 +5,11 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 from app.templates.prompt_mistral_rag import RAG_PROMPT_TEMPLATE
 app = FastAPI(
     title="Articles API",
     description="API pour récupérer articles et tags depuis SQLite",
@@ -16,12 +17,31 @@ app = FastAPI(
 )
 # Chargement du modèle génératif
-MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
-                                             torch_dtype=torch.float16,
-                                             device_map="auto"
-                                            )
 # CORS pour permettre l'accès depuis le navigateur
 app.add_middleware(

 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 from app.templates.prompt_mistral_rag import RAG_PROMPT_TEMPLATE
 app = FastAPI(
     title="Articles API",
     description="API pour récupérer articles et tags depuis SQLite",
 )
 # Chargement du modèle génératif
+#MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
+#tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+#model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
+#                                             torch_dtype=torch.float16,
+#                                             device_map="auto"
+#                                            )
+model_id = "mistralai/Mistral-7B-Instruct-v0.2"
+# Charger le tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+# Config de quantization moderne (4-bit ou 8-bit)
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,          # False pour int8
+    bnb_4bit_compute_dtype=torch.float16,  # dtype des calculs
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
+# Charger le modèle avec la nouvelle API
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    quantization_config=quant_config,
+    device_map="auto",          # pour GPU auto
+    dtype=torch.float16
+)
 # CORS pour permettre l'accès depuis le navigateur
 app.add_middleware(

app/templates/prompt_mistral_rag.py CHANGED Viewed

@@ -1,20 +1,19 @@
-RAG_PROMPT_TEMPLATE = """[INST]
-You are an AI assistant that answers questions based solely on the CONTEXT provided.
-The context consists of excerpts from blog articles on a wide variety of topics.
-RULES:
-1. Only use information present in the CONTEXT to answer the user's question.
-2. If the information is not in the CONTEXT, politely say that you do not know the answer.
-3. Do not invent, speculate, or add any information from outside sources.
-4. If the user's question is vague or unclear, ask for clarification before answering.
-5. Provide answers that are clear, concise, and natural in English.
-7. Do not cite or refer to sources outside of the provided CONTEXT.
-8. Always answer in the same language as the USER QUESTION.
-CONTEXT:
-{context}
-USER QUESTION:
-{question}
-[/INST]
 """

+RAG_PROMPT_TEMPLATE = """
+You are an AI assistant that answers questions based solely on the CONTEXT provided.
+The context consists of excerpts from blog articles on a wide variety of topics.
+RULES:
+1. Only use information present in the CONTEXT to answer the user's question.
+2. If the information is not in the CONTEXT, politely say that you do not know the answer.
+3. Do not invent, speculate, or add any information from outside sources.
+4. If the user's question is vague or unclear, ask for clarification before answering.
+5. Provide answers that are clear, concise, and natural in English.
+7. Do not cite or refer to sources outside of the provided CONTEXT.
+8. Always answer in the same language as the USER QUESTION.
+CONTEXT:
+{context}
+USER QUESTION:
+{question}
 """