Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files
- app/main.py +27 -7
- app/templates/prompt_mistral_rag.py +18 -19
app/main.py
CHANGED
|
@@ -5,10 +5,11 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
| 5 |
from fastapi.responses import HTMLResponse
|
| 6 |
|
| 7 |
from pydantic import BaseModel
|
| 8 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 9 |
import torch
|
| 10 |
from app.templates.prompt_mistral_rag import RAG_PROMPT_TEMPLATE
|
| 11 |
|
|
|
|
| 12 |
app = FastAPI(
|
| 13 |
title="Articles API",
|
| 14 |
description="API pour récupérer articles et tags depuis SQLite",
|
|
@@ -16,12 +17,31 @@ app = FastAPI(
|
|
| 16 |
)
|
| 17 |
|
| 18 |
# Chargement du modèle génératif
|
| 19 |
-
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 20 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 21 |
-
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
|
| 22 |
-
torch_dtype=torch.float16,
|
| 23 |
-
device_map="auto"
|
| 24 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# CORS pour permettre l'accès depuis le navigateur
|
| 27 |
app.add_middleware(
|
|
|
|
| 5 |
from fastapi.responses import HTMLResponse
|
| 6 |
|
| 7 |
from pydantic import BaseModel
|
| 8 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 9 |
import torch
|
| 10 |
from app.templates.prompt_mistral_rag import RAG_PROMPT_TEMPLATE
|
| 11 |
|
| 12 |
+
|
| 13 |
app = FastAPI(
|
| 14 |
title="Articles API",
|
| 15 |
description="API pour récupérer articles et tags depuis SQLite",
|
|
|
|
| 17 |
)
|
| 18 |
|
| 19 |
# Chargement du modèle génératif
|
| 20 |
+
#MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 21 |
+
#tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
| 22 |
+
#model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
|
| 23 |
+
# torch_dtype=torch.float16,
|
| 24 |
+
# device_map="auto"
|
| 25 |
+
# )
|
| 26 |
+
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
|
| 27 |
+
# Charger le tokenizer
|
| 28 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 29 |
+
|
| 30 |
+
# Config de quantization moderne (4-bit ou 8-bit)
|
| 31 |
+
quant_config = BitsAndBytesConfig(
|
| 32 |
+
load_in_4bit=True, # 4-bit quantization; for int8, use load_in_8bit=True instead
|
| 33 |
+
bnb_4bit_compute_dtype=torch.float16, # dtype des calculs
|
| 34 |
+
bnb_4bit_use_double_quant=True,
|
| 35 |
+
bnb_4bit_quant_type="nf4"
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# Charger le modèle avec la nouvelle API
|
| 39 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 40 |
+
model_id,
|
| 41 |
+
quantization_config=quant_config,
|
| 42 |
+
device_map="auto", # pour GPU auto
|
| 43 |
+
dtype=torch.float16
|
| 44 |
+
)
|
| 45 |
|
| 46 |
# CORS pour permettre l'accès depuis le navigateur
|
| 47 |
app.add_middleware(
|
app/templates/prompt_mistral_rag.py
CHANGED
|
@@ -1,20 +1,19 @@
|
|
| 1 |
-
RAG_PROMPT_TEMPLATE = """
|
| 2 |
-
You are an AI assistant that answers questions based solely on the CONTEXT provided.
|
| 3 |
-
The context consists of excerpts from blog articles on a wide variety of topics.
|
| 4 |
-
|
| 5 |
-
RULES:
|
| 6 |
-
1. Only use information present in the CONTEXT to answer the user's question.
|
| 7 |
-
2. If the information is not in the CONTEXT, politely say that you do not know the answer.
|
| 8 |
-
3. Do not invent, speculate, or add any information from outside sources.
|
| 9 |
-
4. If the user's question is vague or unclear, ask for clarification before answering.
|
| 10 |
-
5. Provide answers that are clear, concise, and natural in English.
|
| 11 |
-
7. Do not cite or refer to sources outside of the provided CONTEXT.
|
| 12 |
-
8. Always answer in the same language as the USER QUESTION.
|
| 13 |
-
|
| 14 |
-
CONTEXT:
|
| 15 |
-
{context}
|
| 16 |
-
|
| 17 |
-
USER QUESTION:
|
| 18 |
-
{question}
|
| 19 |
-
[/INST]
|
| 20 |
"""
|
|
|
|
| 1 |
+
RAG_PROMPT_TEMPLATE = """
|
| 2 |
+
You are an AI assistant that answers questions based solely on the CONTEXT provided.
|
| 3 |
+
The context consists of excerpts from blog articles on a wide variety of topics.
|
| 4 |
+
|
| 5 |
+
RULES:
|
| 6 |
+
1. Only use information present in the CONTEXT to answer the user's question.
|
| 7 |
+
2. If the information is not in the CONTEXT, politely say that you do not know the answer.
|
| 8 |
+
3. Do not invent, speculate, or add any information from outside sources.
|
| 9 |
+
4. If the user's question is vague or unclear, ask for clarification before answering.
|
| 10 |
+
5. Provide answers that are clear, concise, and natural in English.
|
| 11 |
+
6. Do not cite or refer to sources outside of the provided CONTEXT.
|
| 12 |
+
7. Always answer in the same language as the USER QUESTION.
|
| 13 |
+
|
| 14 |
+
CONTEXT:
|
| 15 |
+
{context}
|
| 16 |
+
|
| 17 |
+
USER QUESTION:
|
| 18 |
+
{question}
|
|
|
|
| 19 |
"""
|