Loren committed on
Commit
ba3281e
·
verified ·
1 Parent(s): e1182e0

Upload 5 files

Browse files
Files changed (2) hide show
  1. app/main.py +27 -7
  2. app/templates/prompt_mistral_rag.py +18 -19
app/main.py CHANGED
@@ -5,10 +5,11 @@ from fastapi.middleware.cors import CORSMiddleware
5
  from fastapi.responses import HTMLResponse
6
 
7
  from pydantic import BaseModel
8
- from transformers import AutoTokenizer, AutoModelForCausalLM
9
  import torch
10
  from app.templates.prompt_mistral_rag import RAG_PROMPT_TEMPLATE
11
 
 
12
  app = FastAPI(
13
  title="Articles API",
14
  description="API pour récupérer articles et tags depuis SQLite",
@@ -16,12 +17,31 @@ app = FastAPI(
16
  )
17
 
18
  # Chargement du modèle génératif
19
- MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
20
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
21
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
22
- torch_dtype=torch.float16,
23
- device_map="auto"
24
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  # CORS pour permettre l'accès depuis le navigateur
27
  app.add_middleware(
 
5
  from fastapi.responses import HTMLResponse
6
 
7
  from pydantic import BaseModel
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
9
  import torch
10
  from app.templates.prompt_mistral_rag import RAG_PROMPT_TEMPLATE
11
 
12
+
13
  app = FastAPI(
14
  title="Articles API",
15
  description="API pour récupérer articles et tags depuis SQLite",
 
17
  )
18
 
19
  # Chargement du modèle génératif
20
+ #MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
21
+ #tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
22
+ #model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,
23
+ # torch_dtype=torch.float16,
24
+ # device_map="auto"
25
+ # )
26
+ model_id = "mistralai/Mistral-7B-Instruct-v0.2"
27
+ # Charger le tokenizer
28
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
29
+
30
+ # Config de quantization moderne (4-bit ou 8-bit)
31
+ quant_config = BitsAndBytesConfig(
32
+ load_in_4bit=True, # False pour int8
33
+ bnb_4bit_compute_dtype=torch.float16, # dtype des calculs
34
+ bnb_4bit_use_double_quant=True,
35
+ bnb_4bit_quant_type="nf4"
36
+ )
37
+
38
+ # Charger le modèle avec la nouvelle API
39
+ model = AutoModelForCausalLM.from_pretrained(
40
+ model_id,
41
+ quantization_config=quant_config,
42
+ device_map="auto", # pour GPU auto
43
+ dtype=torch.float16
44
+ )
45
 
46
  # CORS pour permettre l'accès depuis le navigateur
47
  app.add_middleware(
app/templates/prompt_mistral_rag.py CHANGED
@@ -1,20 +1,19 @@
1
- RAG_PROMPT_TEMPLATE = """[INST]
2
- You are an AI assistant that answers questions based solely on the CONTEXT provided.
3
- The context consists of excerpts from blog articles on a wide variety of topics.
4
-
5
- RULES:
6
- 1. Only use information present in the CONTEXT to answer the user's question.
7
- 2. If the information is not in the CONTEXT, politely say that you do not know the answer.
8
- 3. Do not invent, speculate, or add any information from outside sources.
9
- 4. If the user's question is vague or unclear, ask for clarification before answering.
10
- 5. Provide answers that are clear, concise, and natural in English.
11
- 7. Do not cite or refer to sources outside of the provided CONTEXT.
12
- 8. Always answer in the same language as the USER QUESTION.
13
-
14
- CONTEXT:
15
- {context}
16
-
17
- USER QUESTION:
18
- {question}
19
- [/INST]
20
  """
 
1
# Prompt template for the RAG endpoint.
# Placeholders: {context} — retrieved article excerpts; {question} — the
# user's question. Rules are numbered 1-7 (the previous version skipped 6),
# and the "answer in English" wording was removed because it contradicted
# the "answer in the language of the question" rule.
RAG_PROMPT_TEMPLATE = """
You are an AI assistant that answers questions based solely on the CONTEXT provided.
The context consists of excerpts from blog articles on a wide variety of topics.

RULES:
1. Only use information present in the CONTEXT to answer the user's question.
2. If the information is not in the CONTEXT, politely say that you do not know the answer.
3. Do not invent, speculate, or add any information from outside sources.
4. If the user's question is vague or unclear, ask for clarification before answering.
5. Provide answers that are clear, concise, and natural.
6. Do not cite or refer to sources outside of the provided CONTEXT.
7. Always answer in the same language as the USER QUESTION.

CONTEXT:
{context}

USER QUESTION:
{question}

"""