meryem1232 committed
Commit 5b6a117 · verified · 1 Parent(s): d0814ee

Create apptest.py

Files changed (1): apptest.py +136 -0
apptest.py ADDED
@@ -0,0 +1,136 @@
+ import os
+ import torch
+ import transformers
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForCausalLM,
+     BitsAndBytesConfig,
+     pipeline,
+ )
+
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.prompts import PromptTemplate
+ from langchain.schema.runnable import RunnablePassthrough
+ from langchain.llms import HuggingFacePipeline
+ from langchain.chains import LLMChain
+
+ model_name = 'mistralai/Mistral-7B-Instruct-v0.1'
+
+ from huggingface_hub import login
+ # Read the Hub token from the environment instead of passing the literal
+ # string 'HF_TOKEN', which is not a valid token.
+ login(token=os.environ['HF_TOKEN'])
+
+ model_config = transformers.AutoConfig.from_pretrained(model_name)
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token
+ tokenizer.padding_side = "right"
+
+ #################################################################
+ # bitsandbytes parameters
+ #################################################################
+
+ # Activate 4-bit precision base model loading
+ use_4bit = True
+
+ # Compute dtype for 4-bit base models
+ bnb_4bit_compute_dtype = "float16"
+
+ # Quantization type (fp4 or nf4)
+ bnb_4bit_quant_type = "nf4"
+
+ # Activate nested quantization for 4-bit base models (double quantization)
+ use_nested_quant = False
+
+ #################################################################
+ # Set up quantization config
+ #################################################################
+ compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=use_4bit,
+     bnb_4bit_quant_type=bnb_4bit_quant_type,
+     bnb_4bit_compute_dtype=compute_dtype,
+     bnb_4bit_use_double_quant=use_nested_quant,
+ )
+
+ #################################################################
+ # Load the pre-trained model with 4-bit quantization
+ #################################################################
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=bnb_config,
+ )
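+
+ # The retriever below needs a FAISS vector store, but the original file
+ # never builds one. A minimal sketch, assuming an index was previously
+ # saved locally under "faiss_index" with a sentence-transformers embedding
+ # model (both the path and the embedding model name are assumptions):
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+ db = FAISS.load_local("faiss_index", embeddings)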
+
+ # Connect queries to the FAISS index through a retriever
+ retriever = db.as_retriever(
+     search_type="mmr",
+     search_kwargs={'k': 1},
+ )
+
+ text_generation_pipeline = transformers.pipeline(
+     model=model,
+     tokenizer=tokenizer,
+     task="text-generation",
+     temperature=0.02,
+     repetition_penalty=1.1,
+     return_full_text=True,
+     max_new_tokens=512,
+ )
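+ # Optional smoke test of the raw pipeline (the prompt is illustrative):
+ # out = text_generation_pipeline("### [INST] Bonjour, qui es-tu ? [/INST]")
+ # print(out[0]["generated_text"])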
+
+ prompt_template = """
+ ### [INST]
+ Instruction: You are a Q&A assistant. Your goal is to answer questions as accurately as possible, based only on the instructions and context provided, without using prior knowledge. Analyze the context carefully and give a direct answer based on it. Answer in French only.
+ {context}
+ Vous devez répondre aux questions en français.
+
+ ### QUESTION:
+ {question}
+ [/INST]
+ """
+
+ mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
+
+ # Create prompt from prompt template; it takes both the retrieved context
+ # and the user question, so both must be declared as input variables.
+ prompt = PromptTemplate(
+     input_variables=["context", "question"],
+     template=prompt_template,
+ )
+
+ # Create llm chain
+ llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)
+
+ from langchain.chains import RetrievalQA
+
+ # The retriever already limits results to k=1, so there is no need to set
+ # search_kwargs again here.
+ qa = RetrievalQA.from_chain_type(
+     llm=mistral_llm,
+     chain_type="stuff",
+     retriever=retriever,
+     chain_type_kwargs={"prompt": prompt},
+ )
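+
+ # Illustrative direct call, before wiring up the UI (the question is a
+ # made-up example):
+ # res = qa("Quel est le sujet principal du document ?")
+ # print(res["result"])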
+
+ import gradio as gr
+
+ # Gradio chat callback: run the retrieval-QA chain on each user message
+ def qna_chatbot(message, history):
+     res = qa(message)
+     answer = res["result"]
+     return answer
+
+ chat_interface = gr.ChatInterface(qna_chatbot)
+
+ if __name__ == "__main__":
+     chat_interface.launch(debug=True)