zoya-hammad commited on
Commit
39f090f
·
1 Parent(s): 552a4ed

updated app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -3
app.py CHANGED
@@ -15,7 +15,7 @@ from langchain_chroma import Chroma
15
  from langchain.memory import ConversationBufferMemory
16
  from langchain.chains import ConversationalRetrievalChain
17
  from langchain_ollama import ChatOllama
18
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
19
  from langchain.llms import HuggingFacePipeline
20
  from langchain.memory import ConversationBufferMemory
21
  from langchain.chains import ConversationalRetrievalChain
@@ -28,6 +28,7 @@ from random import randint
28
  import shutil
29
  from dotenv import load_dotenv
30
  from huggingface_hub import login
 
31
 
32
  db_name = "vector_db"
33
  folder = "my-knowledge-base/"
@@ -37,9 +38,21 @@ HF_TOKEN = os.getenv("HF_TOKEN")
37
  if HF_TOKEN is None:
38
  raise ValueError("HF_TOKEN is not set. Check your .env file.")
39
  login(HF_TOKEN, add_to_git_credential=True)
40
- MODEL_NAME = "mistralai/Mistral-7B-4bit"
 
 
 
 
 
 
 
41
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
42
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto",use_auth_token=HF_TOKEN)
 
 
 
 
 
43
 
44
 
45
  def process_files(files):
 
15
  from langchain.memory import ConversationBufferMemory
16
  from langchain.chains import ConversationalRetrievalChain
17
  from langchain_ollama import ChatOllama
18
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline,BitsAndBytesConfig
19
  from langchain.llms import HuggingFacePipeline
20
  from langchain.memory import ConversationBufferMemory
21
  from langchain.chains import ConversationalRetrievalChain
 
28
  import shutil
29
  from dotenv import load_dotenv
30
  from huggingface_hub import login
31
+ import torch
32
 
33
  db_name = "vector_db"
34
  folder = "my-knowledge-base/"
 
38
  if HF_TOKEN is None:
39
  raise ValueError("HF_TOKEN is not set. Check your .env file.")
40
  login(HF_TOKEN, add_to_git_credential=True)
41
+
42
+ MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
43
+ bnb_config = BitsAndBytesConfig(
44
+ load_in_4bit=True, # for 8-bit quantization, use load_in_8bit=True instead
45
+ bnb_4bit_compute_dtype=torch.float16,
46
+ bnb_4bit_use_double_quant=True,
47
+ bnb_4bit_quant_type="nf4"
48
+ )
49
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
50
+ model = AutoModelForCausalLM.from_pretrained(
51
+ MODEL_NAME,
52
+ quantization_config=bnb_config,
53
+ device_map="auto",
54
+ use_auth_token=HF_TOKEN
55
+ )
56
 
57
 
58
  def process_files(files):