taradutt007 committed on
Commit
eb2b7d5
·
verified ·
1 Parent(s): dcaf215

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -4,7 +4,10 @@ import re
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.vectorstores import FAISS
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
- import torch
 
 
 
8
 
9
  # --- Paths ---
10
  CSV_FOLDER = "data"
@@ -16,16 +19,16 @@ d2 = pd.read_csv(f"{CSV_FOLDER}/dataset2_clean.csv")
16
  d3 = pd.read_csv(f"{CSV_FOLDER}/dataset3_clean.csv")
17
  print("✅ CSVs loaded")
18
 
19
- # --- Load FAISS with dummy embeddings ---
20
  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")
21
  faiss_index = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
22
  print("✅ FAISS loaded")
23
 
24
- # --- Load Mistral model ---
25
  MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
26
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
27
- model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")
28
- print("✅ Mistral model loaded")
29
 
30
  # --- Property synonyms ---
31
  property_synonyms = {
@@ -144,7 +147,7 @@ def query_hea(question, top_k=5):
144
  for name, df_filtered in csv_results_dict.items():
145
  csv_context += f"\n### {name} matches:\n{df_filtered.to_string(index=False)}\n"
146
 
147
- # --- Prompt for Mistral ---
148
  prompt = f"""
149
  You are a materials scientist. Based on the following context, answer precisely.
150
  FAISS context: {faiss_text}
@@ -154,7 +157,7 @@ Answer:
154
  """
155
 
156
  # Tokenize and generate
157
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
158
  outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.0)
159
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
160
 
@@ -181,4 +184,3 @@ demo = gr.Interface(
181
  )
182
 
183
  demo.launch()
184
-
 
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.vectorstores import FAISS
6
  from transformers import AutoTokenizer, AutoModelForCausalLM
7
+ import os
8
+
9
+ # --- Hugging Face token for gated models ---
10
+ HF_TOKEN = os.environ["HF_TOKEN"]
11
 
12
  # --- Paths ---
13
  CSV_FOLDER = "data"
 
19
  d3 = pd.read_csv(f"{CSV_FOLDER}/dataset3_clean.csv")
20
  print("✅ CSVs loaded")
21
 
22
+ # --- Load FAISS ---
23
  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")
24
  faiss_index = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
25
  print("✅ FAISS loaded")
26
 
27
+ # --- Load Mistral model (CPU-friendly) ---
28
  MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
29
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_auth_token=HF_TOKEN)
30
+ model = AutoModelForCausalLM.from_pretrained(MODEL_ID, use_auth_token=HF_TOKEN)
31
+ print("✅ Mistral model loaded on CPU")
32
 
33
  # --- Property synonyms ---
34
  property_synonyms = {
 
147
  for name, df_filtered in csv_results_dict.items():
148
  csv_context += f"\n### {name} matches:\n{df_filtered.to_string(index=False)}\n"
149
 
150
+ # Prompt for Mistral
151
  prompt = f"""
152
  You are a materials scientist. Based on the following context, answer precisely.
153
  FAISS context: {faiss_text}
 
157
  """
158
 
159
  # Tokenize and generate
160
+ inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
161
  outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.0)
162
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
163
 
 
184
  )
185
 
186
  demo.launch()