Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,10 @@ import re
|
|
| 4 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 5 |
from langchain_community.vectorstores import FAISS
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 7 |
-
import
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# --- Paths ---
|
| 10 |
CSV_FOLDER = "data"
|
|
@@ -16,16 +19,16 @@ d2 = pd.read_csv(f"{CSV_FOLDER}/dataset2_clean.csv")
|
|
| 16 |
d3 = pd.read_csv(f"{CSV_FOLDER}/dataset3_clean.csv")
|
| 17 |
print("✅ CSVs loaded")
|
| 18 |
|
| 19 |
-
# --- Load FAISS
|
| 20 |
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")
|
| 21 |
faiss_index = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
|
| 22 |
print("✅ FAISS loaded")
|
| 23 |
|
| 24 |
-
# --- Load Mistral model ---
|
| 25 |
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 26 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 27 |
-
model = AutoModelForCausalLM.from_pretrained(MODEL_ID,
|
| 28 |
-
print("✅ Mistral model loaded")
|
| 29 |
|
| 30 |
# --- Property synonyms ---
|
| 31 |
property_synonyms = {
|
|
@@ -144,7 +147,7 @@ def query_hea(question, top_k=5):
|
|
| 144 |
for name, df_filtered in csv_results_dict.items():
|
| 145 |
csv_context += f"\n### {name} matches:\n{df_filtered.to_string(index=False)}\n"
|
| 146 |
|
| 147 |
-
#
|
| 148 |
prompt = f"""
|
| 149 |
You are a materials scientist. Based on the following context, answer precisely.
|
| 150 |
FAISS context: {faiss_text}
|
|
@@ -154,7 +157,7 @@ Answer:
|
|
| 154 |
"""
|
| 155 |
|
| 156 |
# Tokenize and generate
|
| 157 |
-
inputs = tokenizer(prompt, return_tensors="pt").to(
|
| 158 |
outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.0)
|
| 159 |
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 160 |
|
|
@@ -181,4 +184,3 @@ demo = gr.Interface(
|
|
| 181 |
)
|
| 182 |
|
| 183 |
demo.launch()
|
| 184 |
-
|
|
|
|
| 4 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 5 |
from langchain_community.vectorstores import FAISS
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# --- Hugging Face token for gated models ---
|
| 10 |
+
HF_TOKEN = os.environ["HF_TOKEN"]
|
| 11 |
|
| 12 |
# --- Paths ---
|
| 13 |
CSV_FOLDER = "data"
|
|
|
|
| 19 |
d3 = pd.read_csv(f"{CSV_FOLDER}/dataset3_clean.csv")
|
| 20 |
print("✅ CSVs loaded")
|
| 21 |
|
| 22 |
+
# --- Load FAISS ---
|
| 23 |
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")
|
| 24 |
faiss_index = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
|
| 25 |
print("✅ FAISS loaded")
|
| 26 |
|
| 27 |
+
# --- Load Mistral model (CPU-friendly) ---
|
| 28 |
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 29 |
+
# Authenticate against the gated Mistral repo. `token=` replaces the deprecated
# `use_auth_token=` keyword in transformers' `from_pretrained`.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
|
| 30 |
+
# Load the causal LM weights with the same access token. `token=` replaces the
# deprecated `use_auth_token=` keyword in transformers' `from_pretrained`.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, token=HF_TOKEN)
|
| 31 |
+
print("✅ Mistral model loaded on CPU")
|
| 32 |
|
| 33 |
# --- Property synonyms ---
|
| 34 |
property_synonyms = {
|
|
|
|
| 147 |
for name, df_filtered in csv_results_dict.items():
|
| 148 |
csv_context += f"\n### {name} matches:\n{df_filtered.to_string(index=False)}\n"
|
| 149 |
|
| 150 |
+
# Prompt for Mistral
|
| 151 |
prompt = f"""
|
| 152 |
You are a materials scientist. Based on the following context, answer precisely.
|
| 153 |
FAISS context: {faiss_text}
|
|
|
|
| 157 |
"""
|
| 158 |
|
| 159 |
# Tokenize and generate
|
| 160 |
+
inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
|
| 161 |
# Greedy decoding for deterministic answers. Sampling with temperature=0.0 is
# invalid (temperature must be strictly positive) and makes generate() raise.
outputs = model.generate(**inputs, max_new_tokens=512, do_sample=False)
|
| 162 |
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 163 |
|
|
|
|
| 184 |
)
|
| 185 |
|
| 186 |
demo.launch()
|
|
|