Spaces:

taradutt007
/

HEA_Query

Runtime error

App Files Files Community

taradutt007 committed on Sep 13, 2025

Commit

eb2b7d5

verified ·

1 Parent(s): dcaf215

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -9

app.py CHANGED Viewed

@@ -4,7 +4,10 @@ import re
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
 # --- Paths ---
 CSV_FOLDER = "data"
@@ -16,16 +19,16 @@ d2 = pd.read_csv(f"{CSV_FOLDER}/dataset2_clean.csv")
 d3 = pd.read_csv(f"{CSV_FOLDER}/dataset3_clean.csv")
 print("✅ CSVs loaded")
-# --- Load FAISS with dummy embeddings ---
 embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")
 faiss_index = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
 print("✅ FAISS loaded")
-# --- Load Mistral model ---
 MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto")
-print("✅ Mistral model loaded")
 # --- Property synonyms ---
 property_synonyms = {
@@ -144,7 +147,7 @@ def query_hea(question, top_k=5):
     for name, df_filtered in csv_results_dict.items():
         csv_context += f"\n### {name} matches:\n{df_filtered.to_string(index=False)}\n"
-    # --- Prompt for Mistral ---
     prompt = f"""
 You are a materials scientist. Based on the following context, answer precisely.
 FAISS context: {faiss_text}
@@ -154,7 +157,7 @@ Answer:
 """
     # Tokenize and generate
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.0)
     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -181,4 +184,3 @@ demo = gr.Interface(
 )
 demo.launch()

 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import os
+# --- Hugging Face token for gated models ---
+HF_TOKEN = os.environ["HF_TOKEN"]
 # --- Paths ---
 CSV_FOLDER = "data"
 d3 = pd.read_csv(f"{CSV_FOLDER}/dataset3_clean.csv")
 print("✅ CSVs loaded")
+# --- Load FAISS ---
 embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")
 faiss_index = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
 print("✅ FAISS loaded")
+# --- Load Mistral model (CPU-friendly) ---
 MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_auth_token=HF_TOKEN)
+model = AutoModelForCausalLM.from_pretrained(MODEL_ID, use_auth_token=HF_TOKEN)
+print("✅ Mistral model loaded on CPU")
 # --- Property synonyms ---
 property_synonyms = {
     for name, df_filtered in csv_results_dict.items():
         csv_context += f"\n### {name} matches:\n{df_filtered.to_string(index=False)}\n"
+    # Prompt for Mistral
     prompt = f"""
 You are a materials scientist. Based on the following context, answer precisely.
 FAISS context: {faiss_text}
 """
     # Tokenize and generate
+    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
     outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.0)
     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
 )
 demo.launch()