Mohamed284 committed on
Commit
7918f2a
·
verified ·
1 Parent(s): d2ff146

stanford-crfm/BioMedLM

Browse files
Files changed (1) hide show
  1. app.py +31 -27
app.py CHANGED
@@ -1,26 +1,23 @@
1
  import os
2
  import json
3
  import pandas as pd
4
- from transformers import AutoTokenizer, AutoModelForCausalLM
5
- from langchain_ollama import OllamaLLM, OllamaEmbeddings
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_core.prompts import PromptTemplate
8
  from langchain_core.output_parsers import StrOutputParser
9
  from operator import itemgetter
10
- from huggingface_hub import HfApi, HfFolder
11
  import gradio as gr
12
- from huggingface_hub import login, InferenceClient
13
- from langchain_community.embeddings import HuggingFaceEmbeddings # Updated import
14
-
15
 
 
16
  USE_HF = True
17
- MODEL_NAME = "BioMistral/BioMistral-7B"
 
18
 
 
19
  with open('AskNatureNet_data.json', 'r', encoding='utf-8') as f:
20
  data = json.load(f)
21
-
22
  df = pd.DataFrame(data)
23
-
24
  documents = [
25
  f"Source: {item['Source']}\nApplication: {item['Application']}\nFunction1: {item['Function1']}\nStrategy: {item['Strategy']}"
26
  for item in data
@@ -28,31 +25,39 @@ documents = [
28
 
29
  if USE_HF:
30
  print("Using Hugging Face model...")
31
-
32
  huggingface_token = os.environ.get("AskNature_RAG")
33
- # Load tokenizer and model from Hugging Face Hub
34
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=huggingface_token)
 
 
 
 
 
 
 
 
35
  model = AutoModelForCausalLM.from_pretrained(
36
  MODEL_NAME,
37
  device_map="auto",
38
  offload_folder="offload", # Specify the offload folder
39
- token=huggingface_token
 
40
  )
41
  embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)
42
  lang_model = model
43
  else:
44
- print("Using local Ollama model...")
45
- MODEL = "jsk/bio-mistral"
46
- embeddings = OllamaEmbeddings(model=MODEL)
47
- lang_model = OllamaLLM(model=MODEL)
48
 
49
- batch_size = 16
50
  batched_embeddings = [
51
- embeddings.embed_documents(documents[i:i + batch_size])
52
- for i in range(0, len(documents), batch_size)
53
  ]
54
  batched_embeddings = [embed for batch in batched_embeddings for embed in batch]
55
 
 
56
  index_path = "faiss_index"
57
  if os.path.exists(index_path):
58
  vectorstore = FAISS.load_local(index_path, embeddings)
@@ -62,30 +67,29 @@ else:
62
 
63
  retriever = vectorstore.as_retriever()
64
 
 
65
  template = """
66
  Answer the question based on the context below. If you can't
67
  answer the question, reply "I don't know".
68
-
69
  Context: {context}
70
-
71
  Question: {question}
72
  """
73
  prompt = PromptTemplate.from_template(template)
74
 
 
75
  chain = {
76
  "context": itemgetter("question") | retriever,
77
  "question": itemgetter("question"),
78
  } | prompt | lang_model | StrOutputParser()
79
 
 
80
def rag_qa(question):
    """Answer *question* through the RAG chain.

    Returns the chain's string answer, or an "Error: ..." string if
    invocation raises (the app surfaces failures as chat text rather
    than crashing).
    """
    try:
        answer = chain.invoke({'question': question})
    except Exception as e:
        return f"Error: {str(e)}"
    return answer
85
 
86
- # Chatbot functionality
87
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
88
-
89
  def respond(
90
  message,
91
  history: list[tuple[str, str]],
@@ -114,10 +118,10 @@ def respond(
114
  top_p=top_p,
115
  ):
116
  token = message.choices[0].delta.content
117
-
118
  response += token
119
  yield response
120
 
 
121
  demo = gr.ChatInterface(
122
  respond,
123
  additional_inputs=[
@@ -135,4 +139,4 @@ demo = gr.ChatInterface(
135
  )
136
 
137
  if __name__ == "__main__":
138
- demo.launch()
 
1
  import os
2
  import json
3
  import pandas as pd
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
5
  from langchain_community.vectorstores import FAISS
6
  from langchain_core.prompts import PromptTemplate
7
  from langchain_core.output_parsers import StrOutputParser
8
  from operator import itemgetter
 
9
  import gradio as gr
10
+ from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
11
 
12
+ # Configuration
13
  USE_HF = True
14
+ MODEL_NAME = "stanford-crfm/BioMedLM"
15
+ BATCH_SIZE = 8 # Adjusted batch size for memory optimization
16
 
17
+ # Load data
18
  with open('AskNatureNet_data.json', 'r', encoding='utf-8') as f:
19
  data = json.load(f)
 
20
  df = pd.DataFrame(data)
 
21
  documents = [
22
  f"Source: {item['Source']}\nApplication: {item['Application']}\nFunction1: {item['Function1']}\nStrategy: {item['Strategy']}"
23
  for item in data
 
25
 
26
  if USE_HF:
27
  print("Using Hugging Face model...")
28
+
29
  huggingface_token = os.environ.get("AskNature_RAG")
30
+
31
+ # Quantization configuration for 4-bit precision
32
+ bnb_config = BitsAndBytesConfig(
33
+ load_in_4bit=True,
34
+ bnb_4bit_use_double_quant=True,
35
+ bnb_4bit_quant_type="nf4"
36
+ )
37
+
38
+ # Load tokenizer and model with offloading and quantization
39
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=huggingface_token)
40
  model = AutoModelForCausalLM.from_pretrained(
41
  MODEL_NAME,
42
  device_map="auto",
43
  offload_folder="offload", # Specify the offload folder
44
+ quantization_config=bnb_config,
45
+ use_auth_token=huggingface_token
46
  )
47
  embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)
48
  lang_model = model
49
  else:
50
+ print("Using local model...")
51
+ # Local model loading logic here
 
 
52
 
53
+ # Generate embeddings in batches
54
  batched_embeddings = [
55
+ embeddings.embed_documents(documents[i:i + BATCH_SIZE])
56
+ for i in range(0, len(documents), BATCH_SIZE)
57
  ]
58
  batched_embeddings = [embed for batch in batched_embeddings for embed in batch]
59
 
60
+ # FAISS index handling
61
  index_path = "faiss_index"
62
  if os.path.exists(index_path):
63
  vectorstore = FAISS.load_local(index_path, embeddings)
 
67
 
68
  retriever = vectorstore.as_retriever()
69
 
70
+ # Prompt template
71
  template = """
72
  Answer the question based on the context below. If you can't
73
  answer the question, reply "I don't know".
 
74
  Context: {context}
 
75
  Question: {question}
76
  """
77
  prompt = PromptTemplate.from_template(template)
78
 
79
+ # Chain definition
80
  chain = {
81
  "context": itemgetter("question") | retriever,
82
  "question": itemgetter("question"),
83
  } | prompt | lang_model | StrOutputParser()
84
 
85
+ # Question-answering function
86
def rag_qa(question):
    """Run the retrieval-augmented chain on *question*.

    Any exception from the chain is converted into an "Error: ..."
    string so the caller (the chat UI) always receives text.
    """
    payload = {'question': question}
    try:
        return chain.invoke(payload)
    except Exception as e:
        return f"Error: {str(e)}"
91
 
92
+ # Gradio chatbot interface
 
 
93
  def respond(
94
  message,
95
  history: list[tuple[str, str]],
 
118
  top_p=top_p,
119
  ):
120
  token = message.choices[0].delta.content
 
121
  response += token
122
  yield response
123
 
124
+ # Gradio interface setup
125
  demo = gr.ChatInterface(
126
  respond,
127
  additional_inputs=[
 
139
  )
140
 
141
  if __name__ == "__main__":
142
+ demo.launch()