Spaces · Running on Zero
jedick committed · Commit a30b7e0 · 1 Parent(s): 355c5a2

Download model before running workflow (try 2)
app.py CHANGED

@@ -4,7 +4,7 @@ from graph import BuildGraph
 from retriever import db_dir
 from langgraph.checkpoint.memory import MemorySaver
 from dotenv import load_dotenv
-from main import openai_model, model_id
+from main import openai_model, model_id, DownloadChatModel
 from util import get_sources, get_start_end_months
 from mods.tool_calling_llm import extract_think
 import requests
@@ -82,7 +82,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
-            duration=15,
             title=f"Model loading...",
         )
     # Get the chat model and build the graph
@@ -211,6 +210,11 @@ def to_workflow(request: gr.Request, *args):
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
+        # If graph hasn't been instantiated, download model before running workflow
+        graph = graph_instances[compute_mode].get(request.session_hash)
+        if graph is None:
+            gr.Info("Downloading model, please wait", title="Downloading model...")
+            DownloadChatModel()
         # Call the workflow function with the @spaces.GPU decorator
         for value in run_workflow_local(*new_args):
             yield value
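Taken together with main.py below, the new block in to_workflow means the weight download (a CPU-side snapshot_download) happens before the @spaces.GPU-decorated run_workflow_local call, so a session's first local-mode request presumably no longer spends its GPU allocation on downloading. A minimal sketch of that pattern, with the cache layout {compute_mode: {session_hash: graph}} inferred from the .get() call and everything Gradio- or GPU-specific stubbed out:

# Sketch only: DownloadChatModel and run_workflow_local stand in for the real
# functions; graph_instances mirrors the per-mode, per-session cache used above.
graph_instances = {"local": {}, "remote": {}}

def DownloadChatModel():
    print("downloading model snapshot (CPU-side)...")

def run_workflow_local(*args):
    # Stands in for the @spaces.GPU-decorated workflow generator.
    yield from ("chunk-1", "chunk-2")

def to_workflow(session_hash, compute_mode="local", *args):
    new_args = args + (session_hash,)
    if compute_mode == "local":
        # First local request for this session: no graph cached yet, so fetch
        # the model weights before entering the GPU-decorated workflow.
        if graph_instances[compute_mode].get(session_hash) is None:
            DownloadChatModel()
        yield from run_workflow_local(*new_args)

for value in to_workflow("abc123"):
    print(value)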
main.py CHANGED

@@ -5,6 +5,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langgraph.checkpoint.memory import MemorySaver
 from langchain_core.messages import ToolMessage
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from huggingface_hub import snapshot_download
 from datetime import datetime
 from dotenv import load_dotenv
 import os
@@ -128,6 +129,16 @@ def ProcessDirectory(path, compute_mode):
         print(f"Chroma: no change for {file_path}")
 
 
+def DownloadChatModel():
+    """
+    Downloads a chat model to a local directory.
+    """
+    # Local directory is "./<repo_name>"
+    repo_name = model_id.split("/")[-1]
+    local_dir = f"./{repo_name}"
+    snapshot_download(model_id, local_dir=local_dir)
+
+
 def GetChatModel(compute_mode):
     """
     Get a chat model.
@@ -146,11 +157,20 @@ def GetChatModel(compute_mode):
     if compute_mode == "local" and not torch.cuda.is_available():
         raise Exception("Local chat model selected without GPU")
 
+    # Use local directory for model if it exists
+    repo_name = model_id.split("/")[-1]
+    local_dir = f"./{repo_name}"
+    if os.path.isdir(local_dir):
+        print("Using local directory for model")
+        id_or_dir = local_dir
+    else:
+        id_or_dir = model_id
+
     # Define the pipeline to pass to the HuggingFacePipeline class
     # https://huggingface.co/blog/langchain
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(id_or_dir)
     model = AutoModelForCausalLM.from_pretrained(
-        model_id,
+        id_or_dir,
         # We need this to load the model in BF16 instead of fp32 (torch.float)
         torch_dtype=torch.bfloat16,
     )
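For reference, DownloadChatModel plus the new lookup in GetChatModel amount to the standard huggingface_hub pattern: snapshot_download the repo into ./<repo_name>, then point from_pretrained at that directory when it exists. A self-contained sketch, using a hypothetical model_id (main.py defines its own):

import os
import torch
from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical model id for illustration; main.py defines its own model_id.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"

# Download the snapshot into ./<repo_name>; files already present are reused.
repo_name = model_id.split("/")[-1]
local_dir = f"./{repo_name}"
snapshot_download(model_id, local_dir=local_dir)

# Prefer the local directory when loading, falling back to the hub id.
id_or_dir = local_dir if os.path.isdir(local_dir) else model_id
tokenizer = AutoTokenizer.from_pretrained(id_or_dir)
model = AutoModelForCausalLM.from_pretrained(id_or_dir, torch_dtype=torch.bfloat16)

Loading in bfloat16 keeps memory use at roughly half of float32, matching the torch_dtype=torch.bfloat16 argument in the diff.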