Al-Alcoba-Inciarte committed on
Commit
afc58c7
Β·
verified Β·
1 Parent(s): 75141a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -10
app.py CHANGED
@@ -1,21 +1,77 @@
1
  import gradio as gr
2
- from langchain_ollama import OllamaLLM
 
 
 
 
 
 
3
 
4
- llm = OllamaLLM(model="deepseek-r1:1.5b")
 
5
 
6
- def query_model(prompt: str) -> str:
7
- """
8
- Uses LangChain's Ollama integration to process the input prompt.
9
- """
10
- response = llm(prompt)
11
- return response
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  iface = gr.Interface(
14
  fn=query_model,
15
- inputs="text",
 
 
 
16
  outputs="text",
17
  title="Ollama via LangChain & Gradio",
18
- description="Enter a prompt to interact with the Ollama-based model.",
19
  flagging_dir="/app/flagged"
20
  )
21
 
 
1
  import gradio as gr
2
+ import subprocess
3
+ import time
4
+ import requests
5
+ import logging
6
+ from langchain_community.llms import Ollama
7
+ from langchain.callbacks.manager import CallbackManager
8
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
9
 
10
# Configure logging once at import time so the helper functions below can log.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global cache to store loaded models
# (maps model name -> LLM client, so a model is pulled/instantiated only once)
loaded_models = {}
 
 
 
 
15
 
16
# Poll the local Ollama server until it answers, or give up after ~10 tries.
def check_ollama_running():
    """Return True once the local Ollama HTTP API responds.

    Polls the tags endpoint roughly once per second for about 10 seconds.

    Raises:
        RuntimeError: if the server never returns HTTP 200.
    """
    tags_url = "http://127.0.0.1:11434/api/tags"
    attempts_left = 10
    while attempts_left > 0:
        try:
            if requests.get(tags_url, timeout=2).status_code == 200:
                logger.info("Ollama is running.")
                return True
        except requests.exceptions.RequestException:
            # Server not reachable yet — wait and retry.
            logger.warning("Ollama is not running yet. Retrying...")
        attempts_left -= 1
        time.sleep(1)
    raise RuntimeError("Ollama is not running. Please check the server.")
29
+
30
# Download model weights through the Ollama CLI.
def pull_model(model_name):
    """Pull *model_name* with ``ollama pull``, raising on a non-zero exit.

    Raises:
        subprocess.CalledProcessError: if the pull command fails.
    """
    logger.info(f"Pulling model: {model_name}")
    try:
        # check=True turns a failed pull into CalledProcessError.
        subprocess.run(["ollama", "pull", model_name], check=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to pull model {model_name}: {e}")
        raise
    logger.info(f"Model {model_name} pulled successfully.")
39
+
40
# Build a LangChain Ollama client wired for stdout streaming.
def get_llm(model_name):
    """Return an Ollama LLM client for *model_name* with streaming to stdout."""
    stdout_streamer = StreamingStdOutCallbackHandler()
    return Ollama(
        model=model_name,
        base_url="http://127.0.0.1:11434",
        callback_manager=CallbackManager([stdout_streamer]),
    )
44
+
45
# Fetch a model from the cache, pulling and instantiating it on first use.
def check_and_load_model(model_name):
    """Return the cached LLM for *model_name*, loading it if necessary."""
    cached = loaded_models.get(model_name)
    if cached is not None:
        logger.info(f"Model {model_name} is already loaded.")
        return cached
    pull_model(model_name)  # make sure the weights exist locally first
    fresh_llm = get_llm(model_name)
    loaded_models[model_name] = fresh_llm
    return fresh_llm
54
+
55
# Gradio handler: a generator that yields the growing response text.
def query_model(model_name, prompt):
    """Stream the model's answer to *prompt*, yielding cumulative text.

    Each yielded value is the full response so far, so Gradio can update
    the output box in real time.
    """
    check_ollama_running()  # fail fast if the server is down
    llm = check_and_load_model(model_name)

    pieces = []
    for token in llm.stream(prompt):
        pieces.append(token)
        yield "".join(pieces)
64
+
65
# Assemble the Gradio UI: model picker + prompt box feeding query_model.
model_selector = gr.Dropdown(["deepseek-r1:1.5b", "mistral:7b"], label="Select Model")
prompt_box = gr.Textbox(label="Enter your prompt")

iface = gr.Interface(
    fn=query_model,
    inputs=[model_selector, prompt_box],
    outputs="text",
    title="Ollama via LangChain & Gradio",
    description="Enter a prompt to interact with the Ollama-based model with streaming response.",
    flagging_dir="/app/flagged",
)
77