Spaces:
Runtime error
import gradio as gr
import subprocess
import requests
import time
import logging
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Cache for loaded models
loaded_models = {}
def check_ollama_running():
    """Wait until Ollama is fully ready."""
    url = "http://127.0.0.1:11434/api/tags"
    for _ in range(10):  # Poll for up to ~20 seconds (10 attempts, 2 s apart)
        try:
            response = requests.get(url, timeout=2)
            if response.status_code == 200:
                logger.info("Ollama is running.")
                return True
        except requests.exceptions.RequestException:
            logger.warning("Waiting for Ollama to start...")
        time.sleep(2)
    raise RuntimeError("Ollama is not running. Please check the server.")
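# Hedged sketch, not called anywhere below: if nothing else in the Space
# launches the Ollama server, check_ollama_running() will always time out
# and raise. Assuming the `ollama` binary is on PATH (it is already relied
# on for `ollama pull` below), the server could be started in the
# background like this before the readiness check.
def start_ollama_server():
    """Start `ollama serve` as a detached background process (sketch)."""
    return subprocess.Popen(
        ["ollama", "serve"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )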
def pull_model(model_name):
    """Ensure the model is available before use."""
    if model_name in loaded_models:
        logger.info(f"Model {model_name} is already loaded.")
        return
    try:
        subprocess.run(["ollama", "pull", model_name], check=True)
        logger.info(f"Model {model_name} pulled successfully.")
        loaded_models[model_name] = True
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to pull model {model_name}: {e}")
        raise
def get_llm(model_name):
    """Get an LLM instance with streaming enabled."""
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return Ollama(
        model=model_name,
        base_url="http://127.0.0.1:11434",
        callback_manager=callback_manager,
    )
def query_model(model_name, prompt):
    """Generate responses from the model with streaming."""
    check_ollama_running()     # Ensure Ollama is ready
    pull_model(model_name)     # Make sure the model is available
    llm = get_llm(model_name)  # Load the model
    response = ""
    for token in llm.stream(prompt):
        response += token
        yield response  # Stream the accumulated response in real time
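# Hedged usage sketch, not wired into the app: exercising the streaming
# generator directly for a quick smoke test. The model tag and prompt here
# are placeholders, and an Ollama server is assumed to be reachable at
# 127.0.0.1:11434.
def _smoke_test(model_name="mistral:7b", prompt="Say hello in one sentence."):
    last = ""
    for partial in query_model(model_name, prompt):
        last = partial  # each yield is the accumulated response so far
    print(last)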
# Define Gradio interface
iface = gr.Interface(
    fn=query_model,
    inputs=[
        gr.Dropdown(["deepseek-r1:1.5b", "mistral:7b"], label="Select Model"),
        gr.Textbox(label="Enter your prompt"),
    ],
    outputs="text",
    title="Ollama via LangChain & Gradio",
    description="Enter a prompt to interact with the Ollama-based model with streaming response.",
    flagging_dir="/app/flagged",
)
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
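One hedged note on the flagging setup: the Interface above points flagging at /app/flagged, so that directory has to exist and be writable by the Space's runtime user before anything is flagged. A minimal sketch (the path is simply copied from flagging_dir above):

    import os
    os.makedirs("/app/flagged", exist_ok=True)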