Spaces:
Runtime error
import gradio as gr
import subprocess
import requests
import time
import logging
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Cache for loaded models
loaded_models = {}
def check_ollama_running():
    """Wait until Ollama is fully ready."""
    url = "http://127.0.0.1:11434/api/tags"
    for _ in range(10):  # Poll for up to ~20 seconds (10 attempts, 2 s apart)
        try:
            response = requests.get(url, timeout=2)
            if response.status_code == 200:
                logger.info("Ollama is running.")
                return True
        except requests.exceptions.RequestException:
            logger.warning("Waiting for Ollama to start...")
        time.sleep(2)
    raise RuntimeError("Ollama is not running. Please check the server.")
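# Hedged sketch, not called anywhere below: if nothing else in the Space
# launches the Ollama server, check_ollama_running() will always time out
# and raise. Assuming the `ollama` binary is on PATH (it is already relied
# on for `ollama pull` below), the server could be started in the
# background like this before the readiness check.
def start_ollama_server():
    """Start `ollama serve` as a detached background process (sketch)."""
    return subprocess.Popen(
        ["ollama", "serve"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )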
def pull_model(model_name):
    """Ensure the model is available before use."""
    if model_name in loaded_models:
        logger.info(f"Model {model_name} is already loaded.")
        return
    try:
        subprocess.run(["ollama", "pull", model_name], check=True)
        logger.info(f"Model {model_name} pulled successfully.")
        loaded_models[model_name] = True
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to pull model {model_name}: {e}")
        raise
def get_llm(model_name):
    """Get an LLM instance with streaming enabled."""
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return Ollama(
        model=model_name,
        base_url="http://127.0.0.1:11434",
        callback_manager=callback_manager,
    )
def query_model(model_name, prompt):
    """Generate responses from the model with streaming."""
    check_ollama_running()     # Ensure Ollama is ready
    pull_model(model_name)     # Make sure the model is available
    llm = get_llm(model_name)  # Load the model
    response = ""
    for token in llm.stream(prompt):
        response += token
        yield response  # Stream the accumulated response in real time
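# Hedged usage sketch, not wired into the app: exercising the streaming
# generator directly for a quick smoke test. The model tag and prompt here
# are placeholders, and an Ollama server is assumed to be reachable at
# 127.0.0.1:11434.
def _smoke_test(model_name="mistral:7b", prompt="Say hello in one sentence."):
    last = ""
    for partial in query_model(model_name, prompt):
        last = partial  # each yield is the accumulated response so far
    print(last)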
# Define Gradio interface
iface = gr.Interface(
    fn=query_model,
    inputs=[
        gr.Dropdown(["deepseek-r1:1.5b", "mistral:7b"], label="Select Model"),
        gr.Textbox(label="Enter your prompt"),
    ],
    outputs="text",
    title="Ollama via LangChain & Gradio",
    description="Enter a prompt to interact with the Ollama-based model with streaming response.",
    flagging_dir="/app/flagged",
)
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
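One hedged note on the flagging setup: the Interface above points flagging at /app/flagged, so that directory has to exist and be writable by the Space's runtime user before anything is flagged. A minimal sketch (the path is simply copied from flagging_dir above):

    import os
    os.makedirs("/app/flagged", exist_ok=True)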