import gradio as gr
import requests
import os

# For Hugging Face Spaces deployment:
# Set these environment variables in your Space settings:
#   OLLAMA_HOST: Your Ollama server URL
#   OLLAMA_MODEL: Model name (e.g., "llama2")

# Read deployment settings from the environment, with local defaults
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama2")
API_URL = f"{OLLAMA_HOST}/api/generate"


def generate_response(prompt):
    try:
        # Send the prompt to the Ollama server; the timeout prevents an indefinite hang
        response = requests.post(
            API_URL,
            json={
                "model": OLLAMA_MODEL,
                "prompt": prompt,
                "stream": False
            },
            timeout=120
        )

        # Surface the most common failure modes with actionable messages.
        # Note: an unreachable server raises a ConnectionError (caught below);
        # a 404 from Ollama usually means the model has not been pulled.
        if response.status_code == 404:
            return ("Error: Not found. Check that OLLAMA_HOST points at a running "
                    "Ollama server and that OLLAMA_MODEL names a pulled model.")
        elif response.status_code == 500:
            return ("Error: Server error. Check that the model is loaded in Ollama "
                    "and OLLAMA_MODEL is set correctly.")

        response.raise_for_status()
        return response.json()["response"]

    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"


# Create the Gradio interface
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(lines=10),
    title="Llama 2 Chat",
    description="Chat with the Llama 2 model through an Ollama server. Enter your prompt below and get a response.",
    examples=[
        ["What is artificial intelligence?"],
        ["Tell me a short story about a robot."],
        ["Explain quantum computing in simple terms."]
    ]
)

# Launch the app
if __name__ == "__main__":
    # For local development:
    # demo.launch()

    # For Hugging Face Spaces:
    demo.launch(server_name="0.0.0.0", server_port=7860)
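
# Optional: a streaming variant of generate_response. This is a sketch, not part
# of the app above: it assumes Ollama's documented streaming behavior, where
# "stream": True returns newline-delimited JSON objects, each carrying a partial
# "response" field and a final object with "done": true. The helper name
# generate_response_stream is ours. To use it, define it above the gr.Interface
# call and pass fn=generate_response_stream; Gradio treats a generator function
# as a streaming output and re-renders the Textbox on every yield.
import json


def generate_response_stream(prompt):
    """Yield the accumulated reply as the model generates it."""
    try:
        with requests.post(
            API_URL,
            json={"model": OLLAMA_MODEL, "prompt": prompt, "stream": True},
            stream=True,
            timeout=120
        ) as response:
            response.raise_for_status()
            text = ""
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)
                text += chunk.get("response", "")
                yield text  # Gradio shows the latest yielded value
                if chunk.get("done"):
                    break
    except requests.exceptions.RequestException as e:
        yield f"Error: {str(e)}"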
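
# Optional pre-flight check, a sketch assuming Ollama's GET /api/tags endpoint,
# which lists the models pulled on the server. check_ollama is our name, not
# part of the app above; calling it before demo.launch() fails fast with a
# clearer signal than a mid-chat request error.
def check_ollama():
    """Return True if the Ollama server is reachable and OLLAMA_MODEL is pulled."""
    try:
        tags = requests.get(f"{OLLAMA_HOST}/api/tags", timeout=5).json()
        names = [m.get("name", "") for m in tags.get("models", [])]
        # Pulled models carry a tag suffix (e.g., "llama2:latest"), so compare base names
        return any(n.split(":")[0] == OLLAMA_MODEL.split(":")[0] for n in names)
    except requests.exceptions.RequestException:
        return False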