# Standard library
import subprocess
import time

# Third-party
import gradio as gr
from ollama import chat, ChatResponse

# Ollama model tag to serve; "phi" resolves to Microsoft Phi-2 (2.7B params).
model_id = "phi"
def interact(message: str, history: list):
    """Stream a chat response from the Ollama model for gr.ChatInterface.

    Args:
        message: The user's latest message.
        history: Prior conversation turns as ``{"role", "content"}`` dicts
            (the shape ChatInterface passes when ``type="messages"``).

    Yields:
        The accumulated response text after each streamed chunk, so the UI
        renders the reply progressively.
    """
    # Copy so we never mutate the history list Gradio owns.
    messages = list(history)
    messages.append({"role": "user", "content": message})

    response: ChatResponse = chat(
        model=model_id,
        messages=messages,
        stream=True,
    )

    text_response = ""
    for chunk in response:
        text_response += chunk["message"]["content"]
        yield text_response
# Build the chat UI; `interact` streams partial responses back to the browser.
# type="messages" makes history arrive as role/content dicts (OpenAI-style).
interface = gr.ChatInterface(
    fn=interact,
    type="messages",
    title="Microsoft Phi Chat Interface",
    description="Model: Microsoft Phi-2 (2.7B params)",
)
# Start the Ollama server in the background; Popen does not block.
print("\n\nStarting Ollama...\n\n")
subprocess.Popen(["ollama", "serve"])
time.sleep(10)  # crude readiness wait — TODO: poll the server endpoint instead

print("\n\nOllama started successfully!!\n\n\n\nTesting...\n\n")
# Pull the model before serving; check=True aborts startup if the pull fails
# instead of silently launching a UI with no model behind it.
subprocess.run(["ollama", "pull", model_id], check=True)
time.sleep(5)
print("\n\nMicrosoft Phi-2 started successfully!!\n\n")

# Bind on all interfaces so the app is reachable from outside the host/container.
interface.launch(server_name="0.0.0.0", server_port=7860)