Update app.py
app.py CHANGED
@@ -1,4 +1,6 @@
 import os
+
+#remove this if in CPU
 import spaces
 import threading
 import time
@@ -18,26 +20,28 @@ OLLAMA_SERVICE_THREAD.start()
 
 print("Giving ollama serve a moment")
 time.sleep(10)
-
+# Modify the model to what you want
+model = "gemma2:27b"
+subprocess.run(f"~/ollama pull {model}", shell=True)
 
 
 import copy
 import gradio as gr
 from ollama import Client
-client = Client(host='http://localhost:11434', timeout=
+client = Client(host='http://localhost:11434', timeout=120)
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-
+MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-27b-it")
 MODEL_NAME = MODEL_ID.split("/")[-1]
 
-TITLE = "<h1><center>
+TITLE = "<h1><center>ollama-Chat</center></h1>"
 
 DESCRIPTION = f"""
 <h3>MODEL: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
 <center>
-<p>
+<p>Feel free to test models with ollama.
 <br>
-
+Easy to modify and running models even in CPU.
 </p>
 </center>
 """
@@ -54,10 +58,10 @@ h3 {
 }
 """
 
+# Remove this if in CPU
 @spaces.GPU()
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
-
-    print(f'history is - {history}')
+
     conversation = []
     for prompt, answer in history:
         conversation.extend([
@@ -69,7 +73,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
     print(f"Conversation is -\n{conversation}")
 
     response = client.chat(
-        model=
+        model=model,
         messages=conversation,
         stream=True,
         options={
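The second hunk's context line references OLLAMA_SERVICE_THREAD.start(), whose definition sits above the diffed region and is not shown here. A minimal sketch of what that startup code plausibly looks like, assuming the ollama binary was downloaded to ~/ollama (the same path the pull command uses); only the thread name comes from the hunk header, the rest is an assumption:

import subprocess
import threading

def ollama_service_thread():
    # Blocks inside this daemon thread for as long as `ollama serve` runs.
    # Assumption: the binary was fetched to ~/ollama earlier in the file.
    subprocess.run("~/ollama serve", shell=True)

OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread, daemon=True)
OLLAMA_SERVICE_THREAD.start()

The time.sleep(10) that follows start() in the diff gives the server a moment to bind to localhost:11434 before the pull command and the Client try to reach it.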