Create app.py
app.py ADDED
@@ -0,0 +1,85 @@
import json
import subprocess
import requests
import time
import socket
import gradio as gr

# Check whether the server is accepting connections on the given port
def is_server_active(host, port):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex((host, port)) == 0

# Download the model (streamed, so the multi-GB file is never held in memory)
url = "https://huggingface.co/TheBloke/firefly-llama2-13B-chat-GGUF/resolve/main/firefly-llama2-13b-chat.Q4_K_M.gguf?download=true"
with requests.get(url, stream=True) as download:
    download.raise_for_status()
    with open("./model.gguf", mode="wb") as file:
        for chunk in download.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            file.write(chunk)
print("Model downloaded")

# Launch the LLM server
command = ["python3", "-m", "llama_cpp.server", "--model", "./model.gguf", "--host", "0.0.0.0", "--port", "2600", "--n_threads", "2"]
server_process = subprocess.Popen(command)  # Keep the process handle so we can terminate it later
print("Model server starting...")

# Wait until the server is accepting connections
while not is_server_active("localhost", 2600):
    print("Waiting for server to start...")
    time.sleep(5)
print("Model server is ready!")

def response(message, history):
    url = "http://localhost:2600/v1/completions"
    body = {"prompt": "[INST]" + message + "[/INST]", "max_tokens": 1024, "echo": False, "stream": True}
    response_text = ""

    try:
        # No timeout: wait as long as the completion takes
        with requests.post(url, json=body, stream=True) as stream_response:
            for line in stream_response.iter_lines():
                if not line:  # Skip the blank lines between SSE events
                    continue
                text = line.decode("utf-8")
                print("Raw response:", text)  # Log the raw chunk for debugging

                if text.startswith("data: "):
                    text = text[len("data: "):]
                if text == "[DONE]":  # End-of-stream marker
                    break
                if text.startswith("{") and "choices" in text:
                    try:
                        response_json = json.loads(text)
                        part = response_json["choices"][0]["text"]
                        print(part, end="", flush=True)
                        response_text += part
                        yield response_text  # Stream the partial answer to the UI
                    except json.JSONDecodeError as e:
                        print("Error decoding JSON:", e)
                        break
                elif text.strip():
                    print("Non-JSON response:", text)
                    break
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")

    yield response_text

def cleanup_server():
    print("Closing server...")
    server_process.terminate()  # Terminate the server process
    server_process.wait()  # Wait for it to exit
    print("Server closed.")

# Configure and launch the Gradio interface
gr_interface = gr.ChatInterface(
    fn=response,
    title="firefly-llama2-13B-chat-GGUF Chatbot",
    theme='syddharth/gray-minimal'
)

try:
    gr_interface.launch(share=True)
finally:
    cleanup_server()  # Make sure the server process is cleaned up on exit
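For reference, the completions endpoint that the response function talks to can be sanity-checked outside of Gradio. This is a minimal sketch, assuming the llama_cpp.server process from app.py is already listening on localhost:2600; the prompt is a made-up example:

import requests

# Smoke test for the local completions endpoint (assumes llama_cpp.server
# from app.py is already running on localhost:2600).
body = {
    "prompt": "[INST]Say hello in one sentence.[/INST]",  # hypothetical prompt
    "max_tokens": 32,
    "stream": False,  # ask for a single JSON response instead of SSE chunks
}
resp = requests.post("http://localhost:2600/v1/completions", json=body, timeout=300)
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])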
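Note that this commit adds only app.py; for the Space to build, a requirements.txt listing gradio, requests, and llama-cpp-python[server] (or equivalent) would presumably also be needed.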