Spaces:
Sleeping
Sleeping
File size: 2,993 Bytes
9fb8183 c33198d c76bdcd 9fb8183 c76bdcd 9fb8183 c76bdcd c33198d 9fb8183 06e12a0 9fb8183 b4c9cb7 c76bdcd 9fb8183 c76bdcd 20e8d3e c76bdcd 9fb8183 c76bdcd d15ab26 c76bdcd 06e12a0 c76bdcd 06e12a0 c76bdcd 9fb8183 c76bdcd b4c9cb7 06e12a0 b4c9cb7 c76bdcd 06e12a0 c76bdcd 9fb8183 06e12a0 803a159 c76bdcd 06e12a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import json
import subprocess
import requests
import time
import socket
import gradio as gr
# Probe whether a TCP server is accepting connections on (host, port).
def is_server_active(host, port):
    """Return True if a TCP connection to host:port can be established."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex returns 0 on success instead of raising.
        return probe.connect_ex((host, port)) == 0
    finally:
        probe.close()
# --- Download the GGUF model and launch the llama.cpp server ---
url = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_0.gguf?download=true"
# Stream the download to disk in chunks: the model is several GB, so
# buffering response.content in memory at once could exhaust RAM.
# raise_for_status() prevents silently saving an HTTP error page as the model.
with requests.get(url, stream=True) as response:
    response.raise_for_status()
    with open("./model.gguf", mode="wb") as file:
        for chunk in response.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            file.write(chunk)
print("Model downloaded")
# Run the LLM server as a child process (OpenAI-compatible llama.cpp server).
command = ["python3", "-m", "llama_cpp.server", "--model", "./model.gguf", "--host", "0.0.0.0", "--port", "2600", "--n_threads", "2"]
server_process = subprocess.Popen(command)  # kept global so cleanup_server() can terminate it later
print("Model server starting...")
# Block until the server starts accepting TCP connections on port 2600.
while not is_server_active("0.0.0.0", 2600):
    print("Waiting for server to start...")
    time.sleep(5)
print("Model server is ready!")
def response(message, history):
    """Gradio chat callback: send *message* to the local llama.cpp server.

    Wraps the message in Mistral's [INST]...[/INST] prompt format, posts a
    non-streaming completion request, and yields the generated text once
    (ChatInterface accepts generator callbacks).

    Parameters:
        message: the user's latest chat message (str).
        history: prior conversation turns supplied by Gradio (unused here).

    Yields:
        The completion text, or "" if the request or parsing failed.
    """
    url = "http://localhost:2600/v1/completions"
    body = {"prompt": "[INST]"+message+"[/INST]", "max_tokens": 1024, "echo": False, "stream": False}
    response_text = ""
    try:
        # "stream": False means the server replies with a single JSON
        # document, so we parse it in one shot instead of decoding raw
        # byte chunks (which could split multi-byte UTF-8 characters).
        # No timeout on purpose: CPU generation can take a long time.
        reply = requests.post(url, json=body)
        reply.raise_for_status()
        response_json = reply.json()
        response_text = response_json["choices"][0]["text"]
        print(response_text, flush=True)
    except requests.exceptions.RequestException as e:
        print(f"Error al realizar la solicitud: {e}")
    except (ValueError, KeyError, IndexError) as e:
        # ValueError covers malformed JSON; Key/IndexError cover an
        # unexpected response shape.
        print("Error al decodificar JSON:", e)
    yield response_text
def cleanup_server():
    """Terminate the llama.cpp server subprocess started at module load.

    Asks politely first (SIGTERM); if the child has not exited within 10
    seconds, force-kills it so interpreter shutdown can never hang on an
    unresponsive server.
    """
    print("Closing server...")
    server_process.terminate()  # request graceful shutdown
    try:
        # A bare wait() could block forever if the child ignores SIGTERM.
        server_process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        server_process.kill()  # escalate: force-kill the process
        server_process.wait()  # reap it to avoid a zombie
    print("Server closed.")
# Build and launch the Gradio chat UI wired to the response() callback.
gr_interface = gr.ChatInterface(
    fn=response,
    title="Mistral-7B-Instruct-v0.2-GGUF Chatbot",
    theme="syddharth/gray-minimal",
)
try:
    # share=True exposes a temporary public URL for the interface.
    gr_interface.launch(share=True)
finally:
    # Always stop the llama.cpp subprocess, even if launch() raises.
    cleanup_server()
|