import gradio as gr
import requests
import os
import subprocess
import time
from threading import Thread
import stat
# --- CONFIGURATION ---
MODEL_PATH = "./EmotionTextClassifierLLM.gguf"
SERVER_PATH = "./llama-b6264-bin-ubuntu-x64/build/bin/llama-server"
SERVER_URL = "http://127.0.0.1:8080" # Standard address for localhost
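# NOTE: these paths assume the GGUF model and a prebuilt llama.cpp binary
# are checked into the repository root; adjust them to match your layout.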
# --- 1. PREPARATION AND VERIFICATION ---
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model not found: {MODEL_PATH}")
if not os.path.exists(SERVER_PATH):
    raise FileNotFoundError(f"Server not found: {SERVER_PATH}")
print(f"Making the file {SERVER_PATH} executable...")
st = os.stat(SERVER_PATH)
os.chmod(SERVER_PATH, st.st_mode | stat.S_IEXEC)
print("Binary made executable.")
# --- 2. SERVER STARTUP ---
def run_server():
    command = [
        SERVER_PATH, "-m", MODEL_PATH, "--host", "0.0.0.0", "--port", "8080", "-c", "2048", "-t", "4",
    ]
    print(f"Launching command: {' '.join(command)}")
    server_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    for line in iter(server_process.stdout.readline, b''):
        # errors="replace" avoids crashing on partially decoded UTF-8 in the logs
        print(f"[llama-server] {line.decode('utf-8', errors='replace').strip()}")
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()
# Health check: poll the server until it reports ready
print("Waiting for the llama.cpp server to start...")
server_ready = False
start_time = time.time()
while time.time() - start_time < 60:
    try:
        response = requests.get(f"{SERVER_URL}/health")
        if response.status_code == 200 and response.json().get("status") == "ok":
            print("Server is ready! (status: ok)")
            server_ready = True
            break
    except requests.exceptions.ConnectionError:
        pass
    time.sleep(1)
if not server_ready:
    raise RuntimeError("The llama.cpp server could not start. Check the logs.")
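# For reference, recent llama.cpp builds answer GET /health with a small JSON
# body such as {"status": "ok"} once the model has finished loading.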
# --- 3. INFERENCE FUNCTION (returns raw model output as Markdown) ---
def get_emotion_from_server(user_input):
    if not user_input or not user_input.strip():
        # Return an empty string for empty input, since the output is rendered as Markdown
        return ""
    # Step 1: Manually reconstruct the prompt following the model's Jinja template logic
    # Note: the bos_token is usually added by the server, so we don't prepend it manually.
    system_block = """<start_of_turn>system
You are an emotion classification assistant. Your task is to analyze ALL given sentence and classify it emotions chosen from Contentment, Joy, Euphoria, Excitement, Disappointment, Sadness, Regret, Irritation, Frustration, Anger, Anxiety, Fear, Astonishment, Disgust, Hate, Pleasure, Desire, Affection, Trust, Distrust, Gratitude, Compassion, Admiration, Contempt, Guilt, Shame, Pride, Jealousy, Envy, Hope, Nostalgia, Relief, Curiosity, Boredom, Neutral, fatigue, Trust You can choose one or several emotions follow this format
```json
{
"emotions": [ " "
],
"explanation": "This is the explanation related to the listed emotions."
}
```
begin<end_of_turn>
"""
    user_block = f"<start_of_turn>user\n{user_input.strip()}<end_of_turn>\n"
    model_prompt_block = "<start_of_turn>model\n"
    final_prompt = system_block + user_block + model_prompt_block
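    # Illustration of the assembled prompt (Gemma-style turn markers):
    #   <start_of_turn>system
    #   ...classification instructions...<end_of_turn>
    #   <start_of_turn>user
    #   <sentence><end_of_turn>
    #   <start_of_turn>model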
    # Step 2: Use the /completion endpoint with the full prompt
    endpoint = f"{SERVER_URL}/completion"
    payload = {
        "prompt": final_prompt,
        "stream": False,
        "n_predict": 256,  # Give enough space for a JSON-like structure with an explanation
        "stop": ["<end_of_turn>"]  # Stop generation if the model produces this token
    }
    try:
        # Generous timeout: generating up to 256 tokens on CPU can take well over 30 s
        response = requests.post(endpoint, json=payload, timeout=120)
        response.raise_for_status()
        response_json = response.json()
        # Step 3: Extract the raw content and return it directly for Markdown rendering
        # No need to parse the content as JSON anymore; the Markdown component displays it as is.
        content = response_json.get("content", "").strip()
        return content
    except requests.exceptions.RequestException as e:
        print(f"Error querying the server: {e}")
        # Return a Markdown-formatted error message
        return f"**Error:** The request to the server failed.\n\n```\n{e}\n```"
# --- 4. GRADIO INTERFACE ---
print("Launching Gradio interface...")
demo = gr.Interface(
    fn=get_emotion_from_server,
    inputs=gr.Textbox(lines=5, label="Sentence to analyze", placeholder="Write a sentence here..."),
    outputs=gr.Markdown(label="Emotion Analysis"),  # Use the Markdown component
    title="Emotion Analyzer - GGUF with Specific Template",
    description="This Space manually applies the model's exact Jinja template to ensure correct classification and returns the raw model output in Markdown format.",
    examples=[
        ["I am so incredibly happy, I just got the promotion I've been working for all year!"],
        ["He completely ignored my work and presented it as his own. I am beyond furious."],
        ["Watching that old movie brought back so many memories of my childhood."],
    ],
)
if __name__ == "__main__":
    demo.launch()