import os
from typing import List, Tuple
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load environment variables from .env if it exists
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct")
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()
SYSTEM_PROMPT = os.getenv(
"HF_SYSTEM_PROMPT",
"You are a concise and helpful AI assistant.",
)
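
# A minimal example .env for local runs (placeholder values, not real secrets):
#   HF_TOKEN=hf_xxx
#   HF_MODEL_ID=Qwen/Qwen2.5-1.5B-Instruct
#   HF_ENDPOINT_URL=
#   HF_SYSTEM_PROMPT=You are a concise and helpful AI assistant.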
# HF_TOKEN is not strictly required at import time, so the UI can still come
# up on Hugging Face Spaces; `respond` surfaces clear guidance if the token
# is missing.
# No global client here: model selection is dynamic, so a client is created per call.
# Small, cloud-friendly model suggestions
RECOMMENDED_MODELS = [
"Qwen/Qwen2.5-1.5B-Instruct",
"Qwen/Qwen2.5-3B-Instruct",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]
def format_prompt(message: str, history: List[Tuple[str, str]]) -> str:
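    """Flatten the system prompt, prior turns, and the new message into one
    plain-text transcript of "System:/User:/Assistant:" lines, ending with an
    open "Assistant:" line for the model to complete."""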
conversation = [f"System: {SYSTEM_PROMPT}"]
for user_msg, assistant_msg in history:
if user_msg:
conversation.append(f"User: {user_msg}")
if assistant_msg:
conversation.append(f"Assistant: {assistant_msg}")
conversation.append(f"User: {message}")
conversation.append("Assistant:")
return "\n".join(conversation)
def respond(
message: str,
history: List[Tuple[str, str]],
model_id: str = HF_MODEL_ID,
temperature: float = 0.7,
max_new_tokens: int = 512,
):
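    """Generator passed to gr.ChatInterface: yields the accumulated assistant
    reply token by token while streaming, or as a single chunk on fallback."""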
# If no token or endpoint configured, guide the user from the UI.
if not HF_TOKEN and not HF_ENDPOINT_URL:
        yield (
            "HF_TOKEN is not set. On a Hugging Face Space, add an 'HF_TOKEN' secret "
            "under Settings > Secrets (or provide an Inference Endpoint URL)."
        )
return
prompt = format_prompt(message, history)
try:
# Create client per request to honor selected model or endpoint
        if HF_ENDPOINT_URL:
            # InferenceClient takes a deployed Inference Endpoint URL via the
            # `model` argument; it has no `endpoint` keyword.
            local_client = InferenceClient(model=HF_ENDPOINT_URL, token=HF_TOKEN)
        else:
            local_client = InferenceClient(model=(model_id or HF_MODEL_ID), token=HF_TOKEN)
# Try streaming first
accumulated = ""
try:
stream = local_client.text_generation(
prompt=prompt,
max_new_tokens=max_new_tokens,
temperature=temperature,
top_p=0.95,
stream=True,
details=False,
return_full_text=False,
)
for chunk in stream:
token_text = None
# Newer huggingface_hub may return objects with .token.text
if hasattr(chunk, "token") and getattr(chunk.token, "text", None):
token_text = chunk.token.text
# Fallback for dict responses
if token_text is None and isinstance(chunk, dict):
token = chunk.get("token") or {}
token_text = token.get("text") or chunk.get("generated_text")
# Fallback if a raw string is ever yielded
if token_text is None and isinstance(chunk, str):
token_text = chunk
if token_text:
accumulated += token_text
yield accumulated
except StopIteration:
            # Some servers may raise StopIteration prematurely; fall back to non-streaming
pass
except Exception as stream_err:
# Log and fallback to non-streaming
print(f"[HF STREAM ERROR] {stream_err}")
# Fallback: if nothing streamed, try a single-shot generation
if not accumulated.strip():
try:
result = local_client.text_generation(
prompt=prompt,
max_new_tokens=max_new_tokens,
temperature=temperature,
top_p=0.95,
stream=False,
details=False,
return_full_text=False,
)
if isinstance(result, dict):
text = result.get("generated_text", "")
else:
text = str(result)
                yield text if text.strip() else "No response was received from the model."
except Exception as nonstream_err:
# Surface detailed error to the UI instead of a vague message
err_text = str(nonstream_err).strip()
response_text = ""
if hasattr(nonstream_err, "response"):
response = getattr(nonstream_err, "response")
response_text = getattr(response, "text", "") or ""
if response_text and response_text not in err_text:
err_text = f"{err_text} | {response_text}".strip(" |")
if not err_text:
err_text = repr(nonstream_err)
print(f"[HF NON-STREAM ERROR] {err_text}")
yield f"Bir hata oluştu: {err_text}"
except StopIteration:
print("[HF API ERROR] StopIteration: API'den yanıt dönerken veri alınamadı.")
yield "Bir hata oluştu: API'den yanıt alınamadı (StopIteration)."
except Exception as err: # pragma: no cover - surface errors to UI
err_text = str(err).strip()
response_text = ""
if hasattr(err, "response"):
response = getattr(err, "response")
response_text = getattr(response, "text", "") or ""
if response_text and response_text not in err_text:
err_text = f"{err_text} | {response_text}".strip(" |")
if "model_not_supported" in err_text or "not supported" in err_text:
            yield (
                "The selected model does not appear to be accessible. Try updating the "
                "`HF_MODEL_ID` value in `.env` to a Hugging Face chat model that is "
                "enabled for your account."
            )
return
if not err_text:
err_text = repr(err)
print(f"[HF API ERROR] {err_text}")
yield f"Bir hata oluştu: {err_text}"
demo = gr.ChatInterface(
respond,
title="Gradio HF Agent",
description=(
"Hugging Face Inference API ile konuşan basit bir sohbet arayüzü. "
"Aşağıdan model ve üretim ayarlarını değiştirebilirsiniz."
),
theme="soft",
additional_inputs=[
gr.Dropdown(
label="Model ID",
info="Hugging Face model repository adı",
choices=RECOMMENDED_MODELS,
value=HF_MODEL_ID,
allow_custom_value=True,
),
gr.Slider(
label="Sıcaklık (temperature)",
minimum=0.0,
maximum=1.0,
value=0.7,
step=0.05,
),
gr.Slider(
label="Maksimum yeni token",
minimum=16,
maximum=1024,
value=512,
step=16,
),
],
)
if __name__ == "__main__":
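    # queue() enables Gradio's event queue, which generator-based (streaming)
    # handlers like `respond` rely on.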
demo.queue().launch()