import os
from typing import List, Tuple
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load environment variables from .env if it exists
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct")
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()
SYSTEM_PROMPT = os.getenv(
"HF_SYSTEM_PROMPT",
"You are a concise and helpful AI assistant.",
)
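
# A minimal example .env for local runs (placeholder values, not real secrets):
#   HF_TOKEN=hf_xxx
#   HF_MODEL_ID=Qwen/Qwen2.5-1.5B-Instruct
#   HF_ENDPOINT_URL=
#   HF_SYSTEM_PROMPT=You are a concise and helpful AI assistant.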
# HF_TOKEN is not strictly required at import time, so the UI can still come
# up on Hugging Face Spaces; `respond` surfaces clear guidance if the token
# is missing.
# No global client here: model selection is dynamic, so a client is created per call.
# Small, cloud-friendly model suggestions
RECOMMENDED_MODELS = [
"Qwen/Qwen2.5-1.5B-Instruct",
"Qwen/Qwen2.5-3B-Instruct",
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]
def format_prompt(message: str, history: List[Tuple[str, str]]) -> str:
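    """Flatten the system prompt, prior turns, and the new message into one
    plain-text transcript of "System:/User:/Assistant:" lines, ending with an
    open "Assistant:" line for the model to complete."""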
conversation = [f"System: {SYSTEM_PROMPT}"]
for user_msg, assistant_msg in history:
if user_msg:
conversation.append(f"User: {user_msg}")
if assistant_msg:
conversation.append(f"Assistant: {assistant_msg}")
conversation.append(f"User: {message}")
conversation.append("Assistant:")
return "\n".join(conversation)
def respond(
message: str,
history: List[Tuple[str, str]],
model_id: str = HF_MODEL_ID,
temperature: float = 0.7,
max_new_tokens: int = 512,
):
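    """Generator passed to gr.ChatInterface: yields the accumulated assistant
    reply token by token while streaming, or as a single chunk on fallback."""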
# If no token or endpoint configured, guide the user from the UI.
if not HF_TOKEN and not HF_ENDPOINT_URL:
        yield (
            "HF_TOKEN is not set. On a Hugging Face Space, add an 'HF_TOKEN' secret "
            "under Settings > Secrets (or provide an Inference Endpoint URL)."
        )
return
prompt = format_prompt(message, history)
try:
# Create client per request to honor selected model or endpoint
        if HF_ENDPOINT_URL:
            # InferenceClient takes a deployed Inference Endpoint URL via the
            # `model` argument; it has no `endpoint` keyword.
            local_client = InferenceClient(model=HF_ENDPOINT_URL, token=HF_TOKEN)
        else:
            local_client = InferenceClient(model=(model_id or HF_MODEL_ID), token=HF_TOKEN)
# Try streaming first
accumulated = ""
try:
stream = local_client.text_generation(
prompt=prompt,
max_new_tokens=max_new_tokens,
temperature=temperature,
top_p=0.95,
stream=True,
details=False,
return_full_text=False,
)
for chunk in stream:
token_text = None
# Newer huggingface_hub may return objects with .token.text
if hasattr(chunk, "token") and getattr(chunk.token, "text", None):
token_text = chunk.token.text
# Fallback for dict responses
if token_text is None and isinstance(chunk, dict):
token = chunk.get("token") or {}
token_text = token.get("text") or chunk.get("generated_text")
# Fallback if a raw string is ever yielded
if token_text is None and isinstance(chunk, str):
token_text = chunk
if token_text:
accumulated += token_text
yield accumulated
except StopIteration:
            # Some servers may raise StopIteration prematurely; fall back to non-streaming
pass
except Exception as stream_err:
# Log and fallback to non-streaming
print(f"[HF STREAM ERROR] {stream_err}")
# Fallback: if nothing streamed, try a single-shot generation
if not accumulated.strip():
try:
result = local_client.text_generation(
prompt=prompt,
max_new_tokens=max_new_tokens,
temperature=temperature,
top_p=0.95,
stream=False,
details=False,
return_full_text=False,
)
if isinstance(result, dict):
text = result.get("generated_text", "")
else:
text = str(result)
                yield text if text.strip() else "No response was received from the model."
except Exception as nonstream_err:
# Surface detailed error to the UI instead of a vague message
err_text = str(nonstream_err).strip()
response_text = ""
if hasattr(nonstream_err, "response"):
response = getattr(nonstream_err, "response")
response_text = getattr(response, "text", "") or ""
if response_text and response_text not in err_text:
err_text = f"{err_text} | {response_text}".strip(" |")
if not err_text:
err_text = repr(nonstream_err)
print(f"[HF NON-STREAM ERROR] {err_text}")
yield f"Bir hata oluştu: {err_text}"
except StopIteration:
print("[HF API ERROR] StopIteration: API'den yanıt dönerken veri alınamadı.")
yield "Bir hata oluştu: API'den yanıt alınamadı (StopIteration)."
except Exception as err: # pragma: no cover - surface errors to UI
err_text = str(err).strip()
response_text = ""
if hasattr(err, "response"):
response = getattr(err, "response")
response_text = getattr(response, "text", "") or ""
if response_text and response_text not in err_text:
err_text = f"{err_text} | {response_text}".strip(" |")
if "model_not_supported" in err_text or "not supported" in err_text:
            yield (
                "The selected model does not appear to be accessible. Try updating the "
                "`HF_MODEL_ID` value in `.env` to a Hugging Face chat model that is "
                "enabled for your account."
            )
return
if not err_text:
err_text = repr(err)
print(f"[HF API ERROR] {err_text}")
yield f"Bir hata oluştu: {err_text}"
demo = gr.ChatInterface(
respond,
title="Gradio HF Agent",
description=(
"Hugging Face Inference API ile konuşan basit bir sohbet arayüzü. "
"Aşağıdan model ve üretim ayarlarını değiştirebilirsiniz."
),
theme="soft",
additional_inputs=[
gr.Dropdown(
label="Model ID",
info="Hugging Face model repository adı",
choices=RECOMMENDED_MODELS,
value=HF_MODEL_ID,
allow_custom_value=True,
),
gr.Slider(
label="Sıcaklık (temperature)",
minimum=0.0,
maximum=1.0,
value=0.7,
step=0.05,
),
gr.Slider(
label="Maksimum yeni token",
minimum=16,
maximum=1024,
value=512,
step=16,
),
],
)
if __name__ == "__main__":
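    # queue() enables Gradio's event queue, which generator-based (streaming)
    # handlers like `respond` rely on.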
demo.queue().launch()