import os
from typing import List, Tuple

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables from .env if it exists
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct")
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()
SYSTEM_PROMPT = os.getenv(
    "HF_SYSTEM_PROMPT",
    "You are a concise and helpful AI assistant.",
)

# HF_TOKEN is deliberately not required at import time so that the UI can
# still come up on Hugging Face Spaces. If the token is missing, `respond`
# surfaces a clear guidance message instead.

# No global client is created, since the model can be selected dynamically;
# a client is created per call instead.

# Small, cloud-friendly model suggestions
RECOMMENDED_MODELS = [
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-3B-Instruct",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]


def format_prompt(message: str, history: List[Tuple[str, str]]) -> str:
    """Flatten the chat history into a plain-text prompt."""
    conversation = [f"System: {SYSTEM_PROMPT}"]
    for user_msg, assistant_msg in history:
        if user_msg:
            conversation.append(f"User: {user_msg}")
        if assistant_msg:
            conversation.append(f"Assistant: {assistant_msg}")
    conversation.append(f"User: {message}")
    conversation.append("Assistant:")
    return "\n".join(conversation)


def respond(
    message: str,
    history: List[Tuple[str, str]],
    model_id: str = HF_MODEL_ID,
    temperature: float = 0.7,
    max_new_tokens: int = 512,
):
    # If no token or endpoint is configured, guide the user from the UI.
    if not HF_TOKEN and not HF_ENDPOINT_URL:
        yield (
            "HF_TOKEN is not set. On a Hugging Face Space, add the 'HF_TOKEN' "
            "secret under Settings > Secrets (or provide an Inference Endpoint URL)."
        )
        return

    prompt = format_prompt(message, history)

    try:
        # Create the client per request to honor the selected model or endpoint.
        # The `model` argument also accepts the URL of a deployed Inference
        # Endpoint (InferenceClient has no `endpoint` keyword).
        if HF_ENDPOINT_URL:
            local_client = InferenceClient(model=HF_ENDPOINT_URL, token=HF_TOKEN)
        else:
            local_client = InferenceClient(model=(model_id or HF_MODEL_ID), token=HF_TOKEN)

        # Try streaming first
        accumulated = ""
        try:
            stream = local_client.text_generation(
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=0.95,
                stream=True,
                details=False,
                return_full_text=False,
            )
            for chunk in stream:
                token_text = None
                # Newer huggingface_hub versions may return objects with .token.text
                if hasattr(chunk, "token") and getattr(chunk.token, "text", None):
                    token_text = chunk.token.text
                # Fallback for dict responses
                if token_text is None and isinstance(chunk, dict):
                    token = chunk.get("token") or {}
                    token_text = token.get("text") or chunk.get("generated_text")
                # Fallback if a raw string is ever yielded
                if token_text is None and isinstance(chunk, str):
                    token_text = chunk
                if token_text:
                    accumulated += token_text
                    yield accumulated
        except StopIteration:
            # Some servers may raise StopIteration prematurely; fall back to non-streaming.
            pass
        except Exception as stream_err:
            # Log and fall back to non-streaming
            print(f"[HF STREAM ERROR] {stream_err}")

        # Fallback: if nothing was streamed, try a single-shot generation
        if not accumulated.strip():
            try:
                result = local_client.text_generation(
                    prompt=prompt,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_p=0.95,
                    stream=False,
                    details=False,
                    return_full_text=False,
                )
                if isinstance(result, dict):
                    text = result.get("generated_text", "")
                else:
                    text = str(result)
                yield text if text.strip() else "No response was received from the model."
            except Exception as nonstream_err:
                # Surface a detailed error to the UI instead of a vague message
                err_text = str(nonstream_err).strip()
                response_text = ""
                if hasattr(nonstream_err, "response"):
                    response = getattr(nonstream_err, "response")
                    response_text = getattr(response, "text", "") or ""
                if response_text and response_text not in err_text:
                    err_text = f"{err_text} | {response_text}".strip(" |")
                if not err_text:
                    err_text = repr(nonstream_err)
                print(f"[HF NON-STREAM ERROR] {err_text}")
                yield f"An error occurred: {err_text}"
    except StopIteration:
        print("[HF API ERROR] StopIteration: no data was received while reading the API response.")
        yield "An error occurred: no response was received from the API (StopIteration)."
    except Exception as err:  # pragma: no cover - surface errors to the UI
        err_text = str(err).strip()
        response_text = ""
        if hasattr(err, "response"):
            response = getattr(err, "response")
            response_text = getattr(response, "text", "") or ""
        if response_text and response_text not in err_text:
            err_text = f"{err_text} | {response_text}".strip(" |")
        if "model_not_supported" in err_text or "not supported" in err_text:
            yield (
                "The selected model does not appear to be accessible. Try updating "
                "`HF_MODEL_ID` in `.env` to a Hugging Face chat model that is "
                "enabled for your account."
            )
            return
        if not err_text:
            err_text = repr(err)
        print(f"[HF API ERROR] {err_text}")
        yield f"An error occurred: {err_text}"


demo = gr.ChatInterface(
    respond,
    title="Gradio HF Agent",
    description=(
        "A simple chat interface that talks to the Hugging Face Inference API. "
        "You can change the model and generation settings below."
    ),
    theme="soft",
    additional_inputs=[
        gr.Dropdown(
            label="Model ID",
            info="Hugging Face model repository name",
            choices=RECOMMENDED_MODELS,
            value=HF_MODEL_ID,
            allow_custom_value=True,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=1.0,
            value=0.7,
            step=0.05,
        ),
        gr.Slider(
            label="Maximum new tokens",
            minimum=16,
            maximum=1024,
            value=512,
            step=16,
        ),
    ],
)


if __name__ == "__main__":
    demo.queue().launch()
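
# A minimal sketch of the .env file this script reads via python-dotenv (all
# values below are placeholders, not real defaults; either HF_TOKEN or
# HF_ENDPOINT_URL must be set for generation to work):
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
#   HF_MODEL_ID=Qwen/Qwen2.5-1.5B-Instruct
#   HF_ENDPOINT_URL=https://<your-endpoint>.endpoints.huggingface.cloud
#   HF_SYSTEM_PROMPT=You are a concise and helpful AI assistant.
#
# To smoke-test `respond` without launching the UI, iterate the generator
# directly; it yields progressively accumulated text:
#
#   for partial in respond("Hello!", history=[]):
#       print(partial)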