import os
from typing import List, Tuple

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables from .env if it exists
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct")
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()
SYSTEM_PROMPT = os.getenv(
    "HF_SYSTEM_PROMPT",
    "You are a concise and helpful AI assistant.",
)
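
# Example .env for local development (the values below are placeholders, not
# real credentials; the variable names match the os.getenv calls above):
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxx
#   HF_MODEL_ID=Qwen/Qwen2.5-1.5B-Instruct
#   HF_ENDPOINT_URL=
#   HF_SYSTEM_PROMPT=You are a concise and helpful AI assistant.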

# HF_TOKEN is not strictly required at import time so that the UI can still
# come up on Hugging Face Spaces. A clear guidance message is surfaced from
# within `respond` if a token is missing.

# No global client is created because the model is selected dynamically;
# a client is created per call instead.

# Small, cloud-friendly model suggestions
RECOMMENDED_MODELS = [
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-3B-Instruct",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]


def format_prompt(message: str, history: List[Tuple[str, str]]) -> str:
    conversation = [f"System: {SYSTEM_PROMPT}"]
    for user_msg, assistant_msg in history:
        if user_msg:
            conversation.append(f"User: {user_msg}")
        if assistant_msg:
            conversation.append(f"Assistant: {assistant_msg}")
    conversation.append(f"User: {message}")
    conversation.append("Assistant:")
    return "\n".join(conversation)
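
# For reference, format_prompt produces a plain-text transcript such as the
# following (the conversation content is illustrative; the layout follows the
# code above):
#
#   System: You are a concise and helpful AI assistant.
#   User: Hello
#   Assistant: Hi! How can I help?
#   User: <current message>
#   Assistant: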


def respond(
    message: str,
    history: List[Tuple[str, str]],
    model_id: str = HF_MODEL_ID,
    temperature: float = 0.7,
    max_new_tokens: int = 512,
):
    # If no token or endpoint is configured, guide the user from the UI.
    if not HF_TOKEN and not HF_ENDPOINT_URL:
        yield (
            "HF_TOKEN is not set. Add the 'HF_TOKEN' secret via Settings > Secrets "
            "on your Hugging Face Space (or provide an Inference Endpoint URL)."
        )
        return

    prompt = format_prompt(message, history)
    try:
        # Create a client per request to honor the selected model or endpoint
        if HF_ENDPOINT_URL:
            local_client = InferenceClient(endpoint=HF_ENDPOINT_URL, token=HF_TOKEN)
        else:
            local_client = InferenceClient(model=(model_id or HF_MODEL_ID), token=HF_TOKEN)

        # Try streaming first
        accumulated = ""
        try:
            stream = local_client.text_generation(
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=0.95,
                stream=True,
                details=False,
                return_full_text=False,
            )
            for chunk in stream:
                token_text = None
                # Newer huggingface_hub may return objects with .token.text
                if hasattr(chunk, "token") and getattr(chunk.token, "text", None):
                    token_text = chunk.token.text
                # Fallback for dict responses
                if token_text is None and isinstance(chunk, dict):
                    token = chunk.get("token") or {}
                    token_text = token.get("text") or chunk.get("generated_text")
                # Fallback if a raw string is ever yielded
                if token_text is None and isinstance(chunk, str):
                    token_text = chunk
                if token_text:
                    accumulated += token_text
                    yield accumulated
        except StopIteration:
            # Some servers may prematurely raise StopIteration; fall back to non-streaming
            pass
        except Exception as stream_err:
            # Log and fall back to non-streaming
            print(f"[HF STREAM ERROR] {stream_err}")

        # Fallback: if nothing streamed, try a single-shot generation
        if not accumulated.strip():
            try:
                result = local_client.text_generation(
                    prompt=prompt,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_p=0.95,
                    stream=False,
                    details=False,
                    return_full_text=False,
                )
                if isinstance(result, dict):
                    text = result.get("generated_text", "")
                else:
                    text = str(result)
                yield text if text.strip() else "No response was received from the model."
            except Exception as nonstream_err:
                # Surface a detailed error to the UI instead of a vague message
                err_text = str(nonstream_err).strip()
                response_text = ""
                if hasattr(nonstream_err, "response"):
                    response = getattr(nonstream_err, "response")
                    response_text = getattr(response, "text", "") or ""
                if response_text and response_text not in err_text:
                    err_text = f"{err_text} | {response_text}".strip(" |")
                if not err_text:
                    err_text = repr(nonstream_err)
                print(f"[HF NON-STREAM ERROR] {err_text}")
                yield f"An error occurred: {err_text}"
    except StopIteration:
        print("[HF API ERROR] StopIteration: no data was received from the API response.")
        yield "An error occurred: no response was received from the API (StopIteration)."
    except Exception as err:  # pragma: no cover - surface errors to UI
        err_text = str(err).strip()
        response_text = ""
        if hasattr(err, "response"):
            response = getattr(err, "response")
            response_text = getattr(response, "text", "") or ""
        if response_text and response_text not in err_text:
            err_text = f"{err_text} | {response_text}".strip(" |")
        if "model_not_supported" in err_text or "not supported" in err_text:
            yield (
                "The selected model does not appear to be accessible. Try updating "
                "`HF_MODEL_ID` in `.env` to a Hugging Face chat model that is enabled "
                "for your account."
            )
            return
        if not err_text:
            err_text = repr(err)
        print(f"[HF API ERROR] {err_text}")
        yield f"An error occurred: {err_text}"

demo = gr.ChatInterface(
    respond,
    title="Gradio HF Agent",
    description=(
        "A simple chat interface that talks to the Hugging Face Inference API. "
        "You can change the model and generation settings below."
    ),
    theme="soft",
    additional_inputs=[
        gr.Dropdown(
            label="Model ID",
            info="Hugging Face model repository name",
            choices=RECOMMENDED_MODELS,
            value=HF_MODEL_ID,
            allow_custom_value=True,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=1.0,
            value=0.7,
            step=0.05,
        ),
        gr.Slider(
            label="Maximum new tokens",
            minimum=16,
            maximum=1024,
            value=512,
            step=16,
        ),
    ],
)

if __name__ == "__main__":
    demo.queue().launch()
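
# To run locally, a minimal sketch (the filename app.py is an assumption; on
# Hugging Face Spaces the app is launched automatically):
#
#   pip install gradio huggingface_hub python-dotenv
#   python app.py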