Create app.py

app.py ADDED
@@ -0,0 +1,190 @@
import os
from typing import List, Tuple

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables from .env if it exists
load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct")
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()
SYSTEM_PROMPT = os.getenv(
    "HF_SYSTEM_PROMPT",
    "You are a concise and helpful AI assistant.",
)

# HF_TOKEN is deliberately not required at import time, so the UI can still
# come up on Hugging Face Spaces. `respond` surfaces clear guidance if the
# token is missing.

# No global client: the model is selectable at runtime, so a client is
# created per call.

# Small, cloud-friendly model suggestions
RECOMMENDED_MODELS = [
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-3B-Instruct",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]


def format_prompt(message: str, history: List[Tuple[str, str]]) -> str:
    """Flatten the chat history into a single plain-text prompt."""
    conversation = [f"System: {SYSTEM_PROMPT}"]
    for user_msg, assistant_msg in history:
        if user_msg:
            conversation.append(f"User: {user_msg}")
        if assistant_msg:
            conversation.append(f"Assistant: {assistant_msg}")
    conversation.append(f"User: {message}")
    conversation.append("Assistant:")
    return "\n".join(conversation)
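
# Example (for illustration): with the default SYSTEM_PROMPT and a one-turn
# history, format_prompt("How are you?", [("Hi", "Hello!")]) returns:
#
#   System: You are a concise and helpful AI assistant.
#   User: Hi
#   Assistant: Hello!
#   User: How are you?
#   Assistant:
#
# This is a raw transcript rather than the model's own chat template; instruct
# models generally tolerate it for simple chat use.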


def respond(
    message: str,
    history: List[Tuple[str, str]],
    model_id: str = HF_MODEL_ID,
    temperature: float = 0.7,
    max_new_tokens: int = 512,
):
    # If neither a token nor an endpoint is configured, guide the user from the UI.
    if not HF_TOKEN and not HF_ENDPOINT_URL:
        yield (
            "HF_TOKEN is not set. Add an 'HF_TOKEN' secret under Settings > Secrets "
            "on your Hugging Face Space (or provide an Inference Endpoint URL)."
        )
        return
    prompt = format_prompt(message, history)
    try:
        # Create a client per request to honor the selected model or endpoint.
        # Note: InferenceClient has no `endpoint` parameter; a dedicated endpoint
        # URL is passed via `model`, which accepts either a repo ID or a URL.
        if HF_ENDPOINT_URL:
            local_client = InferenceClient(model=HF_ENDPOINT_URL, token=HF_TOKEN)
        else:
            local_client = InferenceClient(model=(model_id or HF_MODEL_ID), token=HF_TOKEN)

        # Try streaming first
        accumulated = ""
        try:
            stream = local_client.text_generation(
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=0.95,
                stream=True,
                details=False,
                return_full_text=False,
            )
            for chunk in stream:
                token_text = None
                # Newer huggingface_hub may return objects with .token.text
                if hasattr(chunk, "token") and getattr(chunk.token, "text", None):
                    token_text = chunk.token.text
                # Fallback for dict responses
                if token_text is None and isinstance(chunk, dict):
                    token = chunk.get("token") or {}
                    token_text = token.get("text") or chunk.get("generated_text")
                # Fallback if a raw string is ever yielded
                if token_text is None and isinstance(chunk, str):
                    token_text = chunk

                if token_text:
                    accumulated += token_text
                    yield accumulated
        except StopIteration:
            # Some servers may raise StopIteration prematurely; fall back to non-streaming.
            pass
        except Exception as stream_err:
            # Log and fall back to non-streaming.
            print(f"[HF STREAM ERROR] {stream_err}")

        # Fallback: if nothing streamed, try a single-shot generation.
        if not accumulated.strip():
            try:
                result = local_client.text_generation(
                    prompt=prompt,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_p=0.95,
                    stream=False,
                    details=False,
                    return_full_text=False,
                )
                if isinstance(result, dict):
                    text = result.get("generated_text", "")
                else:
                    text = str(result)
                yield text if text.strip() else "No response was received from the model."
            except Exception as nonstream_err:
                # Surface a detailed error to the UI instead of a vague message.
                err_text = str(nonstream_err).strip()
                response_text = ""
                if hasattr(nonstream_err, "response"):
                    response = getattr(nonstream_err, "response")
                    response_text = getattr(response, "text", "") or ""
                if response_text and response_text not in err_text:
                    err_text = f"{err_text} | {response_text}".strip(" |")
                if not err_text:
                    err_text = repr(nonstream_err)
                print(f"[HF NON-STREAM ERROR] {err_text}")
                yield f"An error occurred: {err_text}"
    except StopIteration:
        print("[HF API ERROR] StopIteration: no data received while reading the API response.")
        yield "An error occurred: no response from the API (StopIteration)."
    except Exception as err:  # pragma: no cover - surface errors to UI
        err_text = str(err).strip()
        response_text = ""
        if hasattr(err, "response"):
            response = getattr(err, "response")
            response_text = getattr(response, "text", "") or ""
        if response_text and response_text not in err_text:
            err_text = f"{err_text} | {response_text}".strip(" |")
        if "model_not_supported" in err_text or "not supported" in err_text:
            yield (
                "The selected model does not appear to be accessible. Try updating "
                "HF_MODEL_ID in .env to a Hugging Face chat model enabled for your account."
            )
            return
        if not err_text:
            err_text = repr(err)
        print(f"[HF API ERROR] {err_text}")
        yield f"An error occurred: {err_text}"


demo = gr.ChatInterface(
    respond,
    title="Gradio HF Agent",
    description=(
        "A simple chat interface backed by the Hugging Face Inference API. "
        "You can change the model and generation settings below."
    ),
    theme="soft",
    additional_inputs=[
        gr.Dropdown(
            label="Model ID",
            info="Hugging Face model repository name",
            choices=RECOMMENDED_MODELS,
            value=HF_MODEL_ID,
            allow_custom_value=True,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=1.0,
            value=0.7,
            step=0.05,
        ),
        gr.Slider(
            label="Max new tokens",
            minimum=16,
            maximum=1024,
            value=512,
            step=16,
        ),
    ],
)

if __name__ == "__main__":
    demo.queue().launch()
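
For local runs, the script loads its configuration from a .env file via load_dotenv(). A minimal sketch with placeholder values (only HF_TOKEN or HF_ENDPOINT_URL is actually needed; the other variables have defaults in the code, and the endpoint URL below is illustrative):

HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxx
HF_MODEL_ID=Qwen/Qwen2.5-1.5B-Instruct
HF_SYSTEM_PROMPT=You are a concise and helpful AI assistant.
# Optional: send requests to a dedicated Inference Endpoint instead
# HF_ENDPOINT_URL=https://your-endpoint.endpoints.huggingface.cloud

The imports also imply a requirements.txt listing at least gradio, python-dotenv, and huggingface_hub; that file is not part of this diff, so versions are left unstated.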