efecelik committed on
Commit
e3fccea
·
verified ·
1 Parent(s): 1cd9c77

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +190 -0
app.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import List, Tuple

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables from .env if it exists
load_dotenv()

# Access token for the HF Inference API; may be absent (handled inside `respond`).
HF_TOKEN = os.getenv("HF_TOKEN")
# Default model repo queried when no dedicated endpoint is configured.
HF_MODEL_ID = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-1.5B-Instruct")
# Optional dedicated Inference Endpoint URL; when non-empty it takes
# precedence over HF_MODEL_ID (see the client selection in `respond`).
HF_ENDPOINT_URL = os.getenv("HF_ENDPOINT_URL", "").strip()
# System instruction prepended to every prompt; overridable via environment.
SYSTEM_PROMPT = os.getenv(
    "HF_SYSTEM_PROMPT",
    "You are a concise and helpful AI assistant.",
)

# Not strictly requiring HF_TOKEN at import time so that
# the UI can still come up on Hugging Face Spaces. We will
# surface a clear guidance message from within `respond` if
# a token is missing.

# Not creating a global client when we want dynamic model selection; we'll create per-call

# Small, cloud-friendly model suggestions shown in the UI dropdown.
RECOMMENDED_MODELS = [
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-3B-Instruct",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
]
32
+
33
+
34
def format_prompt(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: "str | None" = None,
) -> str:
    """Build a plain-text chat transcript for a text-generation model.

    Args:
        message: The new user message to answer.
        history: Prior (user, assistant) turns; empty halves of a turn
            (e.g. a not-yet-answered message) are skipped.
        system_prompt: Optional per-call override of the system instruction.
            When None, the module-level SYSTEM_PROMPT (from HF_SYSTEM_PROMPT)
            is used — resolved lazily at call time, not at import time.

    Returns:
        The full prompt, ending with "Assistant:" so the model continues
        with the assistant's reply.
    """
    # Resolve the system line lazily so callers may override it per call.
    conversation = [
        f"System: {SYSTEM_PROMPT if system_prompt is None else system_prompt}"
    ]
    for user_msg, assistant_msg in history:
        if user_msg:
            conversation.append(f"User: {user_msg}")
        if assistant_msg:
            conversation.append(f"Assistant: {assistant_msg}")
    conversation.append(f"User: {message}")
    conversation.append("Assistant:")
    return "\n".join(conversation)
44
+
45
+
46
def respond(
    message: str,
    history: List[Tuple[str, str]],
    model_id: str = HF_MODEL_ID,
    temperature: float = 0.7,
    max_new_tokens: int = 512,
):
    """Stream a chat reply from the Hugging Face Inference API.

    Generator handler for gr.ChatInterface: while streaming it yields
    progressively longer strings (the accumulated reply so far); otherwise
    it yields a single final answer or a (Turkish) error/guidance message.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turns supplied by Gradio.
        model_id: HF model repo to query when no dedicated endpoint is set.
        temperature: Sampling temperature forwarded to `text_generation`.
        max_new_tokens: Generation length cap forwarded to `text_generation`.
    """
    # If no token or endpoint configured, guide the user from the UI.
    if not HF_TOKEN and not HF_ENDPOINT_URL:
        yield (
            "HF_TOKEN ayarlı değil. Hugging Face Space üzerinde Settings > Secrets menüsünden"
            " 'HF_TOKEN' gizli değişkenini ekleyin (veya bir Inference Endpoint URL'si sağlayın)."
        )
        return
    prompt = format_prompt(message, history)
    try:
        # Create client per request to honor selected model or endpoint.
        # A configured endpoint URL takes precedence over the model dropdown.
        if HF_ENDPOINT_URL:
            local_client = InferenceClient(endpoint=HF_ENDPOINT_URL, token=HF_TOKEN)
        else:
            local_client = InferenceClient(model=(model_id or HF_MODEL_ID), token=HF_TOKEN)

        # Try streaming first
        accumulated = ""
        try:
            stream = local_client.text_generation(
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=0.95,
                stream=True,
                details=False,
                return_full_text=False,
            )
            # Chunk shape varies across huggingface_hub versions/servers, so
            # probe object, dict, and raw-string forms in order.
            for chunk in stream:
                token_text = None
                # Newer huggingface_hub may return objects with .token.text
                if hasattr(chunk, "token") and getattr(chunk.token, "text", None):
                    token_text = chunk.token.text
                # Fallback for dict responses
                if token_text is None and isinstance(chunk, dict):
                    token = chunk.get("token") or {}
                    token_text = token.get("text") or chunk.get("generated_text")
                # Fallback if a raw string is ever yielded
                if token_text is None and isinstance(chunk, str):
                    token_text = chunk

                # Yield the whole reply-so-far (ChatInterface replaces, not appends).
                if token_text:
                    accumulated += token_text
                    yield accumulated
        except StopIteration:
            # Some servers may prematurely raise StopIteration; we'll fallback to non-streaming
            # NOTE(review): inside a generator a StopIteration raised by the body
            # surfaces as RuntimeError (PEP 479), so this handler is unlikely to
            # ever fire — confirm intent before relying on it.
            pass
        except Exception as stream_err:
            # Log and fallback to non-streaming
            print(f"[HF STREAM ERROR] {stream_err}")

        # Fallback: if nothing streamed, try a single-shot generation
        if not accumulated.strip():
            try:
                result = local_client.text_generation(
                    prompt=prompt,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_p=0.95,
                    stream=False,
                    details=False,
                    return_full_text=False,
                )
                # Non-streaming result may be a dict or a plain string depending
                # on the client version; normalize to text either way.
                if isinstance(result, dict):
                    text = result.get("generated_text", "")
                else:
                    text = str(result)
                yield text if text.strip() else "Modelden cevap alınamadı."
            except Exception as nonstream_err:
                # Surface detailed error to the UI instead of a vague message
                err_text = str(nonstream_err).strip()
                response_text = ""
                # HTTP errors may carry the server's response body; append it
                # when it adds information not already in the message.
                if hasattr(nonstream_err, "response"):
                    response = getattr(nonstream_err, "response")
                    response_text = getattr(response, "text", "") or ""
                if response_text and response_text not in err_text:
                    err_text = f"{err_text} | {response_text}".strip(" |")
                if not err_text:
                    err_text = repr(nonstream_err)
                print(f"[HF NON-STREAM ERROR] {err_text}")
                yield f"Bir hata oluştu: {err_text}"
    except StopIteration:
        # NOTE(review): see the PEP 479 note above — likely unreachable.
        print("[HF API ERROR] StopIteration: API'den yanıt dönerken veri alınamadı.")
        yield "Bir hata oluştu: API'den yanıt alınamadı (StopIteration)."
    except Exception as err:  # pragma: no cover - surface errors to UI
        err_text = str(err).strip()
        response_text = ""
        if hasattr(err, "response"):
            response = getattr(err, "response")
            response_text = getattr(response, "text", "") or ""
        if response_text and response_text not in err_text:
            err_text = f"{err_text} | {response_text}".strip(" |")
        # Special-case inaccessible models with actionable (Turkish) guidance.
        if "model_not_supported" in err_text or "not supported" in err_text:
            yield (
                "Seçilen model erişilebilir görünmüyor. `.env` içindeki `HF_MODEL_ID` "
                "değerini, hesabınızda etkin olan bir Hugging Face sohbet modeli ile güncellemeyi deneyin."
            )
            return
        if not err_text:
            err_text = repr(err)
        print(f"[HF API ERROR] {err_text}")
        yield f"Bir hata oluştu: {err_text}"
154
+
155
+
156
# Chat UI wired to `respond`. The additional inputs are passed to `respond`
# positionally after (message, history), i.e. as model_id, temperature and
# max_new_tokens; labels/description are user-facing Turkish text.
demo = gr.ChatInterface(
    respond,
    title="Gradio HF Agent",
    description=(
        "Hugging Face Inference API ile konuşan basit bir sohbet arayüzü. "
        "Aşağıdan model ve üretim ayarlarını değiştirebilirsiniz."
    ),
    theme="soft",
    additional_inputs=[
        # Model selector; custom values allowed beyond the recommended list.
        gr.Dropdown(
            label="Model ID",
            info="Hugging Face model repository adı",
            choices=RECOMMENDED_MODELS,
            value=HF_MODEL_ID,
            allow_custom_value=True,
        ),
        # Sampling temperature (matches respond's default of 0.7).
        gr.Slider(
            label="Sıcaklık (temperature)",
            minimum=0.0,
            maximum=1.0,
            value=0.7,
            step=0.05,
        ),
        # Maximum new tokens to generate (matches respond's default of 512).
        gr.Slider(
            label="Maksimum yeni token",
            minimum=16,
            maximum=1024,
            value=512,
            step=16,
        ),
    ],
)
188
+
189
if __name__ == "__main__":
    # Enable Gradio's request queue before launching — presumably needed for
    # the streamed generator responses from `respond`; confirm against the
    # installed Gradio version's docs.
    demo.queue().launch()