arthu1 committed on
Commit
97b0e0f
·
verified ·
1 Parent(s): 5419bbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +285 -55
app.py CHANGED
@@ -1,70 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
-
5
- def respond(
6
- message,
7
- history: list[dict[str, str]],
8
- system_message,
9
- max_tokens,
10
- temperature,
11
- top_p,
12
- hf_token: gr.OAuthToken,
13
- ):
 
 
 
 
 
 
 
 
 
 
 
 
14
  """
15
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
16
  """
17
- client = InferenceClient(token=hf_token.token, model="arthu1/starlight-mini")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- messages.extend(history)
 
 
 
 
 
22
 
23
- messages.append({"role": "user", "content": message})
 
 
24
 
25
- response = ""
 
 
 
 
 
 
26
 
27
- for message in client.chat_completion(
28
- messages,
29
- max_tokens=max_tokens,
30
- stream=True,
31
- temperature=temperature,
32
- top_p=top_p,
33
- ):
34
- choices = message.choices
35
- token = ""
36
- if len(choices) and choices[0].delta.content:
37
- token = choices[0].delta.content
38
 
39
- response += token
40
- yield response
 
 
 
 
 
 
 
 
41
 
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- chatbot = gr.ChatInterface(
47
- respond,
48
- type="messages",
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
62
 
63
- with gr.Blocks() as demo:
64
- with gr.Sidebar():
65
- gr.LoginButton()
66
- chatbot.render()
 
 
 
 
67
 
 
 
68
 
69
- if __name__ == "__main__":
70
- demo.launch()
 
"""
HUGGINGFACE SPACE - API FOR STARLIGHT MINI
This creates a Space with automatic API endpoints

Deploy this to get a FREE, ALWAYS-ON API for your users!
"""

# ============================================
# File: app.py (Main file for your Space)
# ============================================

# NOTE(review): APP_PY holds the complete source of the Space's app.py as one
# template string. This script only prints it (see the print() at the bottom
# of the file); the template is never executed or written to disk here.
APP_PY: str = '''
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load your model
print("🚀 Loading Starlight Mini...")

MODEL_NAME = "arthu1/starlight-mini"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)

print("✅ Model loaded!")

def generate_text(
    prompt: str,
    max_tokens: int = 100,
    temperature: float = 0.7,
    top_p: float = 0.9
) -> str:
    """
    Generate text from prompt
    This function is automatically exposed as an API endpoint!
    """
    if not prompt:
        return "Error: Please provide a prompt"

    try:
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=min(max_tokens, 500),
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return result

    except Exception as e:
        return f"Error: {str(e)}"

# Create Gradio interface with API
with gr.Blocks(title="Starlight Mini API") as demo:
    gr.Markdown("""
    # 🌟 Starlight Mini API
    **Free AI Text Generation - 8B Parameters**

    This Space provides both a web UI and automatic API endpoints!
    """)

    with gr.Tab("💬 Chat Interface"):
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter your prompt here...",
                    lines=5
                )

                with gr.Accordion("⚙️ Settings", open=False):
                    max_tokens_slider = gr.Slider(
                        minimum=10,
                        maximum=500,
                        value=100,
                        step=10,
                        label="Max Tokens"
                    )
                    temperature_slider = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature"
                    )
                    top_p_slider = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.9,
                        step=0.05,
                        label="Top P"
                    )

                generate_btn = gr.Button("✨ Generate", variant="primary")

            with gr.Column():
                output_text = gr.Textbox(
                    label="Generated Text",
                    lines=10
                )

        gr.Examples(
            examples=[
                ["Once upon a time in a magical forest,"],
                ["Explain quantum computing in simple terms:"],
                ["Write a haiku about coding:"],
            ],
            inputs=[prompt_input]
        )

    with gr.Tab("📡 API Documentation"):
        gr.Markdown("""
        ## Using the API

        This Space automatically provides API endpoints!

        ### Python Example:
        ```python
        from gradio_client import Client

        client = Client("arthu1/Exquisite-Starlight")
        result = client.predict(
            prompt="Hello, Starlight!",
            max_tokens=100,
            temperature=0.7,
            top_p=0.9,
            api_name="/predict"
        )
        print(result)
        ```

        ### cURL Example:
        ```bash
        curl -X POST https://arthu1-exquisite-starlight.hf.space/api/predict \\
            -H "Content-Type: application/json" \\
            -d '{"data": ["Your prompt here", 100, 0.7, 0.9]}'
        ```

        ### JavaScript Example:
        ```javascript
        const response = await fetch(
            "https://arthu1-exquisite-starlight.hf.space/api/predict",
            {
                method: "POST",
                headers: { "Content-Type": "application/json" },
                body: JSON.stringify({
                    data: ["Your prompt here", 100, 0.7, 0.9]
                })
            }
        );
        const result = await response.json();
        console.log(result.data[0]);
        ```

        ### Rate Limits:
        - Free tier: Generous limits for personal/small commercial use
        - If you need more, consider upgrading the Space hardware

        ### Support:
        - Report issues on the Space's Community tab
        - Share feedback and improvements!
        """)

    # Connect the button
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt_input, max_tokens_slider, temperature_slider, top_p_slider],
        outputs=output_text
    )

# Launch with API enabled
demo.launch(
    show_api=True,  # This enables automatic API endpoints!
    share=False  # Space URL is already public
)
'''

# ============================================
# File: requirements.txt
# ============================================

# NOTE(review): dependency versions are unpinned — TODO consider pinning
# (e.g. gradio to match the README's sdk_version) for reproducible builds.
REQUIREMENTS: str = '''
gradio
transformers
torch
accelerate
gradio_client
'''

# ============================================
# File: README.md
# ============================================

# README template for the Space. The leading `---` block is YAML front
# matter (title, emoji, sdk, sdk_version, app_file, ...) — presumably the
# Space configuration header; verify sdk_version against the gradio release
# actually installed. Like APP_PY, this is only printed, never written out.
README: str = '''---
title: Starlight Mini API
emoji: 🌟
colorFrom: purple
colorTo: blue
sdk: gradio
sdk_version: 4.44.0
app_file: app.py
pinned: false
---

# Starlight Mini API

Free AI text generation API powered by an 8B parameter model.

## Features
- 🌟 8B parameter model
- 💬 Web UI for testing
- 📡 Automatic API endpoints
- 🚀 Always online (free CPU tier)
- ⚡ Fast inference

## Usage

### Web UI
Just visit this Space and start chatting!

### API
See the "API Documentation" tab for code examples in Python, JavaScript, and cURL.

## Model
Based on arthu1/starlight-mini - a fine-tuned model optimized for helpfulness and coding tasks.

## Credits
Created by arthu1 / Nova Devs (North.ai)
'''

# ============================================
# DEPLOYMENT INSTRUCTIONS
# ============================================

# Human-readable walkthrough printed after the three file templates; plain
# prose for the user, not consumed by any code in this script.
INSTRUCTIONS: str = """
HOW TO DEPLOY YOUR SPACE:

1. Go to https://huggingface.co/new-space

2. Fill in:
   - Space name: Exquisite-Starlight (or whatever you want)
   - License: Apache-2.0
   - SDK: Gradio
   - Hardware: CPU (free!) or upgrade to GPU if needed

3. Create the Space

4. Upload these 3 files:
   - app.py (the code above)
   - requirements.txt
   - README.md

5. Wait 5-10 minutes for it to build

6. Done! You get:
   ✅ Public URL: https://huggingface.co/spaces/arthu1/Exquisite-Starlight
   ✅ API URL: https://arthu1-exquisite-starlight.hf.space/api/predict
   ✅ Web UI for testing
   ✅ Always online (free!)

7. Share the URLs with your users!

IMPORTANT:
- CPU is FREE forever
- If you need faster inference, upgrade to GPU (costs money)
- API endpoints are automatic - Gradio creates them for you!
- Users can call your API from any language

Your users will be happy because:
- Free access
- ChatGPT-like experience
- Simple API to integrate
- Always online
"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

# Assemble the final report — the three file templates plus the deployment
# walkthrough — into one string, then emit it to stdout in a single call.
_report = f"""
FILES TO CREATE:

1. app.py:
{APP_PY}

2. requirements.txt:
{REQUIREMENTS}

3. README.md:
{README}

{INSTRUCTIONS}
"""
print(_report)