david167 committed on
Commit
6bf8feb
·
1 Parent(s): 19607d6

Simplify API - remove all templates, just prompt-in response-out

Browse files
Files changed (1) hide show
  1. gradio_app.py +88 -416
gradio_app.py CHANGED
@@ -1,12 +1,5 @@
1
  import os
2
  import logging
3
- import time
4
- import asyncio
5
- from typing import List, Optional, Dict, Any
6
- import threading
7
- import json
8
- import re
9
-
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import gradio as gr
@@ -15,12 +8,6 @@ import gradio as gr
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
- # Global variables for model and tokenizer
19
- model = None
20
- tokenizer = None
21
- device = None
22
- model_loaded = False
23
-
24
  class ModelManager:
25
  def __init__(self):
26
  self.model = None
@@ -34,7 +21,7 @@ class ModelManager:
34
  try:
35
  logger.info("Starting model loading...")
36
 
37
- # Check if CUDA is available and force to cuda:0
38
  if torch.cuda.is_available():
39
  torch.cuda.set_device(0)
40
  self.device = "cuda:0"
@@ -62,7 +49,7 @@ class ModelManager:
62
  self.model = AutoModelForCausalLM.from_pretrained(
63
  base_model_name,
64
  torch_dtype=torch.float16 if self.device == "cuda:0" else torch.float32,
65
- device_map={"": 0}, # Force all parameters to GPU 0
66
  trust_remote_code=True,
67
  low_cpu_mem_usage=True,
68
  use_safetensors=True,
@@ -79,447 +66,132 @@ class ModelManager:
79
  logger.error(f"Error loading model: {str(e)}")
80
  self.model_loaded = False
81
 
82
- # Start model loading in a separate thread
83
  model_manager = ModelManager()
84
 
85
- def create_json_prompt(message, template_type):
86
- """Create JSON-formatted prompts based on template type"""
87
-
88
- json_templates = {
89
- "general": {
90
- "instruction": "Extract the key points from the content and return them as a JSON array of strings. Each string should be a concise summary of an important point from the content.",
91
- "schema": """Format: ["actual key point from content", "another key point from content", "etc..."]"""
92
- },
93
- "list": {
94
- "instruction": "Extract and list the key topics or points from the content. Return them as a JSON array where each element is a specific, factual point from the content. Do not use placeholder text.",
95
- "schema": """Return a JSON array of strings, each representing a distinct point from the content. Example format: ["First specific point from the content", "Second specific point", "Third point"]"""
96
- },
97
- "questions": {
98
- "instruction": "Generate 3 diverse user and assistant prompt pairs based on the specific topic provided. Create realistic questions a user might ask and helpful assistant responses.",
99
- "schema": """Format: [{"user": "realistic question about the topic", "assistant": "helpful response"}, {"user": "different question", "assistant": "different response"}, {"user": "third question", "assistant": "third response"}]"""
100
- },
101
- "analysis": {
102
- "instruction": "Analyze the following content and respond in JSON format:",
103
- "schema": """{
104
- "summary": "brief summary of the content",
105
- "key_points": [
106
- "Key point 1",
107
- "Key point 2",
108
- "Key point 3"
109
- ],
110
- "sentiment": "positive|negative|neutral",
111
- "topics": ["topic1", "topic2", "topic3"],
112
- "complexity_score": 0.75,
113
- "word_count": 150
114
- }"""
115
- },
116
- "structured": {
117
- "instruction": "Process this information and respond in a structured JSON format:",
118
- "schema": """{
119
- "title": "extracted or generated title",
120
- "content": "processed content",
121
- "categories": ["category1", "category2"],
122
- "tags": ["tag1", "tag2", "tag3"],
123
- "priority": "high|medium|low",
124
- "action_items": [
125
- "Action item 1",
126
- "Action item 2"
127
- ]
128
- }"""
129
- }
130
- }
131
-
132
- template = json_templates.get(template_type, json_templates["general"])
133
-
134
- return f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
135
-
136
- {message}
137
-
138
- {template["instruction"]}
139
-
140
- {template["schema"]}
141
-
142
- Important: Respond with valid JSON only. No additional text. Base your response on the actual content provided, not the format examples.
143
-
144
- <|eot_id|><|start_header_id|>assistant<|end_header_id|>
145
-
146
- """
147
-
148
- def prettify_json_response(response_text):
149
- """Try to extract and prettify JSON from response"""
150
- try:
151
- # Clean the response first
152
- cleaned = response_text.strip()
153
-
154
- # Try to parse the entire response as JSON first
155
- try:
156
- parsed_json = json.loads(cleaned)
157
- return json.dumps(parsed_json, indent=2, ensure_ascii=False)
158
- except json.JSONDecodeError:
159
- pass
160
-
161
- # Try to find JSON in the response - look for both objects and arrays
162
- # Use non-greedy matching and better patterns
163
- json_patterns = [
164
- r'\[[\s\S]*?\](?=\s*$)', # Array pattern - non-greedy, end of string
165
- r'\{[\s\S]*?\}(?=\s*$)', # Object pattern - non-greedy, end of string
166
- r'\[[\s\S]*\]', # Array pattern - greedy fallback
167
- r'\{[\s\S]*\}' # Object pattern - greedy fallback
168
- ]
169
-
170
- for pattern in json_patterns:
171
- json_match = re.search(pattern, cleaned, re.MULTILINE)
172
- if json_match:
173
- json_str = json_match.group().strip()
174
- try:
175
- parsed_json = json.loads(json_str)
176
- return json.dumps(parsed_json, indent=2, ensure_ascii=False)
177
- except json.JSONDecodeError:
178
- continue
179
-
180
- # If no JSON found, return original
181
- return response_text
182
- except AttributeError:
183
- return response_text
184
-
185
- def chat_with_model(message, history, temperature, json_mode=False, json_template="general"):
186
- """Raw chat function for direct model interaction"""
187
- if not message.strip():
188
- return history, ""
189
-
190
  if not model_manager.model_loaded:
191
- response = "Model not loaded yet. Please wait..."
192
- history.append({"role": "user", "content": message})
193
- history.append({"role": "assistant", "content": response})
194
- return history, ""
195
-
196
  try:
197
- # Create prompt based on mode
198
- if json_mode:
199
- prompt = create_json_prompt(message, json_template)
200
- else:
201
- # Create a simple chat prompt
202
- prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
203
 
204
- {message}
205
 
206
  <|eot_id|><|start_header_id|>assistant<|end_header_id|>
207
 
208
  """
209
 
210
- # Generate response using the model directly
211
- inputs = model_manager.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096)
 
 
 
 
 
212
 
213
- # Force all inputs to the same device as the model
214
  if model_manager.device == "cuda:0":
215
- # Get the actual device of the model
216
  model_device = next(model_manager.model.parameters()).device
217
- logger.info(f"Model is on device: {model_device}")
218
-
219
- # Move all input tensors to the same device as the model
220
  inputs = {k: v.to(model_device) for k, v in inputs.items()}
221
 
 
222
  with torch.no_grad():
223
- outputs = model_manager.model.generate(
224
- **inputs,
225
- max_new_tokens=8192, # Much higher limit for complete responses
226
- temperature=temperature,
227
- top_p=0.95,
228
- do_sample=True,
229
- num_beams=1,
230
- pad_token_id=model_manager.tokenizer.eos_token_id,
231
- eos_token_id=model_manager.tokenizer.eos_token_id,
232
- early_stopping=False, # Disable early stopping
233
- repetition_penalty=1.05, # Lighter repetition penalty
234
- no_repeat_ngram_size=0, # Disable n-gram repetition blocking
235
- length_penalty=1.0, # Neutral length penalty
236
- min_new_tokens=50 # Ensure minimum response length
237
- )
238
 
239
- # Decode response
240
  generated_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
241
 
242
- # Debug logging
243
- logger.info(f"Full generated text length: {len(generated_text)} characters")
244
- logger.info(f"Generated text preview: {generated_text[:300]}...")
245
- logger.info(f"Generated text ending: ...{generated_text[-300:]}")
246
-
247
- # Extract the response part (remove the prompt)
248
  if "<|start_header_id|>assistant<|end_header_id|>" in generated_text:
249
  response = generated_text.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
250
  else:
251
- # Improved fallback: look for common JSON starting patterns
252
- response = generated_text
253
-
254
- # Try to find where the actual response starts
255
- json_start_patterns = ['[', '{', '"']
256
- for pattern in json_start_patterns:
257
- if pattern in generated_text:
258
- # Find the first occurrence that looks like the start of JSON
259
- start_idx = generated_text.find(pattern)
260
- if start_idx > len(prompt) // 2: # Make sure it's after the prompt
261
- response = generated_text[start_idx:].strip()
262
- break
263
-
264
- # Ultimate fallback: use the last portion of the text
265
- if response == generated_text:
266
- # Split by common delimiters and take the largest chunk
267
- chunks = generated_text.split('\n\n')
268
- if len(chunks) > 1:
269
- response = chunks[-1].strip()
270
- else:
271
- response = generated_text[len(prompt)//2:].strip()
272
 
273
- # Log response length for debugging
274
  logger.info(f"Generated response length: {len(response)} characters")
275
- logger.info(f"Response preview: {response[:200]}...")
276
-
277
- # Process JSON response if in JSON mode
278
- if json_mode and response:
279
- original_response = response
280
- response = prettify_json_response(response)
281
- if response != original_response:
282
- logger.info(f"JSON processing applied. New length: {len(response)}")
283
- else:
284
- logger.info("JSON processing had no effect - no valid JSON found")
285
-
286
- # Add to history
287
- history.append({"role": "user", "content": message})
288
- history.append({"role": "assistant", "content": response})
289
 
290
  except Exception as e:
291
- logger.error(f"Error in chat: {str(e)}")
292
- history.append({"role": "user", "content": message})
293
- history.append({"role": "assistant", "content": f"Error: {str(e)}"})
 
 
 
 
 
 
 
294
 
295
  return history, ""
296
 
297
- def clear_chat():
298
- """Clear the chat history"""
299
- return [], ""
300
-
301
- # Custom CSS for full-width ChatGPT-like appearance
302
- css = """
303
- .gradio-container {
304
- max-width: 100% !important;
305
- width: 100% !important;
306
- margin: 0 !important;
307
- padding: 20px !important;
308
- }
309
- #chatbot {
310
- height: 400px !important;
311
- max-height: 400px !important;
312
- min-height: 400px !important;
313
- overflow-y: auto !important;
314
- border-radius: 12px !important;
315
- border: 1px solid #e0e0e0 !important;
316
- background-color: #fafafa !important;
317
- color: #212529 !important;
318
- }
319
-
320
- /* Force all text in chatbot to be dark - nuclear option */
321
- #chatbot,
322
- #chatbot *,
323
- [data-testid="chatbot"],
324
- [data-testid="chatbot"] *,
325
- .chatbot,
326
- .chatbot *,
327
- .gr-chatbot,
328
- .gr-chatbot * {
329
- color: #212529 !important;
330
- text-shadow: none !important;
331
- }
332
-
333
- /* Ensure all chatbot text has proper contrast - More specific targeting */
334
- #chatbot .message,
335
- #chatbot .bot-message,
336
- #chatbot .user-message,
337
- #chatbot div,
338
- #chatbot p,
339
- #chatbot span,
340
- #chatbot .prose,
341
- #chatbot .markdown,
342
- .chatbot .message-content,
343
- .gradio-chatbot .message,
344
- .gradio-chatbot div,
345
- .gradio-chatbot p,
346
- .gradio-chatbot span {
347
- color: #212529 !important;
348
- }
349
-
350
- /* Target Gradio's specific chatbot classes */
351
- .chatbot .bot,
352
- .chatbot .user,
353
- .gradio-chatbot,
354
- .gradio-chatbot * {
355
- color: #212529 !important;
356
- }
357
- .message {
358
- padding: 12px 16px !important;
359
- margin: 8px 0 !important;
360
- border-radius: 12px !important;
361
- max-width: 85% !important;
362
- word-wrap: break-word !important;
363
- }
364
- .user {
365
- background-color: #007bff !important;
366
- color: white !important;
367
- margin-left: auto !important;
368
- margin-right: 0 !important;
369
- }
370
- .bot {
371
- background-color: #f8f9fa !important;
372
- border: 1px solid #e9ecef !important;
373
- margin-left: 0 !important;
374
- margin-right: auto !important;
375
- color: #212529 !important;
376
- }
377
- /* Full width input area */
378
- .gr-textbox {
379
- border-radius: 8px !important;
380
- }
381
-
382
- /* Prevent textbox from affecting layout */
383
- .gradio-textbox textarea {
384
- resize: none !important;
385
- max-height: 120px !important;
386
- min-height: 40px !important;
387
- }
388
-
389
- /* Prevent layout shifts on focus */
390
- .gradio-container .wrap {
391
- min-height: auto !important;
392
- }
393
-
394
- /* Stable row heights */
395
- .gradio-row {
396
- min-height: auto !important;
397
- }
398
- /* Responsive design for different screen sizes */
399
- @media (min-width: 1400px) {
400
- .gradio-container {
401
- padding: 40px !important;
402
- }
403
- #chatbot {
404
- height: 450px !important;
405
- max-height: 450px !important;
406
- }
407
- }
408
- @media (min-width: 1800px) {
409
- .gradio-container {
410
- padding: 60px !important;
411
- }
412
- #chatbot {
413
- height: 500px !important;
414
- max-height: 500px !important;
415
- }
416
- }
417
- """
418
-
419
- # Create simplified chat interface with JSON functionality
420
- with gr.Blocks(css=css, title="Llama Chat", theme=gr.themes.Soft()) as demo:
421
- gr.Markdown(
422
- """
423
- # 🦙 Llama Chat
424
- ### Raw interface for Llama-3.1-8B-Instruct
425
-
426
- Direct chat interface for testing prompts and having conversations with the model.
427
-
428
- **New:** Enable **JSON Response Mode** for structured outputs! Choose from templates like:
429
- - 🎯 **General**: Basic structured responses
430
- - ❓ **Questions**: Generate question sets from content
431
- - 📊 **Analysis**: Content analysis with sentiment & topics
432
- - 📋 **Structured**: Organized data with categories & actions
433
- """
434
- )
435
-
436
- # Simple chat interface
437
- chatbot = gr.Chatbot(
438
- elem_id="chatbot",
439
- label="Chat",
440
- show_label=False,
441
- avatar_images=(None, None),
442
- show_share_button=False,
443
- type="messages", # Use new message format
444
- height=400, # Reduced from 600 to 400
445
- render_markdown=True,
446
- show_copy_button=True,
447
- container=True,
448
- scale=1
449
- )
450
 
451
  with gr.Row():
452
  with gr.Column(scale=4):
 
 
 
 
 
453
  msg = gr.Textbox(
454
- placeholder="Type your message here...",
455
- show_label=False,
456
- container=False,
457
- lines=1,
458
- max_lines=3,
459
- autofocus=False,
460
- interactive=True
461
  )
 
 
 
 
462
  with gr.Column(scale=1):
463
- submit_btn = gr.Button("Send", variant="primary")
464
- with gr.Column(scale=1):
465
- clear_btn = gr.Button("Clear", variant="secondary")
466
-
467
- with gr.Row():
468
- temperature = gr.Slider(
469
- minimum=0.1,
470
- maximum=2.0,
471
- value=0.8,
472
- step=0.1,
473
- label="Temperature",
474
- info="Controls randomness (0.1=focused, 2.0=creative)"
475
- )
476
-
477
- with gr.Row():
478
- with gr.Column(scale=2):
479
- json_mode = gr.Checkbox(
480
- label="JSON Response Mode",
481
- value=False,
482
- info="Get structured JSON responses instead of regular text"
483
- )
484
- with gr.Column(scale=3):
485
- json_template = gr.Dropdown(
486
- choices=["general", "questions", "analysis", "structured"],
487
- value="general",
488
- label="JSON Template",
489
- info="Choose the type of JSON structure you want",
490
- visible=False
491
  )
 
 
 
 
 
 
 
 
 
 
 
492
 
493
- # Event handlers
494
- def respond(message, history, temp, json_enabled, json_type):
495
- return chat_with_model(message, history, temp, json_enabled, json_type)
496
-
497
- def toggle_json_template(json_enabled):
498
- return gr.update(visible=json_enabled)
499
-
500
- # Connect JSON mode toggle to template visibility
501
- json_mode.change(toggle_json_template, inputs=[json_mode], outputs=[json_template])
502
-
503
- msg.submit(respond, [msg, chatbot, temperature, json_mode, json_template], [chatbot, msg])
504
- submit_btn.click(respond, [msg, chatbot, temperature, json_mode, json_template], [chatbot, msg])
505
- clear_btn.click(clear_chat, outputs=[chatbot, msg])
506
-
507
- # Add footer
508
- gr.Markdown(
509
- """
510
- ---
511
- <div style="text-align: center; color: #666; font-size: 0.9em;">
512
- Built with ❤️ using Gradio and Llama-3.1-8B-Instruct •
513
- <a href="/docs" target="_blank">API Documentation</a> •
514
- JSON Mode for structured outputs
515
- </div>
516
- """
517
- )
518
 
519
  if __name__ == "__main__":
520
  demo.launch(
521
  server_name="0.0.0.0",
522
  server_port=7860,
523
- share=False,
524
- show_error=True
525
  )
 
1
  import os
2
  import logging
 
 
 
 
 
 
 
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import gradio as gr
 
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
 
 
 
 
 
 
11
  class ModelManager:
12
  def __init__(self):
13
  self.model = None
 
21
  try:
22
  logger.info("Starting model loading...")
23
 
24
+ # Check if CUDA is available
25
  if torch.cuda.is_available():
26
  torch.cuda.set_device(0)
27
  self.device = "cuda:0"
 
49
  self.model = AutoModelForCausalLM.from_pretrained(
50
  base_model_name,
51
  torch_dtype=torch.float16 if self.device == "cuda:0" else torch.float32,
52
+ device_map={"": 0} if self.device == "cuda:0" else None,
53
  trust_remote_code=True,
54
  low_cpu_mem_usage=True,
55
  use_safetensors=True,
 
66
  logger.error(f"Error loading model: {str(e)}")
67
  self.model_loaded = False
68
 
69
+ # Initialize model manager
70
  model_manager = ModelManager()
71
 
72
def generate_response(prompt, temperature=0.8):
    """Generate a single-turn reply to ``prompt`` with the loaded model.

    The prompt is wrapped in the Llama-3.1 user/assistant header format,
    tokenized (truncated to 4096 tokens) and passed to ``model.generate``
    with sampling enabled.

    Args:
        prompt: Raw user text to send to the model.
        temperature: Sampling temperature forwarded to ``generate``.

    Returns:
        The assistant's reply as a stripped string. If the model has not
        finished loading, a "please wait" notice is returned. If anything
        raises during tokenization or generation, the string
        ``"Error: <message>"`` is returned instead of propagating.
    """
    if not model_manager.model_loaded:
        return "Model not loaded yet. Please wait..."

    try:
        # Llama-3.1 chat format, built line by line so the prompt text does
        # not depend on how this function happens to be indented.
        # NOTE(review): the tokenizer may also prepend its own BOS token in
        # addition to the literal <|begin_of_text|> here - confirm.
        formatted_prompt = (
            "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
            "\n"
            f"{prompt}\n"
            "\n"
            "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
            "\n"
        )

        inputs = model_manager.tokenizer(
            formatted_prompt,
            return_tensors="pt",
            truncation=True,
            max_length=4096,
        )

        # Put the inputs wherever the model's weights actually live; this is
        # a no-op when both are already on the CPU.
        model_device = next(model_manager.model.parameters()).device
        inputs = {name: tensor.to(model_device) for name, tensor in inputs.items()}

        with torch.no_grad():
            outputs = model_manager.model.generate(
                **inputs,
                max_new_tokens=8192,
                temperature=temperature,
                top_p=0.95,
                do_sample=True,
                num_beams=1,
                pad_token_id=model_manager.tokenizer.eos_token_id,
                eos_token_id=model_manager.tokenizer.eos_token_id,
                early_stopping=False,
                repetition_penalty=1.05,
                no_repeat_ngram_size=0,
                length_penalty=1.0,
                min_new_tokens=50,
            )

        # The output sequence starts with the prompt tokens. Decode only what
        # was generated after them. Searching the decoded text for the
        # assistant header cannot work because skip_special_tokens=True
        # strips it, and slicing by len(formatted_prompt) cuts into the reply
        # because the decoded prompt is shorter than the formatted one.
        prompt_token_count = inputs["input_ids"].shape[-1]
        new_tokens = outputs[0][prompt_token_count:]
        response = model_manager.tokenizer.decode(
            new_tokens, skip_special_tokens=True
        ).strip()

        logger.info(f"Generated response length: {len(response)} characters")
        return response

    except Exception as e:
        # Top-level boundary for the UI: log with traceback, report as text.
        logger.exception(f"Error generating response: {str(e)}")
        return f"Error: {str(e)}"
134
+
135
def respond(message, history, temperature):
    """Gradio chat handler: run one prompt and extend the conversation.

    Args:
        message: Text from the input box.
        history: Current chat history as a list of
            ``{"role": ..., "content": ...}`` dicts; ``None`` is treated as
            an empty history.
        temperature: Sampling temperature passed to ``generate_response``.

    Returns:
        A ``(history, "")`` tuple: the updated history for the chatbot and
        an empty string that clears the input box. Blank or whitespace-only
        messages return the history unchanged without calling the model.
    """
    # Work on a copy so the caller's list is never mutated in place.
    updated_history = list(history or [])

    # Skip empty submissions instead of spending a full generation on them.
    if not message or not message.strip():
        return updated_history, ""

    response = generate_response(message, temperature)

    updated_history.append({"role": "user", "content": message})
    updated_history.append({"role": "assistant", "content": response})

    return updated_history, ""
144
 
145
+ # Create the Gradio interface
146
+ with gr.Blocks(title="Question Generation API") as demo:
147
+ gr.Markdown("# Simple LLM API")
148
+ gr.Markdown("Send a prompt and get a response. No templates, just direct model interaction.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  with gr.Row():
151
  with gr.Column(scale=4):
152
+ chatbot = gr.Chatbot(
153
+ label="Chat",
154
+ type="messages",
155
+ height=400
156
+ )
157
  msg = gr.Textbox(
158
+ label="Message",
159
+ placeholder="Enter your prompt here...",
160
+ lines=3
 
 
 
 
161
  )
162
+ with gr.Row():
163
+ submit = gr.Button("Send", variant="primary")
164
+ clear = gr.Button("Clear")
165
+
166
  with gr.Column(scale=1):
167
+ temperature = gr.Slider(
168
+ minimum=0.1,
169
+ maximum=2.0,
170
+ value=0.8,
171
+ step=0.1,
172
+ label="Temperature",
173
+ info="Higher = more creative"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  )
175
+ gr.Markdown("""
176
+ ### API Usage
177
+ This model accepts any prompt and returns a response.
178
+
179
+ For JSON responses, include instructions in your prompt like:
180
+ - "Return as a JSON array"
181
+ - "Format as JSON"
182
+ - "List as JSON"
183
+
184
+ The model will follow your instructions.
185
+ """)
186
 
187
+ # Set up event handlers
188
+ submit.click(respond, [msg, chatbot, temperature], [chatbot, msg])
189
+ msg.submit(respond, [msg, chatbot, temperature], [chatbot, msg])
190
+ clear.click(lambda: ([], ""), outputs=[chatbot, msg])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  if __name__ == "__main__":
193
  demo.launch(
194
  server_name="0.0.0.0",
195
  server_port=7860,
196
+ share=False
 
197
  )