FlameF0X committed on
Commit
ca534e3
·
verified ·
1 Parent(s): a873184

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -48
app.py CHANGED
@@ -371,14 +371,16 @@ class ModelLoader:
371
  loader = ModelLoader()
372
  model, tokenizer = loader.load()
373
 
374
- def generate_response(message, history, temperature, top_k, top_p, max_tokens):
375
- """Generate response with streaming."""
376
- # Encode the message
377
- input_ids = tokenizer.encode(message).ids
378
  input_tensor = torch.tensor([input_ids], dtype=torch.long, device=loader.device)
379
 
 
 
 
380
  # Generate with streaming
381
- response = ""
382
  for token_id in model.generate_stream(
383
  input_tensor,
384
  max_new_tokens=max_tokens,
@@ -387,33 +389,45 @@ def generate_response(message, history, temperature, top_k, top_p, max_tokens):
387
  top_p=top_p
388
  ):
389
  token_text = tokenizer.decode([token_id])
390
- response += token_text
391
- yield response
 
 
 
 
 
 
 
 
 
392
 
393
  # Create Gradio interface
394
- with gr.Blocks(title="i3-4096ctx Model", theme=gr.themes.Soft()) as demo:
395
  gr.Markdown("""
396
- # 🚀 i3-4096ctx Language Model
 
 
397
 
398
- A hybrid RWKV-Attention model with latent context compression, supporting up to 4096 tokens of context.
399
  """)
400
 
401
  with gr.Row():
402
- with gr.Column(scale=3):
403
- chatbot = gr.Chatbot(
404
- height=500,
405
- label="Chat"
 
406
  )
407
 
408
- msg = gr.Textbox(
409
- label="Your message",
410
- placeholder="Type your message here...",
411
- lines=3
412
  )
413
 
414
  with gr.Row():
415
- submit = gr.Button("Send", variant="primary")
416
- clear = gr.Button("Clear")
417
 
418
  with gr.Column(scale=1):
419
  gr.Markdown("### Generation Settings")
@@ -424,7 +438,7 @@ with gr.Blocks(title="i3-4096ctx Model", theme=gr.themes.Soft()) as demo:
424
  value=0.8,
425
  step=0.1,
426
  label="Temperature",
427
- info="Higher = more creative"
428
  )
429
 
430
  top_k = gr.Slider(
@@ -450,45 +464,45 @@ with gr.Blocks(title="i3-4096ctx Model", theme=gr.themes.Soft()) as demo:
450
  maximum=500,
451
  value=200,
452
  step=10,
453
- label="Max tokens",
454
- info="Maximum response length"
455
  )
456
 
457
  gr.Markdown("""
458
  ### Model Info
 
459
  - **Architecture**: Hybrid RWKV-Attention
460
  - **Context**: 4096 tokens (compressed)
461
- - **Kernel**: 512 tokens
462
- - **Compression**: 32 latent tokens per chunk
 
 
 
 
 
 
463
  """)
464
 
465
- def user(user_message, history):
466
- return "", history + [[user_message, None]]
467
-
468
- def bot(history, temperature, top_k, top_p, max_tokens):
469
- user_message = history[-1][0]
470
-
471
- # Generate response with streaming
472
- for response in generate_response(
473
- user_message,
474
- history[:-1],
475
- temperature,
476
- top_k,
477
- top_p,
478
- max_tokens
479
- ):
480
- history[-1][1] = response
481
- yield history
482
-
483
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
484
- bot, [chatbot, temperature, top_k, top_p, max_tokens], chatbot
485
  )
486
 
487
- submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
488
- bot, [chatbot, temperature, top_k, top_p, max_tokens], chatbot
 
 
489
  )
490
 
491
- clear.click(lambda: None, None, chatbot, queue=False)
 
 
 
 
492
 
493
  # Launch
494
  if __name__ == "__main__":
 
371
  loader = ModelLoader()
372
  model, tokenizer = loader.load()
373
 
374
+ def generate_text(prompt, temperature, top_k, top_p, max_tokens):
375
+ """Generate text completion with streaming."""
376
+ # Encode the prompt
377
+ input_ids = tokenizer.encode(prompt).ids
378
  input_tensor = torch.tensor([input_ids], dtype=torch.long, device=loader.device)
379
 
380
+ # Start with the prompt
381
+ output_text = prompt
382
+
383
  # Generate with streaming
 
384
  for token_id in model.generate_stream(
385
  input_tensor,
386
  max_new_tokens=max_tokens,
 
389
  top_p=top_p
390
  ):
391
  token_text = tokenizer.decode([token_id])
392
+ output_text += token_text
393
+ yield output_text
394
+
395
+ # Example prompts
396
+ examples = [
397
+ ["The future of artificial intelligence is", 0.8, 50, 0.9, 200],
398
+ ["In a world where technology has advanced beyond our wildest dreams,", 0.9, 40, 0.95, 300],
399
+ ["The key principles of quantum mechanics include", 0.7, 50, 0.9, 250],
400
+ ["Once upon a time in a distant galaxy,", 1.0, 50, 0.95, 200],
401
+ ["The most important factors in climate change are", 0.7, 50, 0.9, 200],
402
+ ]
403
 
404
  # Create Gradio interface
405
+ with gr.Blocks(title="i3-4096ctx Text Completion", theme=gr.themes.Soft()) as demo:
406
  gr.Markdown("""
407
+ # 🚀 i3-4096ctx Language Model - Text Completion
408
+
409
+ A hybrid RWKV-Attention pre-trained model with latent context compression, supporting up to 4096 tokens of context.
410
 
411
+ **Note**: This is a pre-trained base model, not an instruction-tuned chat model. It performs **text completion** - give it a prompt and it will continue the text.
412
  """)
413
 
414
  with gr.Row():
415
+ with gr.Column(scale=2):
416
+ prompt_input = gr.Textbox(
417
+ label="Prompt",
418
+ placeholder="Enter your prompt here... The model will continue from where you leave off.",
419
+ lines=5
420
  )
421
 
422
+ output_text = gr.Textbox(
423
+ label="Generated Text",
424
+ lines=15,
425
+ interactive=False
426
  )
427
 
428
  with gr.Row():
429
+ generate_btn = gr.Button("Generate", variant="primary", scale=2)
430
+ clear_btn = gr.Button("Clear", scale=1)
431
 
432
  with gr.Column(scale=1):
433
  gr.Markdown("### Generation Settings")
 
438
  value=0.8,
439
  step=0.1,
440
  label="Temperature",
441
+ info="Higher = more creative, random"
442
  )
443
 
444
  top_k = gr.Slider(
 
464
  maximum=500,
465
  value=200,
466
  step=10,
467
+ label="Max new tokens",
468
+ info="Maximum length to generate"
469
  )
470
 
471
  gr.Markdown("""
472
  ### Model Info
473
+ - **Type**: Pre-trained base model
474
  - **Architecture**: Hybrid RWKV-Attention
475
  - **Context**: 4096 tokens (compressed)
476
+ - **Kernel**: 512 tokens direct
477
+ - **Compression**: 32 latent tokens/chunk
478
+
479
+ ### Tips for Better Results
480
+ - Start with a clear, specific prompt
481
+ - Lower temperature (0.5-0.8) for factual text
482
+ - Higher temperature (0.9-1.2) for creative writing
483
+ - Adjust top-k and top-p for diversity control
484
  """)
485
 
486
+ gr.Markdown("### Example Prompts")
487
+ gr.Examples(
488
+ examples=examples,
489
+ inputs=[prompt_input, temperature, top_k, top_p, max_tokens],
490
+ outputs=output_text,
491
+ fn=generate_text,
492
+ cache_examples=False
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  )
494
 
495
+ generate_btn.click(
496
+ fn=generate_text,
497
+ inputs=[prompt_input, temperature, top_k, top_p, max_tokens],
498
+ outputs=output_text
499
  )
500
 
501
+ clear_btn.click(
502
+ fn=lambda: ("", ""),
503
+ inputs=None,
504
+ outputs=[prompt_input, output_text]
505
+ )
506
 
507
  # Launch
508
  if __name__ == "__main__":