VirtualInsight committed
Commit ddcd52b · verified · 1 Parent(s): e922296

Update app.py

Files changed (1)
  1. app.py +22 -18
app.py CHANGED
@@ -1,11 +1,11 @@
 import gradio as gr
 import torch
 import json
+import re
 from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
-from ModelArchitecture import Transformer, ModelConfig, generate
 from safetensors.torch import load_file
-import re
+from ModelArchitecture import Transformer, ModelConfig, generate

 # -----------------------------
 # Load model and tokenizer
@@ -13,7 +13,7 @@ import re
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 REPO_ID = "VirtualInsight/Lumen-Instruct"

-# Download model assets
+# Download model files
 model_path = hf_hub_download(repo_id=REPO_ID, filename="model.safetensors")
 tokenizer_path = hf_hub_download(repo_id=REPO_ID, filename="tokenizer.json")
 config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
@@ -40,12 +40,12 @@ print(f"EOS token ID: {EOS_TOKEN_ID}")
 @torch.no_grad()
 def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
     """
-    Generates a clean assistant-only response from the Lumen Instruct model.
+    Generates a clean assistant-only response, removing any echoed user text.
     """
-    # Chat-style input
+    # Chat-style prompt
     formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

-    # Tokenize input
+    # Tokenize
     input_ids = torch.tensor([tokenizer.encode(formatted_prompt).ids], dtype=torch.long, device=device)

     # Generate
@@ -60,29 +60,33 @@ def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
         eos_token_id=EOS_TOKEN_ID,
     )

-    # Decode text
+    # Decode
     full_text = tokenizer.decode(output[0].tolist())

-    # -----------------------------
-    # Clean assistant-only response
-    # -----------------------------
-    # 1. Get part after last assistant marker
+    # Extract assistant’s section
     if "<|im_start|>assistant" in full_text:
         response = full_text.split("<|im_start|>assistant")[-1]
+        response = response.split("<|im_end|>")[0] if "<|im_end|>" in response else response
     else:
         response = full_text

-    # 2. Cut off at end marker if exists
-    response = response.split("<|im_end|>")[0]
-
-    # 3. Remove any lingering user/assistant labels or context lines
+    # Remove leftover role tokens and whitespace
     response = re.sub(r"(?i)\buser\b.*", "", response)
     response = re.sub(r"(?i)\bassistant\b.*", "", response)
-
-    # 4. Clean newlines and whitespace
     response = response.strip()

-    return response
+    # 🧹 Final cleanup: remove leading user echo if present
+    lines = [line.strip() for line in response.splitlines() if line.strip()]
+    if len(lines) >= 2 and (
+        lines[0].lower() == prompt.strip().lower()  # exact echo
+        or lines[0].rstrip("!?.,").lower() == prompt.strip().rstrip("!?.,").lower()  # punctuation variation
+        or len(lines[0].split()) <= 3  # very short echo like "Hello!"
+    ):
+        lines = lines[1:]  # drop the first echo line
+
+    clean_response = "\n".join(lines).strip()
+
+    return clean_response

 # -----------------------------
 # Gradio Interface
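
The loading code between the downloads and the generation function (lines 20-39 of app.py) is elided from this diff. For orientation only, a hypothetical reconstruction is sketched below; the ModelConfig(**config) pattern and the Transformer constructor signature are assumptions about the repo's own ModelArchitecture module, and the paths and device come from the diff above:

import json
import torch
from safetensors.torch import load_file
from tokenizers import Tokenizer

# Hypothetical wiring (assumption: not shown in this diff) of the assets
# downloaded above; ModelConfig and Transformer come from the repo's custom
# ModelArchitecture module, whose exact API this sketch only guesses at.
with open(config_path) as f:
    config = ModelConfig(**json.load(f))            # assumed field mapping

tokenizer = Tokenizer.from_file(tokenizer_path)     # tokenizers API

model = Transformer(config).to(device)
model.load_state_dict(load_file(model_path))        # safetensors state dict
model.eval()

EOS_TOKEN_ID = tokenizer.token_to_id("<|im_end|>")  # assumption: <|im_end|> is the EOS marker
print(f"EOS token ID: {EOS_TOKEN_ID}")              # this line appears in the diff's hunk context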
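
generate() itself also comes from ModelArchitecture, so its internals are not part of this diff. Since it is called with temperature and top_p, here is a generic, self-contained sketch of the nucleus (top-p) sampling step such a decoding loop typically performs per token; sample_top_p and the toy logits are illustrative, not the repo's actual code:

import torch

def sample_top_p(logits: torch.Tensor, temperature: float = 0.7, top_p: float = 0.9) -> int:
    # Temperature-scale the logits, then keep the smallest set of tokens whose
    # cumulative probability covers top_p, and sample from that set.
    probs = torch.softmax(logits / temperature, dim=-1)
    sorted_probs, sorted_idx = torch.sort(probs, descending=True)
    cumulative = torch.cumsum(sorted_probs, dim=-1)
    # Zero out tokens once the mass before them already exceeds top_p
    # (this always keeps at least the most likely token).
    sorted_probs[cumulative - sorted_probs > top_p] = 0.0
    sorted_probs /= sorted_probs.sum()
    next_sorted = torch.multinomial(sorted_probs, num_samples=1)
    return sorted_idx[next_sorted].item()

# Toy example with a 5-token vocabulary
logits = torch.tensor([2.0, 1.0, 0.5, -1.0, -3.0])
print(sample_top_p(logits))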
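
Finally, a minimal standalone sketch of how the committed post-processing behaves end to end; the prompt and decoded string below are made-up stand-ins for real model output:

import re

prompt = "Hello!"
full_text = (
    "<|im_start|>user\nHello!<|im_end|>\n"
    "<|im_start|>assistant\nHello!\nHow can I help you today?<|im_end|>"
)

# Extract the assistant's section, truncating at the end marker (new code path)
if "<|im_start|>assistant" in full_text:
    response = full_text.split("<|im_start|>assistant")[-1]
    response = response.split("<|im_end|>")[0] if "<|im_end|>" in response else response
else:
    response = full_text

# Strip leftover role tokens and whitespace
response = re.sub(r"(?i)\buser\b.*", "", response)
response = re.sub(r"(?i)\bassistant\b.*", "", response)
response = response.strip()

# Drop a leading line that merely echoes the user prompt
lines = [line.strip() for line in response.splitlines() if line.strip()]
if len(lines) >= 2 and (
    lines[0].lower() == prompt.strip().lower()
    or lines[0].rstrip("!?.,").lower() == prompt.strip().rstrip("!?.,").lower()
    or len(lines[0].split()) <= 3
):
    lines = lines[1:]

print("\n".join(lines).strip())  # -> How can I help you today?

Note that the third condition drops any first line of three or fewer words, so a genuine short opener such as "Sure!" would also be removed whenever more lines follow; that is a trade-off of the committed heuristic, not a bug in this sketch.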