VirtualInsight committed
Commit e922296 · verified · 1 Parent(s): 28bae52

Update app.py

Files changed (1)
  1. app.py +20 -10
app.py CHANGED
@@ -5,6 +5,7 @@ from tokenizers import Tokenizer
 from huggingface_hub import hf_hub_download
 from ModelArchitecture import Transformer, ModelConfig, generate
 from safetensors.torch import load_file
+import re

 # -----------------------------
 # Load model and tokenizer
@@ -12,7 +13,7 @@ from safetensors.torch import load_file
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 REPO_ID = "VirtualInsight/Lumen-Instruct"

-# Download model assets from Hugging Face Hub
+# Download model assets
 model_path = hf_hub_download(repo_id=REPO_ID, filename="model.safetensors")
 tokenizer_path = hf_hub_download(repo_id=REPO_ID, filename="tokenizer.json")
 config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
@@ -27,7 +28,7 @@ model.load_state_dict(load_file(model_path, device=str(device)), strict=False)
 model.eval()

 # -----------------------------
-# Special Tokens for Chat Format
+# Special Tokens
 # -----------------------------
 EOS_TOKEN = "<|im_end|>"
 EOS_TOKEN_ID = tokenizer.encode(EOS_TOKEN).ids[0]
@@ -41,13 +42,13 @@ def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
     """
     Generates a clean assistant-only response from the Lumen Instruct model.
     """
-    # Format input as a conversation prompt
+    # Chat-style input
     formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

     # Tokenize input
     input_ids = torch.tensor([tokenizer.encode(formatted_prompt).ids], dtype=torch.long, device=device)

-    # Generate output
+    # Generate
     output = generate(
         model,
         input_ids,
@@ -59,18 +60,27 @@ def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
         eos_token_id=EOS_TOKEN_ID,
     )

-    # Decode full text
+    # Decode text
     full_text = tokenizer.decode(output[0].tolist())

-    # 🧹 Clean extraction of assistant’s reply only
+    # -----------------------------
+    # Clean assistant-only response
+    # -----------------------------
+    # 1. Get part after last assistant marker
     if "<|im_start|>assistant" in full_text:
         response = full_text.split("<|im_start|>assistant")[-1]
-        response = response.split("<|im_end|>")[0] if "<|im_end|>" in response else response
     else:
         response = full_text

-    # Remove potential leftover role tokens and clean spaces
-    response = response.replace("assistant", "").replace("user", "").strip()
+    # 2. Cut off at end marker if exists
+    response = response.split("<|im_end|>")[0]
+
+    # 3. Remove any lingering user/assistant labels or context lines
+    response = re.sub(r"(?i)\buser\b.*", "", response)
+    response = re.sub(r"(?i)\bassistant\b.*", "", response)
+
+    # 4. Clean newlines and whitespace
+    response = response.strip()

     return response

@@ -91,7 +101,7 @@ demo = gr.Interface(
 )

 # -----------------------------
-# Launch Interface
+# Launch
 # -----------------------------
 if __name__ == "__main__":
     demo.launch(share=True)
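
For reference, the response-cleanup logic this commit introduces can be exercised on its own. Below is a minimal sketch of those steps outside the app; the helper name clean_assistant_reply and the sample string are illustrative, not part of the repo:

```python
import re

def clean_assistant_reply(full_text: str) -> str:
    # 1. Keep only the text after the last assistant marker.
    if "<|im_start|>assistant" in full_text:
        response = full_text.split("<|im_start|>assistant")[-1]
    else:
        response = full_text
    # 2. Truncate at the end-of-turn marker, if the model emitted one.
    response = response.split("<|im_end|>")[0]
    # 3. Drop lingering role labels; without re.DOTALL, ".*" erases
    #    from the label only to the end of that line.
    response = re.sub(r"(?i)\buser\b.*", "", response)
    response = re.sub(r"(?i)\bassistant\b.*", "", response)
    # 4. Trim surrounding whitespace.
    return response.strip()

# Hypothetical decoded output of the shape the app expects:
sample = "<|im_start|>user\nHi!<|im_end|>\n<|im_start|>assistant\nHello there!<|im_end|>"
print(clean_assistant_reply(sample))  # -> Hello there!
```

Note that the regex pass in step 3 deletes from any bare "user"/"assistant" word to the end of that line, so it can also remove those words when they appear legitimately in a reply.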