walidsobhie-code committed on
Commit
2aa22b3
·
1 Parent(s): 7a8afa9

Improve chat.py with system prompt and User/Assistant format

Browse files
Files changed (1) hide show
  1. chat.py +19 -6
chat.py CHANGED
@@ -1,6 +1,12 @@
1
  import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
 
 
 
 
 
 
 
4
  print("Loading your fine-tuned Stack 2.9 model...")
5
  model = AutoModelForCausalLM.from_pretrained(
6
  '/Users/walidsobhi/stack-2-9-final-model',
@@ -11,8 +17,8 @@ tokenizer = AutoTokenizer.from_pretrained('/Users/walidsobhi/stack-2-9-final-mod
11
  print("✅ Ready!\n")
12
 
13
  # Generation settings
14
- MAX_TOKENS = 150
15
- TEMPERATURE = 0.3
16
  TOP_P = 0.9
17
  REP_PENALTY = 1.2
18
 
@@ -27,7 +33,9 @@ while True:
27
  if not prompt.strip():
28
  continue
29
 
30
- inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
 
 
31
  outputs = model.generate(
32
  **inputs,
33
  max_new_tokens=MAX_TOKENS,
@@ -38,12 +46,17 @@ while True:
38
  pad_token_id=tokenizer.eos_token_id
39
  )
40
 
41
- # Extract only the new tokens (skip the prompt)
42
  full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
43
- response = full_response[len(prompt):].strip()
 
 
 
 
 
44
 
45
  # Stop at common stop points
46
- for stop in ['\n\n\n', 'You:', 'AI:', 'User:', 'Assistant:']:
47
  if stop in response:
48
  response = response.split(stop)[0].strip()
49
 
 
1
  import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer
3
 
4
+ SYSTEM_PROMPT = """You are Stack 2.9, an expert AI coding assistant.
5
+ - Answer questions naturally and helpfully
6
+ - When the user asks for code, write clean complete code
7
+ - When the user asks a question, answer in plain language
8
+ - Be concise and practical"""
9
+
10
  print("Loading your fine-tuned Stack 2.9 model...")
11
  model = AutoModelForCausalLM.from_pretrained(
12
  '/Users/walidsobhi/stack-2-9-final-model',
 
17
  print("✅ Ready!\n")
18
 
19
  # Generation settings
20
+ MAX_TOKENS = 200
21
+ TEMPERATURE = 0.4
22
  TOP_P = 0.9
23
  REP_PENALTY = 1.2
24
 
 
33
  if not prompt.strip():
34
  continue
35
 
36
+ # Prepend system prompt
37
+ full_prompt = f"{SYSTEM_PROMPT}\n\nUser: {prompt}\nAssistant:"
38
+ inputs = tokenizer(full_prompt, return_tensors='pt').to(model.device)
39
  outputs = model.generate(
40
  **inputs,
41
  max_new_tokens=MAX_TOKENS,
 
46
  pad_token_id=tokenizer.eos_token_id
47
  )
48
 
49
+ # Decode full response
50
  full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
51
+
52
+ # Extract only the assistant's response (after "Assistant:")
53
+ if "Assistant:" in full_response:
54
+ response = full_response.split("Assistant:")[-1].strip()
55
+ else:
56
+ response = full_response[len(full_prompt):].strip()
57
 
58
  # Stop at common stop points
59
+ for stop in ['\n\n\n', 'User:', 'You:']:
60
  if stop in response:
61
  response = response.split(stop)[0].strip()
62