walidsobhie-code committed on
Commit
7a8afa9
·
1 Parent(s): 4ca507e

Add interactive chat script with improved generation settings

Browse files
Files changed (1) hide show
  1. chat.py +56 -0
chat.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+
# Interactive command-line chat with a locally stored fine-tuned causal LM.
#
# The script loads the checkpoint once, then repeatedly reads a prompt from
# stdin, samples a continuation and prints the reply until the user quits.

# Checkpoint directory; the weights and the tokenizer are loaded from the same place.
MODEL_PATH = '/Users/walidsobhi/stack-2-9-final-model'

# Generation settings
MAX_TOKENS = 150
TEMPERATURE = 0.3
TOP_P = 0.9
REP_PENALTY = 1.2

# Inputs (compared case-insensitively, surrounding whitespace ignored) that end the session.
QUIT_COMMANDS = frozenset({'quit', 'exit', 'q'})

# Markers at which a reply is cut off, so the model does not continue the
# conversation on the user's behalf.
STOP_MARKERS = ('\n\n\n', 'You:', 'AI:', 'User:', 'Assistant:')


def is_quit_command(text: str) -> bool:
    """Return True when *text* is one of the quit commands.

    Surrounding whitespace and letter case are ignored, so ``" Quit "``
    ends the session just like ``"quit"``.
    """
    return text.strip().lower() in QUIT_COMMANDS


def truncate_at_stop(text: str, stops=STOP_MARKERS) -> str:
    """Return *text* cut at the earliest occurrence of any stop marker.

    Args:
        text: Decoded model output.
        stops: Iterable of marker strings; defaults to ``STOP_MARKERS``.

    Returns:
        The part of *text* before the first marker found (or all of *text*
        when no marker is present), with surrounding whitespace stripped.
    """
    positions = [pos for pos in (text.find(stop) for stop in stops) if pos != -1]
    if positions:
        text = text[:min(positions)]
    return text.strip()


def load_model(model_path: str = MODEL_PATH):
    """Load the model and tokenizer stored under *model_path*.

    Returns:
        A ``(model, tokenizer)`` pair. The model is loaded in float16 with
        ``device_map='auto'``.
    """
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map='auto',
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer


def generate_reply(model, tokenizer, prompt: str) -> str:
    """Sample a reply to *prompt* and return only the newly generated text.

    Args:
        model: Causal LM returned by ``load_model``.
        tokenizer: Tokenizer matching *model*.
        prompt: Raw user input.

    Returns:
        The generated continuation, truncated at the first stop marker.
    """
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    prompt_length = inputs['input_ids'].shape[1]
    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        repetition_penalty=REP_PENALTY,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Drop the prompt by token count rather than by slicing the decoded string:
    # decoding is not guaranteed to reproduce the prompt character-for-character,
    # so a character offset can clip the reply or leak part of the prompt.
    new_tokens = outputs[0][prompt_length:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return truncate_at_stop(reply)


def main(model_path: str = MODEL_PATH) -> None:
    """Run the interactive chat loop until the user quits or stdin closes."""
    print("Loading your fine-tuned Stack 2.9 model...")
    model, tokenizer = load_model(model_path)
    print("✅ Ready!\n")

    print(f"Settings: max_tokens={MAX_TOKENS}, temperature={TEMPERATURE}, top_p={TOP_P}\n")

    # Interactive loop
    while True:
        try:
            prompt = input("You: ")
            if is_quit_command(prompt):
                break
            if not prompt.strip():
                continue

            response = generate_reply(model, tokenizer, prompt)
            print(f"AI: {response}\n")

        # EOFError covers Ctrl-D and exhausted piped input, which would
        # otherwise terminate the script with a traceback.
        except (KeyboardInterrupt, EOFError):
            print("\nExiting...")
            break

    print("Goodbye!")


if __name__ == "__main__":
    main()