samzito12 committed on
Commit
503ff85
·
1 Parent(s): 658ec58

updated app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -42
app.py CHANGED
@@ -1,68 +1,71 @@
1
  import gradio as gr
2
- from llama_cpp import Llama
 
3
 
4
- # Load YOUR fine-tuned model
5
- model_path = "samzito12/lora_model"
6
 
7
- print("Loading model...")
8
- llm = Llama.from_pretrained(
9
- repo_id=model_path,
10
- filename="llama-3.2-3b-instruct.Q8_0.gguf",
11
- n_ctx=2048,
12
- n_threads=2,
13
- verbose=False
 
 
 
14
  )
15
 
16
- # System prompt to fix identity issue
17
- SYSTEM_PROMPT = """You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on the FineTome dataset. You are NOT ChatGPT and you are NOT made by OpenAI. You were created as part of a university machine learning project."""
18
 
19
  def chat(message, history):
20
- """Generate response from YOUR fine-tuned model"""
 
21
 
22
- # Build conversation with system prompt
23
- conversation = f"<|start_header_id|>system<|end_header_id|>\n\n{SYSTEM_PROMPT}<|eot_id|>"
24
-
25
- # Add chat history
26
  for user_msg, assistant_msg in history:
27
- conversation += f"<|start_header_id|>user<|end_header_id|>\n\n{user_msg}<|eot_id|>"
28
- conversation += f"<|start_header_id|>assistant<|end_header_id|>\n\n{assistant_msg}<|eot_id|>"
 
29
 
30
- # Add current message
31
- conversation += f"<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
32
 
33
- # Generate response
34
- response = llm(
35
- conversation,
36
- max_tokens=512,
37
- temperature=0.7,
38
- top_p=0.9,
39
- stop=["<|eot_id|>", "<|start_header_id|>"],
40
- echo=False
41
- )
 
42
 
43
- return response['choices'][0]['text'].strip()
 
 
 
 
 
 
 
 
 
44
 
45
- # Create Gradio interface
46
  demo = gr.ChatInterface(
47
  chat,
48
  title="🦙 My Fine-Tuned Llama-3.2-3B Chatbot",
49
  description="""
50
- **Model**: Llama-3.2-3B fine-tuned on FineTome-100k dataset
51
-
52
- This chatbot uses a custom fine-tuned model, NOT ChatGPT.
53
 
54
- Created for ID2223 Lab 2 at KTH.
55
  """,
56
  examples=[
57
  "What model are you?",
58
  "Explain machine learning in simple terms",
59
- "Write a Python function to reverse a string",
60
- "What is the weather like in Stockholm?"
61
  ],
62
- theme="soft",
63
- retry_btn="🔄 Retry",
64
- undo_btn="↩️ Undo",
65
- clear_btn="🗑️ Clear"
66
  )
67
 
68
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
 
5
+ print("Chargement du modèle...")
 
6
 
7
+ model_name = "samzito12/lora_model"
8
+
9
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
10
+ tokenizer.pad_token = tokenizer.eos_token
11
+
12
+ model = AutoModelForCausalLM.from_pretrained(
13
+ model_name,
14
+ torch_dtype=torch.float16,
15
+ device_map="cpu",
16
+ low_cpu_mem_usage=True
17
  )
18
 
19
+ SYSTEM_PROMPT = "You are a helpful AI assistant based on Meta's Llama-3.2-3B model, fine-tuned on the FineTome dataset. You are NOT ChatGPT and NOT made by OpenAI."
 
20
 
21
def chat(message, history):
    """Generate a reply from the fine-tuned model for the Gradio chat UI.

    Args:
        message: The latest user message (str).
        history: Previous turns as a list of (user_msg, assistant_msg) tuples,
            as supplied by gr.ChatInterface in tuple mode.

    Returns:
        The assistant's reply as a plain string.
    """
    # Prepend the system prompt, then replay the conversation so far.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    for user_msg, assistant_msg in history:
        conversation += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    # BUG FIX: the tokenizer keyword is `truncation`, not `truncate`.
    # The old spelling was silently ignored, so long chats were never
    # truncated and could exceed the model's context window.
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=2048,
    )

    # Inference only — disable autograd to save memory and time.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode the full sequence (prompt + completion) as plain text.
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the text after the final "Assistant:" marker; fall back to
    # stripping the prompt prefix if the marker is absent from the decode.
    if "Assistant:" in full_response:
        response = full_response.split("Assistant:")[-1].strip()
    else:
        response = full_response[len(conversation):].strip()

    return response
54
 
 
55
# Gradio chat UI wired to the fine-tuned model's chat() callback.
_DESCRIPTION = """
**Model:** Llama-3.2-3B fine-tuned on FineTome-100k dataset

This is NOT ChatGPT - it's a custom fine-tuned model for ID2223 Lab 2.
"""

# Canned prompts shown under the input box.
_EXAMPLES = [
    "What model are you?",
    "Explain machine learning in simple terms",
    "Write a Python function to reverse a string",
]

demo = gr.ChatInterface(
    chat,
    title="🦙 My Fine-Tuned Llama-3.2-3B Chatbot",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    theme="soft",
)
70
 
71
  if __name__ == "__main__":