eduard76 committed on
Commit 36ed5c2 · verified · 1 Parent(s): 04757ef

Update app.py

Files changed (1)
  1. app.py +29 -9
app.py CHANGED
@@ -1,13 +1,33 @@
-from transformers import pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 import gradio as gr
 
-# Load your HF model (must be on Hub)
-pipe = pipeline("text-generation", model="eduard76/Llama3-8b-good", trust_remote_code=True)
+model_id = "eduard76/Llama3-8b-good-new"
 
-def chat_fn(message, history):
-    full_input = "\n".join([f"User: {turn[0]}\nAI: {turn[1]}" for turn in history] + [f"User: {message}\nAI:"])
-    result = pipe(full_input, max_new_tokens=256, do_sample=True, temperature=0.7)
-    response = result[0]['generated_text'].split("AI:")[-1].strip()
-    return response
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",  # automatically uses GPU if available
+    torch_dtype=torch.float16,
+    load_in_4bit=True,
+    trust_remote_code=True
+)
+model.eval()
 
-gr.ChatInterface(chat_fn, title="🤖 Chat with Eduard76's LLM").launch()
+def chat(user_input, history):
+    history_text = "\n".join([f"User: {u}\nAI: {a}" for u, a in history])
+    prompt = f"{history_text}\nUser: {user_input}\nAI:"
+
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.001
+        )
+    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    answer = generated.split("AI:")[-1].strip()
+    return answer
+
+gr.ChatInterface(chat, title="💬 Chat with first Eduard LLM").launch()
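A note on the new loading code: recent transformers releases deprecate passing load_in_4bit=True directly to from_pretrained in favour of an explicit BitsAndBytesConfig, and the hard-coded .to("cuda") raises an error on hardware without a GPU (4-bit bitsandbytes quantization itself expects CUDA, so the fix mainly avoids a stray device string). A minimal sketch of an equivalent setup under those assumptions, reusing the model id and sampling values from the commit; this is not a tested drop-in replacement:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "eduard76/Llama3-8b-good-new"

# Explicit 4-bit config; replaces the deprecated load_in_4bit= kwarg
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",               # picks the GPU when one is available
    quantization_config=bnb_config,
    trust_remote_code=True,
)
model.eval()

def chat(user_input, history):
    history_text = "\n".join(f"User: {u}\nAI: {a}" for u, a in history)
    prompt = f"{history_text}\nUser: {user_input}\nAI:"
    # Follow the device chosen by device_map instead of hard-coding "cuda"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # temperature=0.001 with do_sample=True is effectively greedy decoding
        outputs = model.generate(**inputs, max_new_tokens=512,
                                 do_sample=True, temperature=0.001)
    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated.split("AI:")[-1].strip()

The gr.ChatInterface(chat, ...) wiring stays the same. For a Llama 3 chat model, tokenizer.apply_chat_template would also be a more idiomatic way to build the prompt than the manual "User:/AI:" format, which can leak earlier turns into the answer when the model emits "AI:" itself.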