eduard76 committed
Commit 0ebb43f · verified · 1 Parent(s): 5754868

Update app.py

Files changed (1): app.py (+33 -28)
app.py CHANGED
@@ -1,49 +1,54 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import gradio as gr
 
 model_id = "eduard76/Llama3-8b-good-new"
 
+# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="auto",  # can be "cuda:0" or "cpu" if you hit an error
-    torch_dtype=torch.float16,  # or .bfloat16 if you prefer
+    device_map="auto",
+    torch_dtype=torch.float16,
     trust_remote_code=True
 )
+model.eval()
 
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-
+# List of covered topics
 covered_topics = {
     "ospf", "bgp", "eigrp", "vxlan", "evpn", "network design", "acl", "routing",
     "spine", "leaf", "underlay", "overlay", "mpls", "qos", "firewall",
     "vpn", "vlan", "subnet", "cidr"
 }
 
+# Main chat function
 def chat(user_input):
-    prompt = f"""You are a Cisco-certified network architect trained in OSPF, BGP, EIGRP, VLAN, STP, RSTP design principles.
-If the user's question is unclear, clarify first.
-If the topic is outside OSPF, BGP, EIGRP, VLAN, STP, RSTP, respond with: "I'm not trained on that topic."
-Give short, clear, non-repetitive answers.
-
-User: {user_input}
-AI:"""
-
-    response = pipe(
-        prompt,
-        max_new_tokens=256,
-        do_sample=False,
-        temperature=0.0,
-        repetition_penalty=1.2,
-        no_repeat_ngram_size=5,
-        top_k=50,
-        top_p=0.9
-    )[0]["generated_text"]
-
-    return response[len(prompt):].strip()
-
-iface = gr.Interface(fn=chat, inputs="text", outputs="text", title="Eduard's 1st virtual Architect")
+    prompt = f"""### Human: {user_input}\n### Assistant:"""
+
+    input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+    with torch.no_grad():
+        output = model.generate(
+            **input_ids,
+            max_new_tokens=256,
+            do_sample=False,
+            temperature=0.0,
+            repetition_penalty=1.2,
+            no_repeat_ngram_size=5,
+            top_k=50,
+            top_p=0.9
+        )
+
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    # Strip the initial prompt from the response
+    if "### Assistant:" in response:
+        response = response.split("### Assistant:")[-1].strip()
+
+    return response
+
+# Gradio interface
+iface = gr.Interface(fn=chat, inputs="text", outputs="text", title="Eduard's Virtual Architect – LLaMA3 Fine-Tuned")
 
 if __name__ == "__main__":
     iface.launch()
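
Note: covered_topics is defined in both versions of app.py but never referenced, so nothing in the code actually restricts questions to those topics. A minimal sketch of how the set could gate chat() with simple keyword matching (the is_covered helper is hypothetical, not part of this commit):

# Hypothetical sketch, not in the committed app.py: reject questions
# that mention none of the covered topics before calling model.generate().
covered_topics = {
    "ospf", "bgp", "eigrp", "vxlan", "evpn", "network design", "acl", "routing",
    "spine", "leaf", "underlay", "overlay", "mpls", "qos", "firewall",
    "vpn", "vlan", "subnet", "cidr"
}

def is_covered(user_input: str) -> bool:
    """Return True if the question mentions at least one covered topic."""
    text = user_input.lower()
    return any(topic in text for topic in covered_topics)

print(is_covered("How do I tune OSPF timers?"))  # True  ("ospf" matches)
print(is_covered("How do I bake bread?"))        # False (no topic matches)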