from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr

model_id = "eduard76/Llama3-8b-good-new"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)
model.eval()

# List of covered topics (currently unused in this script)
covered_topics = {
    "ospf", "bgp", "eigrp", "vxlan", "evpn", "network design", "acl",
    "routing", "spine", "leaf", "underlay", "overlay", "mpls", "qos",
    "firewall", "vpn", "vlan", "subnet", "cidr"
}

# Main chat function
def chat(user_input):
    prompt = f"### Human: {user_input}\n### Assistant:"
    # tokenizer() returns a BatchEncoding (input_ids + attention_mask)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            repetition_penalty=1.2,
            no_repeat_ngram_size=5,
            top_k=50,
            top_p=0.9
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # Strip the original prompt from the response
    if "### Assistant:" in response:
        response = response.split("### Assistant:")[-1].strip()
    return response

# Gradio interface
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Eduard's Virtual Architect – LLaMA3 Fine-Tuned"
)

if __name__ == "__main__":
    iface.launch()
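
# Usage sketch: the model can also be exercised without the web UI by
# calling chat() directly once the model has loaded. The question below
# is only an illustrative example, not part of the original script:
#
#   print(chat("What is the difference between the underlay and overlay in a VXLAN/EVPN fabric?"))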