# NOTE(review): the lines below were HuggingFace Space page residue
# ("Spaces: Sleeping Sleeping") captured by the scrape — not part of the app.
| import streamlit as st | |
| from accelerate import disk_offload | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| ) | |
| import torch | |
def invoke(input_text, model, tokenizer):
    """Generate a merchandising-assistant reply for ``input_text``.

    Builds a system+user chat, renders it with the tokenizer's chat
    template, generates up to 500 new tokens, and returns only the
    assistant's portion of the decoded text.

    Args:
        input_text: The customer's message.
        model: A causal-LM with a ``generate(**inputs, ...)`` method.
        tokenizer: Tokenizer providing ``apply_chat_template``, ``__call__``
            and ``decode``.

    Returns:
        The generated assistant reply as a plain string.
    """
    instruction = """You are a top-rated merchandising agent.
Be polite to customers and answer all their questions.
"""
    messages = [{"role": "system", "content": instruction},
                {"role": "user", "content": input_text}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True)
    # NOTE(review): if the model was placed on CUDA, these CPU tensors should
    # be moved with inputs.to(model.device) before generate — TODO confirm
    # against the deployment device.
    outputs = model.generate(**inputs, max_new_tokens=500, num_return_sequences=1)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text contains the full prompt; everything after the first
    # "assistant" role marker is the model's reply.  Previously this used
    # text.split("assistant")[1], which raised IndexError when the marker was
    # absent and truncated the reply at a second occurrence of the word.
    _, marker, reply = text.partition("assistant")
    return reply.strip() if marker else text.strip()
def get_device_map() -> str:
    """Return the device string to load the model on: 'cuda' if a GPU is
    visible to torch, otherwise 'cpu'."""
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'
# --- One-time model/tokenizer setup (runs at import, i.e. on every
# Streamlit script start unless cached upstream) -----------------------
device = get_device_map()  # 'cpu' on machines without a visible GPU
# HuggingFace repository ID
run_name = "Llama-3.2-Merchandiser"
repo_id = f"NiazTahi/{run_name}"
# Load the fine-tuned causal LM in fp16; low_cpu_mem_usage streams weights
# instead of materialising a full fp32 copy first.
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float16, device_map=device, low_cpu_mem_usage = True)
# Offload weights to local disk ("offload/") to fit constrained RAM;
# NOTE(review): combining this with device_map above presumably targets a
# low-memory Space — confirm accelerate hooks move inputs correctly.
disk_offload(model=model, offload_dir="offload")
# trust_remote_code allows the repo's custom tokenizer code to run;
# legacy=False selects the newer tokenizer behaviour.
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True, legacy=False)
# --- Streamlit UI ------------------------------------------------------
st.title("Merchandiser App")
# Single-option selector kept for future task types.
selected_task = st.sidebar.selectbox("Select NLP Task:", ["Chat with AI"])
input_text = st.text_area("Enter Text:")
if st.button("Process"):
    # Only generate when the chat task is selected AND the user typed text;
    # otherwise show the usage hint (also shown for an empty text box).
    if selected_task == "Chat with AI" and input_text:
        st.subheader("Generated Text:")
        result = invoke(input_text, model, tokenizer)
        st.write(result)
    else:
        st.info("Please enter text and select a task from the sidebar.")