Spaces:
Sleeping
Sleeping
| from peft import AutoPeftModelForCausalLM | |
| from transformers import GenerationConfig | |
| from transformers import AutoTokenizer, GPTQConfig | |
| import torch | |
| import streamlit as st | |
| # model = AutoModelForCausalLM.from_pretrained( | |
| # "tiiuae/falcon-7b-instruct", | |
| # torch_dtype=torch.bfloat16, | |
| # trust_remote_code=True, | |
| # device_map="auto", | |
| # low_cpu_mem_usage=True, | |
| # ) | |
# 4-bit GPTQ quantization settings with the exllama kernels turned off.
# NOTE(review): presumably disabled because inference runs on CPU - confirm.
gptq_config = GPTQConfig(bits=4, disable_exllama=True)


@st.cache_resource
def _load_model_and_tokenizer():
    """Load the PEFT model and its tokenizer once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    loading at module level would reload the weights on every rerun.
    ``st.cache_resource`` keeps one shared instance alive across reruns.

    Returns:
        A ``(model, tokenizer)`` tuple for the "Aneeth/zephyr_10k" checkpoint.
    """
    # NOTE(review): trust_remote_code=True allows code shipped with the hub
    # repository to run locally - keep only if that repository is trusted.
    peft_model = AutoPeftModelForCausalLM.from_pretrained(
        "Aneeth/zephyr_10k",
        return_dict=True,
        torch_dtype=torch.float32,
        trust_remote_code=True,
        quantization_config=gptq_config,
    )
    peft_tokenizer = AutoTokenizer.from_pretrained("Aneeth/zephyr_10k")
    return peft_model, peft_tokenizer


model, tokenizer = _load_model_and_tokenizer()

# NOTE(review): with top_k=1 only the single most likely token survives
# filtering, so sampling is effectively greedy and temperature has little
# effect - confirm this is intended. max_new_tokens=5000 is also a very
# large budget for CPU inference.
generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,
    temperature=0.5,
    max_new_tokens=5000,
    # Reuse the end-of-sequence token for padding.
    pad_token_id=tokenizer.eos_token_id,
)
def process_data_sample(example):
    """Wrap a raw instruction in the system/user/assistant prompt template.

    Args:
        example: Mapping with an ``"instruction"`` key holding the user's text.

    Returns:
        The full prompt string, ending with the assistant tag so that the
        model's continuation follows it.
    """
    system_turn = "<|system|>\n Generate an authentic job description using the given input.\n"
    user_turn = "<|user|>\n" + example["instruction"] + "\n"
    assistant_tag = "<|assistant|>\n"
    return "".join((system_turn, user_turn, assistant_tag))
def generate_text(prompt):
    """Generate a job description for *prompt* with the loaded model.

    Args:
        prompt: Free-form user text describing the job.

    Returns:
        The newly generated text only, decoded with special tokens removed.
    """
    inp_str = process_data_sample({"instruction": prompt})

    # Put the input tensors on the model's device rather than assuming CPU.
    inputs = tokenizer(inp_str, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, generation_config=generation_config)

    # For causal LMs generate() returns prompt + continuation; drop the
    # prompt tokens so the system/user template is not echoed to the caller.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
def main():
    """Render the Streamlit page: prompt box, button, and generated output."""
    st.title("Zephyr Inference")

    # "Type here..." is shown as a placeholder hint rather than an initial
    # value, so it can never be submitted as if it were a real prompt.
    input_text = st.text_area("Input JD prompt", placeholder="Type here...")

    # Nothing to do until the user clicks the button.
    if not st.button("Generate Text"):
        return

    # Avoid a long generation run on an empty or whitespace-only prompt.
    if not input_text.strip():
        st.warning("Please enter a prompt before generating.")
        return

    generated_text = generate_text(input_text)
    st.subheader("Generated Text:")
    st.write(generated_text)


if __name__ == "__main__":
    main()