# Spaces: Runtime error
| import json | |
| import os | |
| from pprint import pprint | |
| import bitsandbytes as bnb | |
| import pandas as pd | |
| import torch | |
| import torch.nn as nn | |
| import transformers | |
| from datasets import load_dataset | |
| from huggingface_hub import notebook_login | |
| from peft import ( | |
| LoraConfig, | |
| PeftConfig, | |
| PeftModel, | |
| get_peft_model, | |
| prepare_model_for_kbit_training, | |
| ) | |
| from transformers import ( | |
| AutoConfig, | |
| AutoModelForCausalLM, | |
| AutoTokenizer, | |
| BitsAndBytesConfig, | |
| ) | |
# Streamlit is imported here (ahead of the UI section's own import) because the
# model loader below is wrapped in ``st.cache_resource``.
import streamlit as st

# Restrict CUDA to device 0.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Hub id of the LoRA adapter; its PEFT config names the base model to load.
PEFT_MODEL = 'deedax/falcon-7b-personal-assistant'
config = PeftConfig.from_pretrained(PEFT_MODEL)

# 4-bit NF4 quantization with double quantization, computing in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)


@st.cache_resource
def _load_model_and_tokenizer():
    """Load the quantized base model, attach the adapter, and build the tokenizer.

    Streamlit re-executes this script from top to bottom on every user
    interaction. Caching the loader as a resource means the weights are loaded
    once per server process instead of once per chat message.

    Returns:
        A ``(model, tokenizer)`` tuple. The model's generation config has
        already been set up for this app.
    """
    base_model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        quantization_config=bnb_config,
        device_map='auto',
        trust_remote_code=True,
    )

    loaded_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    # Reuse the end-of-sequence token as the padding token.
    loaded_tokenizer.pad_token = loaded_tokenizer.eos_token

    peft_model = PeftModel.from_pretrained(base_model, PEFT_MODEL)
    peft_model.config.use_cache = False

    # Configure generation once, on the shared (cached) model object.
    gen_config = peft_model.generation_config
    gen_config.max_new_tokens = 200
    # NOTE(review): temperature / top_p only take effect when sampling is
    # enabled (do_sample=True). do_sample is not set here, so whether these
    # two values matter depends on the base model's shipped generation
    # config -- confirm the intended decoding strategy.
    gen_config.temperature = 0.1
    gen_config.top_p = 0.3
    gen_config.num_return_sequences = 1
    gen_config.pad_token_id = loaded_tokenizer.eos_token_id
    gen_config.eos_token_id = loaded_tokenizer.eos_token_id

    return peft_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer()

# Device that tokenized inputs are moved to before generation.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Module-level alias used by generate_response().
generation_config = model.generation_config
def generate_response(question: str) -> str:
    """Answer one interview question in the candidate's voice.

    The question is embedded in a fixed interview prompt, the prompt is run
    through the module-level ``model``, and only the newly generated text is
    returned.

    Args:
        question: The interviewer's question, inserted verbatim into the prompt.

    Returns:
        The generated continuation after the prompt, with surrounding
        whitespace stripped.
    """
    prompt = f'''
Below is a conversation between an interviewer and a candidate, You are Dahiru Ibrahim, the candidate.
Your contact details are as follows
github:https://github.com/Daheer
youtube:https://www.youtube.com/@deedaxinc
linkedin:https://linkedin.com/in/daheer-deedax
huggingface:https://huggingface.co/deedax
email:suhayrid6@gmail.com
phone:+2348147116750
Provide very SHORT, CONCISE, DIRECT and ACCURATE answers to the interview questions.
You do not respond as 'Interviewer' or pretend to be 'Interviewer'. You only respond ONCE as Candidate.
Interviewer: {question}
Candidate:
'''.strip()

    encoding = tokenizer(prompt, return_tensors='pt').to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # new tokens. Slicing off the prompt by token count isolates the reply
    # without searching the decoded text for 'Candidate:'. That search picked
    # the wrong position when the question itself contained the marker, and
    # silently dropped leading characters when the marker was not found.
    prompt_length = encoding.input_ids.shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
| import streamlit as st | |
| import random | |
# Page title. The emoji is the speech-balloon character (U+1F4AC).
st.title("💬 Deedax Chat (Falcon-7B-Instruct)")

# Seed the transcript with a greeting the first time the session runs.
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Ask me anything about Dahiru!"}]

# Replay the stored transcript; Streamlit re-runs the script on every interaction.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    # NOTE(review): the transcript is reset on every new question, so only the
    # latest exchange is kept in session state. Preserved as-is -- confirm
    # this is intended rather than keeping the full history.
    st.session_state.messages = []
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    # Store the reply under the "assistant" role, the same role it is
    # rendered with below, so a later replay shows it identically.
    msg = {"role": "assistant", "content": str(generate_response(prompt))}
    st.session_state.messages.append(msg)
    st.chat_message("assistant").write(msg["content"])