| import json |
| import os |
| from pprint import pprint |
| import bitsandbytes as bnb |
| import pandas as pd |
| import torch |
| import torch.nn as nn |
| import transformers |
| from datasets import load_dataset |
| from huggingface_hub import notebook_login |
| from peft import ( |
| LoraConfig, |
| PeftConfig, |
| PeftModel, |
| get_peft_model, |
| prepare_model_for_kbit_training, |
| ) |
| from transformers import ( |
| AutoConfig, |
| AutoModelForCausalLM, |
| AutoTokenizer, |
| BitsAndBytesConfig, |
| ) |
# Streamlit is needed here for resource caching; importing it again later in
# the file is harmless.
import streamlit as st

# Restrict the process to the first GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
PEFT_MODEL = 'deedax/falcon-7b-personal-assistant'

# Adapter metadata; it names the base model the adapter was trained on.
config = PeftConfig.from_pretrained(PEFT_MODEL)

# 4-bit NF4 quantization with double quantization and bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)


@st.cache_resource
def _load_model_and_tokenizer():
    """Load the quantized base model, its tokenizer and the PEFT adapter once.

    Streamlit re-executes the whole script on every user interaction.
    Without caching, each chat message would reload the model from scratch,
    so the loaded objects are kept in Streamlit's resource cache and reused
    across reruns and sessions.

    Returns:
        A ``(model, tokenizer)`` tuple: the adapter-wrapped causal LM and the
        tokenizer of the base model, with its pad token set to the EOS token.
    """
    base_model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        quantization_config=bnb_config,
        device_map='auto',
        trust_remote_code=True,
    )

    base_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    # Reuse EOS as the padding token.
    base_tokenizer.pad_token = base_tokenizer.eos_token

    peft_model = PeftModel.from_pretrained(base_model, PEFT_MODEL)
    peft_model.config.use_cache = False
    return peft_model, base_tokenizer


model, tokenizer = _load_model_and_tokenizer()

# Device that tokenized inputs are moved to before generation.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Generation settings shared by every request. These assignments are
# idempotent, so re-running them against the cached model is safe.
# NOTE(review): do_sample is not set here; unless the model's default
# generation config enables sampling, temperature/top_p have no effect under
# greedy decoding - confirm the intended decoding strategy.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
generation_config.temperature = 0.1
generation_config.top_p = 0.3
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
|
|
def generate_response(question: str) -> str:
    """Answer one interview question in the persona of the candidate.

    Builds a single-turn prompt around ``question``, runs the module-level
    ``model`` with the shared ``generation_config``, and returns only the
    text produced after the prompt.

    Args:
        question: The interviewer's question, inserted verbatim into the
            prompt.

    Returns:
        The generated reply with surrounding whitespace stripped.
    """
    # Lines are joined with explicit newlines so the prompt text does not
    # depend on how this source file is indented.
    prompt = (
        "Below is a conversation between an interviewer and a candidate, "
        "You are Dahiru Ibrahim, the candidate.\n"
        "Your contact details are as follows\n"
        "github:https://github.com/Daheer\n"
        "youtube:https://www.youtube.com/@deedaxinc\n"
        "linkedin:https://linkedin.com/in/daheer-deedax\n"
        "huggingface:https://huggingface.co/deedax\n"
        "email:suhayrid6@gmail.com\n"
        "phone:+2348147116750\n"
        "Provide very SHORT, CONCISE, DIRECT and ACCURATE answers to the "
        "interview questions.\n"
        "You do not respond as 'Interviewer' or pretend to be 'Interviewer'. "
        "You only respond ONCE as Candidate.\n"
        f"Interviewer: {question}\n"
        "Candidate:"
    )

    encoding = tokenizer(prompt, return_tensors='pt').to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )

    # A causal LM returns the prompt tokens followed by the continuation.
    # Slicing by prompt length isolates the reply without searching the
    # decoded text for a 'Candidate:' marker, which picks the wrong position
    # when the question itself contains that marker.
    prompt_length = encoding.input_ids.shape[-1]
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
|
|
| import streamlit as st |
| import random |
|
|
st.title("💬 Deedax Chat (Falcon-7B-Instruct)")

# Seed the stored history with a greeting the first time a session runs.
if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {"role": "assistant", "content": "Ask me anything about Dahiru!"}
    ]

# Streamlit reruns the script on every interaction, so the stored messages
# are replayed to rebuild the visible conversation.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input():
    # The stored history is reset here, so it only ever holds the latest
    # question/answer pair.
    st.session_state.messages = []
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    # Store the reply under the same "assistant" role it is rendered with,
    # so the replay loop above shows it consistently on the next rerun.
    msg = {"role": "assistant", "content": generate_response(prompt)}
    st.session_state.messages.append(msg)
    st.chat_message("assistant").write(msg["content"])