import base64

import streamlit as st
import torch
from langchain.prompts import PromptTemplate
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Load the PEFT adapter config saved in this repo's root
peft_model_id = "./"
config = PeftConfig.from_pretrained(peft_model_id)
# 4-bit NF4 quantization with double quantization and bfloat16 compute;
# the fp32 offload flag lets modules that don't fit on the GPU run on CPU
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_enable_fp32_cpu_offload=True,
)
# Place every module on CPU (module names match the base checkpoint's layout)
device_map = {
    "transformer.word_embeddings": "cpu",
    "transformer.word_embeddings_layernorm": "cpu",
    "lm_head": "cpu",
    "transformer.h": "cpu",
    "transformer.ln_f": "cpu",
}
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=quantization_config,
    device_map=device_map,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token  # base model has no pad token; reuse EOS
# Attach the fine-tuned LoRA adapter weights to the quantized base model
model = PeftModel.from_pretrained(model, peft_model_id)
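# Inference-only app: switch off dropout etc. (assumption: no further training here)
model.eval()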
prompt_template = """Answer the following multiple choice question by giving the most appropriate response. Answer should be one among [A, B, C, D, E] \
in order of the most likely to be correct to the least likely to be correct.
Question: {prompt}\n
A) {a}\n
B) {b}\n
C) {c}\n
D) {d}\n
E) {e}\n
Answer: """
prompt = PromptTemplate(template=prompt_template, input_variables=['prompt', 'a', 'b', 'c', 'd', 'e'])

def format_text_to_prompt(example):
    ans = prompt.format(prompt=example['prompt'],
                        a=example['A'],
                        b=example['B'],
                        c=example['C'],
                        d=example['D'],
                        e=example['E'])
    return {"ans": ans}
def get_ans(text):
    inputs = tokenizer(text, return_tensors='pt')
    # Score only the final position: the logit of each option letter as the next token
    with torch.no_grad():
        logits = model(input_ids=inputs['input_ids'].to(model.device),
                       attention_mask=inputs['attention_mask'].to(model.device)).logits[0, -1]
    # Build (logit, 'option') pairs; ' A' etc. match the leading-space token variants
    options_list = [(logits[tokenizer(' A').input_ids[-1]], 'A'),
                    (logits[tokenizer(' B').input_ids[-1]], 'B'),
                    (logits[tokenizer(' C').input_ids[-1]], 'C'),
                    (logits[tokenizer(' D').input_ids[-1]], 'D'),
                    (logits[tokenizer(' E').input_ids[-1]], 'E')]
    # Sort by scalar logit value, highest first, and keep the top three letters
    options_list = sorted(options_list, key=lambda pair: pair[0].item(), reverse=True)
    return [option for _, option in options_list[:3]]
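# Hypothetical output: get_ans(formatted_prompt) might return ['B', 'A', 'E'],
# i.e. the three option letters ranked most to least likely.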
def get_base64_of_bin_file(bin_file):
    # Read a local binary file and return its base64 string (for inline data: URLs)
    with open(bin_file, 'rb') as f:
        data = f.read()
    return base64.b64encode(data).decode()
def set_png_as_page_bg(png_file):
    # The sidebar gets the local image (inlined as base64); the main container
    # uses a fixed remote banner image
    img = get_base64_of_bin_file(png_file)
    page_bg_img = f"""
    <style>
    [data-testid="stAppViewContainer"] > .main {{
        background-image: url("https://www.tata.com/content/dam/tata/images/verticals/desktop/banner_travel_umaidbhavan_desktop_1920x1080.jpg");
        background-size: 200%;
        background-position: center;
        background-repeat: no-repeat;
        background-attachment: local;
    }}
    [data-testid="stSidebar"] > div:first-child {{
        background-image: url("data:image/png;base64,{img}");
        background-position: center;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }}
    [data-testid="stHeader"] {{
        background: rgba(0,0,0,0);
    }}
    [data-testid="stToolbar"] {{
        right: 2rem;
    }}
    </style>
    """
    st.markdown(page_bg_img, unsafe_allow_html=True)
def main():
    set_png_as_page_bg("net_technology_5407.jpg")
    image_path = "artificial-intelligence.jpg"  # Replace with the actual image file path
    st.title("Sci-mcq-GPT")

    # Sidebar link to the project write-up, rendered as a clickable icon
    link = "https://drive.google.com/file/d/1_2TqNNyoczhxIBmU7BpOzEi2bu3MC-sx/view?usp=sharing"
    icon_path = "pdf download logo.png"
    encoded_image = get_base64_of_bin_file(icon_path)
    lnk = f'<a href="{link}"><img src="data:image/png;base64,{encoded_image}" width="50" height="50"></a>'
    col = st.sidebar
    col.markdown(lnk, unsafe_allow_html=True)

    st.subheader("Ask Q&A")
    col1, col2 = st.columns(2)
    query = col1.text_area("Enter your question")
    if col1.button("Get Answer"):
        ans = get_ans(query)
        # Show the three options ranked most to least likely
        col2.text_area("Sci-mcq-GPT Response", ", ".join(ans))
    else:
        col2.text_area("Sci-mcq-GPT Response", value="")

    col_sidebar = st.sidebar
    col_sidebar.image(image_path, caption=" ", width=300)

if __name__ == "__main__":
    main()
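# To launch locally (assumption: this file is saved as app.py):
#   streamlit run app.py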