import torch import transformers import pandas as pd import numpy as np import gradio as gr from torch import cuda, bfloat16 from langchain.llms import HuggingFacePipeline from langchain.document_loaders.csv_loader import CSVLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings import HuggingFaceEmbeddings from langchain.vectorstores import FAISS from langchain.chains import ConversationalRetrievalChain model_id = 'meta-llama/Llama-2-7b-chat-hf' device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu' bnb_config = transformers.BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type='nf4', bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=bfloat16 ) hf_auth = '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' model_config = transformers.AutoConfig.from_pretrained( model_id, use_auth_token=hf_auth ) model = transformers.AutoModelForCausalLM.from_pretrained( model_id, trust_remote_code=True, config=model_config, quantization_config=bnb_config, device_map='auto', use_auth_token=hf_auth ) model.eval() tokenizer = transformers.AutoTokenizer.from_pretrained( model_id, use_auth_token=hf_auth ) stop_list = ['\nHuman:', '\n```\n'] stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list] stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids] class StopOnTokens(StoppingCriteria): def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: for stop_ids in stop_token_ids: if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all(): return True return False stopping_criteria = StoppingCriteriaList([StopOnTokens()]) generate_text = transformers.pipeline( model=model, tokenizer=tokenizer, return_full_text=True, task='text-generation', stopping_criteria=stopping_criteria, temperature=0.1, max_new_tokens=512, repetition_penalty=1.1 ) llm = HuggingFacePipeline(pipeline=generate_text) data = pd.read_json('interviewQna.json') data.to_csv('interviewQna.csv', index=False) df = pd.read_csv('interviewQna.csv') df.to_csv("output.csv", index=False) loader = CSVLoader(file_path='interviewQna.csv') document = loader.load() text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20) all_splits = text_splitter.split_documents(document) model_name = "sentence-transformers/all-mpnet-base-v2" model_kwargs = {"device": "cuda"} embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs) vectorstore = FAISS.from_documents(all_splits, embeddings) chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True) def interview_evaluator(question): chat_history = [] result = chain({"question": question, "chat_history": chat_history}) return result['answer'] iface = gr.Interface( fn=interview_evaluator, inputs=gr.Textbox(lines=2,label="Question", placeholder="Enter Question Here:"), outputs=gr.Textbox(label="Answer"), title= "CyberSage" ) if __name__ == "__main__": iface.launch()