# Conversational RAG bot for Model Technical Specifications (Gradio UI).
#
# Optional bootstrap when running in a fresh environment:
# !pip install langchain langchain_community langchain_text_splitters
# !pip install langchain-google-genai gradio openai pypdf chromadb
# !pip install tiktoken python-dotenv

import os
import sys

import gradio as gr
import requests
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
from langchain import HuggingFacePipeline, PromptTemplate, LLMChain, RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import VectorDBQA
from langchain.llms import OpenAI

sys.path.append('../..')  # allow importing project modules from the repo root

import initialize

# --- Credentials -------------------------------------------------------------
# For Google Colab:
#   from google.colab import userdata
#   OPENAI_API_KEY = userdata.get('OPENAI_API_KEY'); ...
# For Desktop:
#   from dotenv import load_dotenv, find_dotenv
#   _ = load_dotenv(find_dotenv())  # read local .env file
#   OPENAI_API_KEY = os.environ['OPENAI_API_KEY']; ...

# For Hugging Face Spaces: secrets are injected as environment variables.
# .get() returns None (instead of raising) when a key is absent.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
hf_token = os.environ.get('hf_token')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
fs_token = os.environ.get('fs_token')

llm_name = "gpt-3.5-turbo"

# Build (or load) the vector store over the TS/GTR documents.
# NOTE(review): assumes initialize.initialize() returns a LangChain
# vector store exposing .as_retriever() — confirm in initialize.py.
vectordb = initialize.initialize()

# --- Local LLM: Mistral-7B-Instruct quantized to 4 bits ----------------------

# FIX: the original passed a plain dict; from_pretrained expects a
# BitsAndBytesConfig instance for bitsandbytes quantization.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# FIX: from_pretrained has no `device` kwarg — device placement for a
# quantized model is controlled via `device_map`.
model_4bit = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# FIX: the original did `pipeline = pipeline(...)`, shadowing the imported
# factory, and built HuggingFacePipeline(pipeline=pipeline) BEFORE this call,
# wrapping the factory function itself instead of a real pipeline.
# Also dropped `device=0`: a model loaded with device_map/quantization is
# already placed, and passing `device` again raises an error.
text_gen_pipeline = pipeline(
    "text-generation",
    model=model_4bit,
    tokenizer=tokenizer,
    use_cache=True,
    max_length=500,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Wrap the (now existing) pipeline for LangChain.
llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

# Mistral-instruct prompt layout. NOTE(review): currently unused by
# chat_query (RetrievalQA builds its own prompt) — kept for reference.
template = """[INST] You are a helpful, respectful and honest assistant.
Answer exactly in few words from the context
Answer the question below from the context below:
{context}
{question} [/INST] """


def chat_query(message, history):
    """Answer one chat turn by retrieval-augmented QA over the vector store.

    Gradio's ChatInterface calls fn(message, history) positionally.
    FIX: the original signature was (retrieverQA, text_query), so the
    user's message landed in `retrieverQA` and the chat *history* was
    run as the query.

    Parameters
    ----------
    message : str
        The user's question.
    history : list
        Prior (user, bot) turns supplied by Gradio; unused here — each
        query is answered statelessly from the documents.

    Returns
    -------
    str
        The model's answer grounded in the retrieved documents.
    """
    # FIX: "retrieval" is not a valid chain_type; "stuff" inserts the
    # retrieved documents directly into the prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectordb.as_retriever(),
        verbose=True,
    )
    return qa_chain.run(message)


# ---------------------------------------------------------------------------
# Alternative (OpenAI-backed) implementation, kept for reference:
# def chat_query(question, history):
#     llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key=OPENAI_API_KEY)
#     # Conversation Retrieval Chain with Memory
#     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
#     retriever = vectordb.as_retriever()
#     qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)
#     # Replace input() with question variable for Gradio
#     result = qa({"question": question})
#     return result['answer']
#     # Chatbot only answers based on Documents
#     # qa = VectorDBQA.from_chain_type(llm=OpenAI(openai_api_key=OPENAI_API_KEY), chain_type="stuff", vectorstore=vectordb)
#     # result = qa.run(question)
#     # return result

# logo_path = os.path.join(os.getcwd(), "Logo.png")

iface = gr.ChatInterface(
    fn=chat_query,
    title=" -----: Conversational BOT for Model-TS :----- ",
    description="""-- Welcome to the Language Model trained on `Model Technical Specifications` of Engineering Dept.\n\n -- The Model tries to answer the Query based on TS, GTR & Specific Requirements. \n\n -- For precise reply, please input `Specific Keywords` in your Query. \n\n """,
    concurrency_limit=None,
    examples=[
        "What should be the GIB height outside the GIS hall ?",
        "Tell me about coupling device for Outdoor Mounting.",
    ],
    # theme=gr.themes.Glass(),
)

iface.launch(share=True, debug=True)

# What should be the GIB height outside the GIS hall ?