File size: 4,314 Bytes
8055a93
 
 
 
 
 
 
 
 
 
 
321fadd
8055a93
 
 
5052352
 
 
8055a93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b73eee
321fadd
42bb600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8055a93
42bb600
8055a93
42bb600
 
 
 
8055a93
42bb600
 
 
5052352
42bb600
 
 
 
5052352
 
8055a93
 
 
 
c46aced
8055a93
ab3aea7
6cb469b
 
8055a93
 
630fab2
 
8055a93
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Dependencies (installed manually in a notebook cell / Space build before
# running this script):
# !pip install langchain
# !pip install langchain_community
# !pip install langchain_text_splitters
# !pip install langchain-google-genai
# !pip install gradio
# !pip install openai
# !pip install pypdf
# !pip install chromadb
# !pip install tiktoken
# !pip install python-dotenv

# Project-local module that builds/loads the vector store (presumably Chroma,
# given the chromadb dependency above — confirm against initialize.py).
import initialize
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import VectorDBQA
from langchain.llms import OpenAI


import gradio as gr
import os
import requests

import sys
sys.path.append('../..')  # make modules two levels up importable

# For Google Colab
'''
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
hf_token = userdata.get('hf_token')
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
# For Desktop
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # Read local .env file
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
hf_token = os.environ['hf_token']
GEMINI_API_KEY = os.environ['GEMINI_API_KEY']
'''

# For Hugging Face: read credentials from environment/Space secrets.
# .get() yields None for a missing key instead of raising KeyError.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
hf_token = os.environ.get('hf_token')
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
fs_token = os.environ.get('fs_token')

# OpenAI chat model name (used only by the commented-out ChatOpenAI path below).
llm_name = "gpt-3.5-turbo"

# Build/load the vector database once at startup; chat_query() uses it as a retriever.
vectordb = initialize.initialize()


#-------------------------------------------



# Local LLM path: 4-bit quantized Mistral-7B-Instruct served through a
# transformers text-generation pipeline, wrapped for LangChain.
from langchain import HuggingFacePipeline, PromptTemplate, LLMChain, RetrievalQA
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
import torch

# 4-bit NF4 quantization, double quantization, fp16 compute.
# BUG FIX: use BitsAndBytesConfig instead of a raw dict so transformers
# validates the options.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# BUG FIX: from_pretrained accepts `device_map`, not `device`; "auto" places
# the quantized weights on the available accelerator(s).
model_4bit = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=quantization_config,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Rebinds the name `pipeline` from the factory function to the pipeline
# instance (kept so the module-level name matches the original script).
# BUG FIX: no explicit `device=` here — the model was placed via device_map
# above, and passing a device for an accelerate-dispatched model is rejected.
pipeline = pipeline(
    "text-generation",
    model=model_4bit,
    tokenizer=tokenizer,
    use_cache=True,
    max_length=500,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,  # Mistral has no pad token; reuse EOS
)

# BUG FIX: the original built `llm` BEFORE the pipeline instance existed, so
# it wrapped the transformers `pipeline` *function*. Wrap the instance instead.
llm = HuggingFacePipeline(pipeline=pipeline)

# Mistral-Instruct prompt format: instructions inside [INST] ... [/INST].
template = """[INST] You are a helpful, respectful and honest assistant. Answer exactly in few words from the context
Answer the question below from the context below:
{context}
{question} [/INST]
"""




def chat_query(message, history):
    """Answer one chat turn with retrieval-augmented QA over the vector store.

    Called by gr.ChatInterface, which passes the user's message first and the
    chat history second, positionally. The history is currently unused (each
    turn is answered independently).

    BUG FIX vs. original: the first parameter was immediately shadowed and the
    chain was run on the *history* argument rather than the user's message;
    also "retrieval" is not a valid chain_type — "stuff" (insert retrieved
    docs directly into the prompt) is the standard choice.
    """
    retrieverQA = RetrievalQA.from_chain_type(
        llm=llm,                              # module-level HuggingFacePipeline LLM
        chain_type="stuff",
        retriever=vectordb.as_retriever(),    # module-level vector store
        verbose=True,
    )
    return retrieverQA.run(message)



    
#-------------------------------------------

    
# def chat_query(question, history):
    
#     llm = ChatOpenAI(model=llm_name, temperature=0.1, api_key = OPENAI_API_KEY)

#     # Conversation Retrival Chain with Memory
#     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
#     retriever=vectordb.as_retriever()
#     qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

#     # Replace input() with question variable for Gradio
#     result = qa({"question": question})
#     return result['answer']

#     # Chatbot only answers based on Documents
#     # qa = VectorDBQA.from_chain_type(llm=OpenAI(openai_api_key = OPENAI_API_KEY, ), chain_type="stuff", vectorstore=vectordb)
#     # result = qa.run(question)
#     # return result

    


# logo_path = os.path.join(os.getcwd(), "Logo.png")

# Gradio chat UI: wires chat_query() into a ready-made chat interface.
# ChatInterface invokes fn with (message, history) on every user turn.
iface = gr.ChatInterface(
    fn=chat_query,
    title=" -----:  Conversational BOT for Model-TS  :----- ",
    description="""-- Welcome to the Language Model trained on `Model Technical Specifications` of Engineering Dept.\n\n
                   -- The Model tries to answer the Query based on TS, GTR & Specific Requirements. \n\n
                   -- For precise reply, please input `Specific Keywords` in your Query. \n\n """,
    concurrency_limit = None,  # no limit on concurrent chat sessions
    examples = ["What should be the GIB height outside the GIS hall ?", "Tell me about coupling device for Outdoor Mounting." ],
    #theme=gr.themes.Glass(),
)

# share=True exposes a temporary public gradio.live URL; debug=True blocks the
# main thread and streams server logs to stdout.
iface.launch(share=True, debug=True)

# What should be the GIB height outside the GIS hall ?