# NOTE: removed non-Python extraction artifacts here (a "File size" banner and
# a line-number ruler) that would have made this file unparseable.
import os, base64
import requests, json
import gradio as gr
# ANSI escape codes used to colourise terminal/debug output.
GREEN = '\033[1;32m'
BLUE = '\033[1;34m'
RESET = '\033[0m'
# Base URL of the RAG backend API (query + image endpoints).
URL = "https://ai1071.4dstaging.com/v1/"
# Status codes returned in response["code"] by the backend.
VALID_ANSWER, QUERY_FAIL, INVALID_ANSWER=0 , 1, 2
# Customer/deployment identifiers; used as indices into MODES below.
VICTORIA_HARBOUR, MIC =0, 1 # VICTORIA_HARBOUR = Harbour restaurant group (海港飲食集團)
CUSTOMER = MIC  # currently active deployment
# Per-customer deployment configurations, indexed by VICTORIA_HARBOUR / MIC.
# Each entry carries the UI title, backend query mode, retrieval temperature,
# the server-side vector-DB path, and clickable sample questions for the UI.
MODES = [
    {
        "name": " ",
        "query_mode_indx": 5,
        "retrieval_temperature": 0.2, #EC19Jun2024
        "path": r"E:\workspace\RAG_data\20240412_superQuery\db\EC_test_all\20240603_haigang_qa",
        "sample_questions": [
            "這裡可以book位嗎?", "可以book位嗎?", "Hi", "蟹", "魚", "會員", "訂枱",
            # ^ Fix: the comma after "訂枱" was missing, so implicit string
            #   concatenation silently merged it with the next question
            #   into the single string "訂枱锡我?".
            "锡我?", "可唔可以幫我寫一張菜單?",
            "可以加大長腳蟹嗎?", "想查詢最新堂食優惠",
            "有什麼優惠", "宴會菜單", "有長腳蟹?", "積分如何運作?", "點加入會員?",
            "套餐可轉其他菜式嗎?", "網購限定優惠可以堂食嗎?", "當日海鮮供應情況?"
        ],
    }, {
        "name": "MiC Modular Integrated Construction - HK (Beta)",
        "query_mode_indx": 4,
        "retrieval_temperature": 0.2, #EC19Jun2024
        # "path": r"E:\workspace\RAG_data\20??????????????S",
        "path": r"E:\workspace\RAG_data\20240412_superQuery\db\EC_test_all\20240619_mic_demo",
        "sample_questions": [
            "What is MIC?", "優惠措施", "Please introduce CIC", "Key Technologies of MIC",
            "組裝合成建築法", "物料或產品規格", "MIC safety."
        ],
    }
]
# Sample questions for the active customer, shown as clickable examples in the UI.
questions=MODES[CUSTOMER]['sample_questions']
def the_answer(response: dict) -> str:
    """Extract the GPT-4 answer text from a backend response.

    Expects ``response['msg']`` to contain the markers ``'Answer(GPT4):'``
    and ``'References:'``; returns the text between them with surrounding
    whitespace/linefeeds removed.

    Raises IndexError if the 'Answer(GPT4):' marker is absent.
    """
    answer = response['msg'].split('Answer(GPT4):')[1].split('References:')[0]
    # Fix: str.strip() returns a new string; the original called it and
    # discarded the result, so the answer was returned un-stripped.
    return answer.strip()
def the_references(response: dict, user_query: str):
    """Collect the reference page contents from a valid backend response.

    Returns the ``page_content`` of each source document, after dropping any
    reference whose stored question exactly matches the user's query (see
    ``filter_repeated``).  An invalid response yields an empty list.
    """
    if response["code"] == VALID_ANSWER:
        contents = [doc["page_content"] for doc in response["data"]["source_docs"]]
    else:
        contents = []
    return filter_repeated(user_query, contents)  #EC04Jun2024
def filter_repeated(user_query: str, ref_contents: list) -> list:
    """Return ``ref_contents`` without entries whose reference question is
    exactly the question the user just asked (avoids echoing it back).

    Each entry is expected to be a dict with a '問題' ("question") sub-dict;
    entries whose question cannot be extracted are kept as-is.
    """
    #EC04Jun2024
    ref_contents_filtered = []
    for ref in ref_contents:
        try:
            # The first value of the '問題' mapping is the stored question.
            question = next(iter(ref.get('問題').values()))
        except StopIteration as e:
            # Fix: empty '問題' mapping.  The original handler fell through
            # to the comparison below with `question` unbound (NameError on
            # the first iteration) or stale from a previous iteration.
            # Keep the ref, consistent with the generic handler.
            print(e)
            ref_contents_filtered.append(ref)
            continue
        except Exception as e:
            # Malformed entry (e.g. missing '問題' key -> None.values()):
            # keep it rather than dropping data silently.
            print(e)
            ref_contents_filtered.append(ref)
            continue
        print(question)
        print("question == user_query: "+str(question == user_query))
        if question != user_query:
            ref_contents_filtered.append(ref)
    return ref_contents_filtered
def get_images_from_source(source_docs):
    """Fetch every image-type source referenced by the docs and return them
    as a single string of newline-prefixed inline base64 HTML <img> tags.

    Sources that fail to download are reported to stdout and skipped.
    """
    image_exts = [".jpg", ".jpeg", ".png"]
    html_parts = []
    for doc in source_docs:
        src = doc['source']
        if os.path.splitext(src)[1] not in image_exts:
            continue  # not an image source
        resp = requests.get(URL+f"images?image_id={src}")
        if resp.status_code != 200:
            print("Error fetching image")
            continue
        encoded = base64.b64encode(resp.content).decode("utf-8")
        html_parts.append("\n"+f'<img src="data:image/png;base64,{encoded}" alt="img_name">')
    return "".join(html_parts)
def all_info(response):
    """Render every key/value pair of the response dict, one per line, with
    the key highlighted in green ANSI colour for terminal display."""
    lines = []
    for key, value in response.items():
        lines.append(f"{GREEN}{key}{RESET}: {value}")
    return "\n".join(lines)
def request_stream_chat(question: str, history):
    """Gradio chat handler: stream the RAG backend's answer for `question`.

    Posts the question to the backend /query endpoint with streaming enabled
    and yields the growing partial reply as chunks arrive.  When the chunk
    flagged 'finished' arrives, yields the complete message plus reference
    material: filtered source documents as text for query mode 5, otherwise
    the raw 'reference' field plus any inline images.

    `history` is supplied by gr.ChatInterface but is not used here.
    (Removed the dead `global temp_source_docs` declaration — the function
    never assigns that name.)
    """
    if not question:
        yield "Hello! What would you like to know?"
        return
    payload = {
        "prompt": question,
        "retrieval_temperature": 0.2, #MODES[CUSTOMER]['retrieval_temperature'], #EC19Jun2024: from 0.2 -> MODES[CUSTOMER]['retrieval_temperature']
        "query_mode_indx": MODES[CUSTOMER]['query_mode_indx'],
        "path": MODES[CUSTOMER]['path'],
        "stream": True,
        "LLM_type": "gpt"
    }
    reply_buffer = ""
    with requests.post(url=URL+"query", json=payload, stream=True) as r_stream:
        for line in r_stream.iter_lines():
            if not line:
                continue  # skip keep-alive / empty chunks
            line = json.loads(line)
            if line['finished']:  # final chunk carries the complete response
                response = line
                msg = response['msg']
                if payload['query_mode_indx'] == 5:
                    # QA mode: append the filtered reference documents as text.
                    source_docs_content = the_references(response, question)
                    source_docs_content_str = "\n".join([str(content) for content in source_docs_content])
                    response_str = msg+"\n\nSource documents:\n"+source_docs_content_str
                else:
                    # Fix: .get('reference') may return None; `or ""` avoids a
                    # TypeError on concatenation (completes the EC19Jun2024
                    # change from ['reference'] to .get('reference')).
                    response_str = msg+"\n\n"+(response.get('reference') or "")
                    # Fix: tolerate responses without 'data'/'source_docs'
                    # instead of raising KeyError on the final chunk.
                    source_docs = response.get('data', {}).get('source_docs', [])
                    image_str = get_images_from_source(source_docs)
                    response_str += "\n"+image_str
                # Fix: yield/break sit after the if/else so BOTH query modes
                # emit their final answer; previously the mode-5 branch built
                # response_str but never yielded it.
                yield response_str
                break
            else:
                reply_buffer += line['reply']  # streamed chunk
                yield reply_buffer  # cumulative partial reply so far
def my_generator(x):
    """Yield the integers 0 .. x-1 in order (small demo/testing helper)."""
    yield from range(x)
if __name__ == "__main__":
    # Launch the Gradio chat UI wired to the streaming backend handler.
    # NOTE(review): retry_btn/undo_btn/clear_btn kwargs exist only in
    # Gradio 4.x and were removed in Gradio 5 — confirm the pinned version.
    # responses=[]; answers=[]; references=[]; all_infos=[]
    # for q in questions:
    #     response=request_stream_chat(q, "dummy history")
    #     responses.append(response)
    #     all_infos.append(all_info(response))
    #     answers.append(the_answer(response))
    #     references.append(the_references(response))
    gr.ChatInterface(
        request_stream_chat, #a4o_response
        examples=questions,
        chatbot=gr.Chatbot(height=450), #300),
        textbox=gr.Textbox(placeholder="喺呢度問我問題.", container=False, scale=7),
        title=MODES[CUSTOMER]['name'],
        description="智能査詢",
        theme="soft",
        cache_examples=False, #True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
        fill_height=True,
    ).launch(share=True) #False) #True)  # share=True publishes a public gradio.live URL