File size: 7,425 Bytes
8404039
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import os, base64
import requests, json
import gradio as gr


# ANSI escape codes for colourised terminal output.
GREEN = '\033[1;32m'
BLUE = '\033[1;34m'
RESET = '\033[0m'
# Base URL of the backend RAG API; endpoints ("query", "images?...") are appended.
URL = "https://ai1071.4dstaging.com/v1/"

# Status codes returned in response["code"] by the /query endpoint.
VALID_ANSWER, QUERY_FAIL, INVALID_ANSWER=0 , 1, 2

# Customer/deployment selectors: indices into the MODES list below.
VICTORIA_HARBOUR, MIC =0, 1 # VICTORIA_HARBOUR = Victoria Harbour catering group (海港飲食集團)
# Active deployment for this run.
CUSTOMER = MIC

# Per-customer deployment configuration, indexed by the CUSTOMER constant.
# Each entry holds the UI title, backend query mode, retrieval settings,
# the server-side DB path, and the clickable sample questions.
MODES = [
    {
        "name": " ",
        "query_mode_indx": 5,
        "retrieval_temperature": 0.2,  # EC19Jun2024
        "path": r"E:\workspace\RAG_data\20240412_superQuery\db\EC_test_all\20240603_haigang_qa",
        "sample_questions": [
            # BUG FIX: the original list was missing the comma after "訂枱",
            # so Python's implicit string concatenation silently merged
            # "訂枱" and "锡我?" into one bogus sample question "訂枱锡我?".
            "這裡可以book位嗎?", "可以book位嗎?", "Hi", "蟹", "魚", "會員", "訂枱",
            "锡我?", "可唔可以幫我寫一張菜單?",
            "可以加大長腳蟹嗎?", "想查詢最新堂食優惠",
            "有什麼優惠", "宴會菜單", "有長腳蟹?", "積分如何運作?", "點加入會員?",
            "套餐可轉其他菜式嗎?", "網購限定優惠可以堂食嗎?", "當日海鮮供應情況?"
        ],
    }, {
        "name": "MiC Modular Integrated Construction - HK (Beta)",
        "query_mode_indx": 4,
        "retrieval_temperature": 0.2,  # EC19Jun2024
        "path": r"E:\workspace\RAG_data\20240412_superQuery\db\EC_test_all\20240619_mic_demo",
        "sample_questions": [
            "What is MIC?", "優惠措施", "Please introduce CIC", "Key Technologies of MIC",
            "組裝合成建築法", "物料或產品規格", "MIC safety."
        ],
    }
]


questions=MODES[CUSTOMER]['sample_questions']

def the_answer(response:dict): #extract answer from the response.
    """Extract the answer text from a backend query response.

    The backend 'msg' field has the shape
    "...Answer(GPT4): <answer> References: ..."; return the text between
    those two markers, stripped of surrounding whitespace.

    Args:
        response: decoded JSON response containing a 'msg' string.

    Returns:
        The stripped answer text.

    Raises:
        IndexError: if the 'Answer(GPT4):' marker is absent from 'msg'.
    """
    a = response['msg'].split('Answer(GPT4):')[1].split('References:')[0]
    # BUG FIX: str.strip() returns a NEW string; the original called
    # `a.strip()` on its own line, discarded the result, and returned the
    # unstripped text. (Note: strip only removes leading/trailing
    # whitespace, not interior linefeeds.)
    return a.strip()

def the_references(response:dict, user_query: str):
    """Collect reference passages from a query response.

    When the response code indicates a valid answer, gather every
    source document's page content; then drop any reference whose stored
    question is an exact duplicate of the user's query (EC04Jun2024).

    Args:
        response: decoded JSON response; source docs live under
            response["data"]["source_docs"].
        user_query: the question the user actually asked.

    Returns:
        The filtered list of page-content entries.
    """
    contents = []
    if response["code"] == VALID_ANSWER:
        contents = [doc["page_content"] for doc in response["data"]["source_docs"]]
    return filter_repeated(user_query, contents)

def filter_repeated(user_query, ref_contents: list):
    """Filter out references whose stored question exactly equals the
    user's asked question (EC04Jun2024).

    Each ref is expected to be a dict with a '問題' ("question") key
    mapping to a dict whose first value is the reference question text.
    Refs that do not match this shape are kept as-is (best effort).

    Args:
        user_query: the question the user asked.
        ref_contents: list of reference page-content dicts.

    Returns:
        A new list with exact-duplicate-question refs removed.
    """
    ref_contents_filtered = []
    for ref in ref_contents:
        try:
            question = next(iter(ref.get('問題').values()))
        # BUG FIX: the original handled StopIteration (empty '問題' dict)
        # with `print(e); pass` and then fell through to code that read
        # the still-unbound `question`, raising NameError. Malformed refs
        # are now uniformly kept and skipped. Broad Exception is kept
        # deliberately: this is a best-effort filter over loosely-shaped
        # backend data (missing key -> None.values() AttributeError, etc.).
        except Exception as e:
            print(e)
            ref_contents_filtered.append(ref)
            continue
        print(question)
        print("question == user_query: " + str(question == user_query))
        if question != user_query:
            ref_contents_filtered.append(ref)
    return ref_contents_filtered

def get_images_from_source(source_docs):
    """Fetch all image sources referenced by *source_docs* and return
    them as newline-prefixed, base64-inlined HTML <img> tags.

    Args:
        source_docs: list of dicts, each with a 'source' path whose
            extension decides whether it is treated as an image.

    Returns:
        A single string of <img> tags ("" when there are no images);
        failed fetches are reported to stdout and skipped.
    """
    image_exts = {".jpg", ".jpeg", ".png"}
    buffer_img_str = ""
    for doc in source_docs:
        source = doc['source']
        if os.path.splitext(source)[1] not in image_exts:
            continue  # not an image source
        response = requests.get(URL+f"images?image_id={source}")
        if response.status_code != 200:
            print("Error fetching image")
            continue
        base64_image = base64.b64encode(response.content).decode("utf-8")
        # Inline the image so the chat UI can render it without a URL.
        img_html = f'<img src="data:image/png;base64,{base64_image}" alt="img_name">'
        buffer_img_str += "\n" + img_html
    return buffer_img_str

def all_info(response):
    """Render every key/value pair of *response*, one colourised
    (green key, reset value) line per entry, joined by newlines."""
    lines = []
    for key, value in response.items():
        lines.append(f"{GREEN}{key}{RESET}: {value}")
    return "\n".join(lines)

def request_stream_chat(question:str, history):
    """Stream an answer for *question* from the backend /query endpoint.

    Generator used by gr.ChatInterface: yields the accumulated reply text
    as chunks arrive; the final yield additionally appends the source
    documents (or references) and any inlined images.

    Args:
        question: the user's query; an empty/falsy value yields a greeting.
        history: chat history passed by Gradio (unused here).
    """
    global temp_source_docs  # NOTE(review): declared but never assigned or read in this function
    
    if not question:
        # Empty input: greet instead of hitting the backend.
        yield "Hello! What would you like to know?"
        return
    
    payload = {
        "prompt": question,
        "retrieval_temperature": 0.2, #MODES[CUSTOMER]['retrieval_temperature'], #EC19Jun2024: from 0.2 -> MODES[CUSTOMER]['retrieval_temperature']
        
        # "query_mode_indx": 5,
        # "path": r"E:\workspace\RAG_data\20240412_superQuery\db\EC_test_all\20240603_haigang_qa",
        
        "query_mode_indx": MODES[CUSTOMER]['query_mode_indx'],
        "path": MODES[CUSTOMER]['path'],
        
        "stream": True,
        "LLM_type": "gpt"
    }
    reply_buffer = ""
    # Server streams one JSON object per line; the final one has finished=True.
    with requests.post(url=URL+"query", json=payload, stream=True) as r_stream:
        for line in r_stream.iter_lines():
            if line:               
                line = json.loads(line)
                if line['finished']: # final message: full answer plus metadata
                    response = line
                    # print(f"{RESET}-end")
                    # response=filter_repeated(response)
                    
                    msg = response['msg']
                    
                    if payload['query_mode_indx'] == 5:
                        # Q&A mode: append the duplicate-filtered source documents.
                        source_docs_content = the_references(response, question)
                        source_docs_content_str = "\n".join([str(content) for content in source_docs_content])
                        response_str = msg+"\n\nSource documents:\n"+source_docs_content_str
                    else:
                        # NOTE(review): .get('reference') can return None, which would
                        # raise TypeError on concatenation — confirm the backend always
                        # includes 'reference' in non-QA modes.
                        response_str = msg+"\n\n"+response.get('reference') #EC19Jun2024: from [] -> .get()
                    
                    # Inline any image sources as base64 <img> tags.
                    source_docs = response['data']['source_docs']
                    image_str = get_images_from_source(source_docs)
                    response_str += "\n"+image_str
                    yield response_str
                    
                    break
                else:
                    # Intermediate chunk: accumulate and re-yield the growing reply
                    # so the UI shows the text building up.
                    # yield line
                    # print(f"{BLUE}"+line['reply']+f"{RESET}", end="") #streaming chunks
                    reply_buffer += line['reply']
                    yield reply_buffer #line['reply']
                    
    # response=the_answer(response)+'\n' + str(the_references(response))
    # return response

def my_generator(x):
    """Yield the integers 0 .. x-1 in order (scratch/demo helper)."""
    yield from range(x)

if __name__ == "__main__":
    # Earlier batch-testing loop, kept for reference:
    # responses=[]; answers=[]; references=[]; all_infos=[]
    # for q in questions:
    #     response=request_stream_chat(q, "dummy history")
        
    #     responses.append(response)  
    #     all_infos.append(all_info(response))
    #     answers.append(the_answer(response))
    #     references.append(the_references(response))
    
    # Launch the streaming chat UI; share=True exposes a public Gradio link.
    gr.ChatInterface(
        request_stream_chat, #a4o_response
        examples=questions,

        chatbot=gr.Chatbot(height=450), #300),
        textbox=gr.Textbox(placeholder="喺呢度問我問題.", container=False, scale=7),
        title=MODES[CUSTOMER]['name'],
        description="智能査詢",
        theme="soft",
        cache_examples=False, #True,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
        fill_height=True,
    ).launch(share=True) #False) #True)