Spaces:

DebayanDaw
/

Chatbot_demo

Sleeping

App Files Files Community

DebayanDaw committed on Jun 16, 2023

Commit

5692274

1 Parent(s): f9007d0

Upload 6 files

Browse files

Files changed (6) hide show

app.py +333 -25
business_list.pkl +3 -0
business_numpy.npy +3 -0
individual_list.pkl +3 -0
individual_numpy.npy +3 -0
requirements (1).txt +6 -0

app.py CHANGED Viewed

@@ -1,29 +1,337 @@
-import gradio as gr
-import random
-import time
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox()
-    clear = gr.Button("Clear")
-    def user(user_message, history):
-        return gr.update(value="", interactive=False), history + [[user_message, None]]
-    def bot(history):
-        bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"])
-        history[-1][1] = ""
-        for character in bot_message:
-            history[-1][1] += character
-            time.sleep(0.05)
-            yield history
-    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, chatbot, chatbot
     )
-    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
-demo.queue()
-demo.launch()

+import pickle
+import openai
+import re
+import numpy as np
+import tensorflow_hub as hub
+import openai
+import os
+import gradio as gr
+import pandas as pd
+import io
+import fitz
+from sklearn.neighbors import NearestNeighbors
+import warnings
+warnings.filterwarnings(action='ignore')
+# Paths of the precomputed artefacts that ship next to this script.
+indi_pkl_file_path = "individual_list.pkl"  # pickled list of texts for individual-tax queries
+busi_pkl_file_path = "business_list.pkl"  # pickled list of texts for business-tax queries
+indi_loaded_array_file_path= "individual_numpy.npy"
+busi_loaded_array_file_path= "business_numpy.npy"
+# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
+# Only load pickles that are part of this repository, never user-supplied ones.
+with open(indi_pkl_file_path, "rb") as f:
+    indi_texts = pickle.load(f)
+with open(busi_pkl_file_path, "rb") as f:
+    busi_texts = pickle.load(f)
+#df = pd.read_excel('ABS_Statewise_Tax_Revenue_data.xlsx')
+#ndf= df.copy()
+# Arrays handed to SemanticSearch as the embeddings to fit on.
+# (The former module-level `global` statements were no-ops and are dropped.)
+indi_loaded_array = np.load(indi_loaded_array_file_path)
+busi_loaded_array = np.load(busi_loaded_array_file_path)
+# The API key is read from the environment (e.g. a Spaces secret named
+# OPENAI_API_KEY) instead of being committed to the repository. Any key that
+# was ever committed here must be treated as leaked and revoked.
+openAI_key = os.environ.get("OPENAI_API_KEY")
+# Number printed in the "[n]" tag of the first page's chunks.
+start_page=1
+def text_to_chunks(texts, word_length=150, start_page=1):
+    """Split page texts into tagged chunks of at most ``word_length`` tokens.
+
+    Each element of ``texts`` is treated as one page and tokenised on single
+    spaces. A short trailing piece of a page is not emitted on its own;
+    it is prepended to the following page instead, unless the page is the last
+    one. Every emitted chunk has the form ``[<page number>] "<text>"`` where
+    the page number is the page index plus ``start_page``.
+
+    The exact chunk boundaries matter: the resulting list is used together
+    with precomputed embeddings elsewhere in this file.
+    """
+    pages = [page.split(' ') for page in texts]  # one token list per page
+    last_page = len(pages) - 1
+    chunks = []
+    for page_idx in range(len(pages)):
+        # Read the page lazily: it may have been extended by the previous page.
+        tokens = pages[page_idx]
+        for start in range(0, len(tokens), word_length):
+            piece = tokens[start:start + word_length]
+            if len(piece) < word_length and page_idx != last_page:
+                # Carry the short tail over to the start of the next page.
+                pages[page_idx + 1] = piece + pages[page_idx + 1]
+                continue
+            body = ' '.join(piece).strip()
+            chunks.append(f'[{page_idx + start_page}] "{body}"')
+    return chunks
+class SemanticSearch:
+    """Nearest-neighbour lookup of text chunks by sentence embedding.
+
+    ``data`` is the list of chunks returned to the caller and ``loaded_array``
+    the embeddings the neighbour index is fitted on; presumably row ``i`` of
+    the array embeds ``data[i]`` (verify against how the .npy was produced).
+    ``batch`` is accepted for compatibility but is not used.
+    """
+    # Encoder shared by all instances. hub.load is expensive and callers in
+    # this file construct a new SemanticSearch per question, so the model is
+    # loaded once per process instead of once per instance.
+    _shared_encoder = None
+    def __init__(self,data,loaded_array, batch=1000, n_neighbors=5) :
+        if SemanticSearch._shared_encoder is None:
+            SemanticSearch._shared_encoder = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')
+        self.use = SemanticSearch._shared_encoder
+        self.fitted = False
+        self.data = data
+        self.embeddings = loaded_array
+        # Never ask for more neighbours than there are embedded rows.
+        n_neighbors = min(n_neighbors, len(self.embeddings))
+        self.nn = NearestNeighbors(n_neighbors=n_neighbors)
+        self.nn.fit(self.embeddings)
+        self.fitted = True
+    def __call__(self, text, return_data=True):
+        """Return the chunks nearest to ``text``, or their indices.
+
+        With ``return_data`` true the matching entries of ``self.data`` are
+        returned as a list; otherwise the neighbour indices are returned as
+        produced by ``kneighbors``.
+        """
+        inp_emb = self.use([text])
+        neighbors = self.nn.kneighbors(inp_emb, return_distance=False)[0]
+        if return_data:
+            return [self.data[i] for i in neighbors]
+        return neighbors
+def generate_text(openAI_key,prompt, engine="text-davinci-003"):
+    """Send ``prompt`` to the OpenAI completion endpoint and return the text.
+
+    Sets ``openai.api_key`` to ``openAI_key`` as a side effect, requests a
+    single completion of at most 512 tokens at temperature 0 and returns the
+    text of the first choice.
+    """
+    openai.api_key = openAI_key
+    request = dict(
+        engine=engine,
+        prompt=prompt,
+        max_tokens=512,
+        n=1,
+        stop=None,
+        temperature=0,
+    )
+    response = openai.Completion.create(**request)
+    return response.choices[0].text
+def generate_answer(question,openAI_key):
+    """Answer ``question`` from the chunks retrieved by the active recommender.
+
+    Relies on the module-level ``recommender`` having been set by the caller
+    before this function runs. The retrieved chunks are listed under a
+    "search results:" header, followed by the fixed answering instructions
+    and finally the actual query; the assembled prompt is sent through
+    ``generate_text`` and its completion is returned.
+    """
+    topn_chunks = recommender(question)
+    search_results = ''.join(c + '\n\n' for c in topn_chunks)
+    # Implicit string concatenation inside parentheses: no backslash
+    # continuations, so the statement cannot swallow the line after it.
+    instructions = (
+        "Instructions: Compose a comprehensive reply to the query using the search results given. "
+        "Consider the provided information from Australian Taxation Office (ATO) and answer only when context is clear to you. For example : Tax, Australia etc."
+        "Keep The tone conversational "
+        "Provide example from the text to make the answer understandable."
+        "If there are multiple points, use bullets to write those points."
+        "If there is any related hyperlink, include that in the answer."
+        "If the search results mention multiple subjects with the same name, create separate answers for each. "
+        "Only include information found in the results and don't add any additional information."
+        "Make sure the answer is correct and don't output false content. "
+        "If the text does not relate to the query, simply state 'Are you sure this is a taxation related query? If not kindly ask taxation related queries'."
+        "Ignore outlier search results which has nothing to do with the question. Only answer what is asked."
+        "The answer should be short and concise. Answer step-by-step. \n\n"
+    )
+    # The query is interpolated exactly once; the previous literal
+    # "{question}" placeholder in the instruction text is gone.
+    prompt = 'search results:\n\n' + search_results + instructions + f"Query: {question}\nAnswer:"
+    answer = generate_text(openAI_key, prompt,"text-davinci-003")
+    return answer
+def classify_chatgpt(question, openAI_key, engine='gpt-3.5-turbo-0301'):
+    """Ask the chat model to label ``question`` as business or individual tax.
+
+    Sets ``openai.api_key`` to ``openAI_key`` as a side effect and returns the
+    raw content of the first chat completion choice, unmodified.
+    """
+    openai.api_key = openAI_key
+    system_prompt = '''We have a user query related to taxes from Australia and your task is to classify the query into one of the category from the following given two categories only:
+      1. Business Taxation Query
+      2. Individual Taxation Query
+      Context: Individual Taxation Query are those queries which are related to the individual taxation of the residents of a country (here from Australia).\n
+              Business Taxation Query are those queries which are related from the Business perspective.
+      Instructions:
+      1. Understand the query very well.\n
+      2. Return the result only as a string. \n
+      3. Do not put any punctuation mark after classifying it. \n
+      4. Do not write any extra information and just do the Classification from the mentioned two categories.'''
+    conversation = [
+        {'role': 'system', 'content': system_prompt},
+        {'role': 'user', 'content': question},
+    ]
+    response = openai.ChatCompletion.create(
+        model=engine,
+        messages=conversation,
+        max_tokens=512,
+        n=1,
+        stop=None,
+        temperature=0.7,
+    )
+    return response.choices[0].message['content']
+def question_answer(question,openAI_key):
+    """Return the generated answer for ``question``; delegates to generate_answer."""
+    reply = generate_answer(question=question, openAI_key=openAI_key)
+    return reply
+# Chunk both corpora once at import time, individual first, then business.
+indi_chunks, busi_chunks = (
+    text_to_chunks(corpus, start_page=start_page)
+    for corpus in (indi_texts, busi_texts)
+)
+#def struct_answer(openAI_key, engine, message, dataframe, user_question):
+#
+#  def generate_text2(openAI_key, engine, message):
+#      openai.api_key = openAI_key
+#
+#      completions = openai.ChatCompletion.create(
+#          model=engine,
+#          messages=message,
+#          max_tokens=512,
+#          n=1,
+#          stop=None,
+#          temperature=0.7,
+#      )
+#      message = completions.choices[0].message['content']
+#      return message
+#
+#  def filter_dataframe(openAI_key,engine, message,df=dataframe):
+#      out_state_year = generate_text2(openAI_key, engine, message)
+#      out_state_year = eval(out_state_year)
+#      state= [out_state_year[1]]
+#      year = [out_state_year[0]]
+#      tax_type = [out_state_year[2]]
+#      print(out_state_year)
+#      filter_df = df[(df['Year'].isin(year)) & (df['State'].isin(state)) & (df['SubSubCategory'].isin(tax_type))].reset_index().drop('index',axis=1)
+#      feeded_df = filter_df.loc[:,['Country','State','Govt_Type','Year','SubSubCategory',"Tax Amount",'Unit']]
+#      feeded_df = feeded_df.rename({'Govt_Type':'Government Type','SubSubCategory':'Type of Tax','Tax Amount':'Tax Amount Collected'},axis=1)
+#
+#      return feeded_df
+#
+#  feeded_df = filter_dataframe(openAI_key, engine, message)
+#  print(feeded_df.shape)
+#
+#  def dataframe_to_text(new_dataframe= feeded_df):
+#        new_dataframe.to_csv(r'nmdata.txt', index=None, sep='|', mode='a')
+#        text_file = open('nmdata.txt', 'r')
+#        lines = text_file.readlines()
+#        os.remove("nmdata.txt")
+#        return lines
+#
+#  def prompt_generation(x=dataframe_to_text(), y= user_question):
+#          prompt = ""
+#          for line in x:
+#            prompt+=line +'\n'
+#          prompt += '''Take above datapoint(s) as reference and answer the following question: {0}.\n Instructions while answering the question: 1. Do not generate any additional information on your own \n
+#          2. Do not do generate any number on your own. Just the number mentioned in the datapoint(s) \n
+#         3. Take your time to understand the question and than only answer the question \n
+#          4. Write answer in the form of sentence. '''.format(y)
+#          return prompt
+#
+#  def generate_text(openAI_key,prompt, engine="text-davinci-003"):
+#      openai.api_key = openAI_key
+#      completions = openai.Completion.create(
+#          engine=engine,
+#          prompt=prompt,
+#          max_tokens=512,
+#          n=1,
+#          stop=None,
+#          temperature=0
+#      )
+#      message = completions.choices[0].text
+#      return message
+#
+#  prompt_ = prompt_generation()
+#  response_ = generate_text(openAI_key, prompt_)
+#  return response_
+def output_generation(typed_ques):
+    """Classify ``typed_ques``, pick the matching corpus and answer from it.
+
+    Returns a ``(answer, type_of_query)`` tuple, where ``type_of_query`` is
+    the raw classifier output. Sets the module-level ``recommender`` that
+    ``generate_answer`` reads. When the classifier output matches neither
+    category, a fixed fallback message is returned instead of ``None``, so
+    callers can always unpack the result.
+    """
+    global recommender
+    type_of_query= classify_chatgpt(typed_ques, openAI_key)
+    #print(type_of_query)
+    # Tolerate surrounding whitespace and a trailing period in the label.
+    label = (type_of_query or '').strip().rstrip('.').strip()
+    if label == 'Individual Taxation Query':
+        recommender = SemanticSearch(data=indi_chunks,loaded_array = indi_loaded_array)
+    elif label == 'Business Taxation Query':
+        recommender = SemanticSearch(data=busi_chunks, loaded_array=busi_loaded_array)
+    else:
+        # Unrecognised label: reuse the wording the answer prompt prescribes
+        # for off-topic questions.
+        fallback = 'Are you sure this is a taxation related query? If not kindly ask taxation related queries'
+        return fallback, type_of_query
+    out_pu = question_answer(question= typed_ques, openAI_key= openAI_key)
+    #print(out_pu)
+    return out_pu, type_of_query
+    #elif type_of_query in ['Data Related Query', 'Data Related Query.']:
+    #
+    #    engine="gpt-3.5-turbo-0301"
+    #    message = [{'role': 'system', 'content': '''From the provided sentence extract out the year mentioned in the sentence, State and type of Tax for which user is asking the question about.
+    #
+    #                    Instructions:
+    #                    1. Provide your output just a list in which first element is Year and second element is the State for example: [2018, "Queensland", "Land Tax"]
+    #                    2. If multiple states and year are mentioned than provide both as the result. for example: [[2018,2020], ["New South Wales","Queensland"], ["Land Tax","Gift Tax"]]
+    #                    3. Extract the exact name of the tax from the mentioned list of type of taxes below. For example, if in query motor tax is present but in the given below categories there is no category as 'motor tax' than classify into its most nearest category which has information about total tax present.
+    #                    4. Do not explain your answer. Just write the answer as mentioned in the first instruction.
+    #
+    #                    There are total 7 states in country Australia and they are as follows:
+    #                    1.  Northern Territory
+    #                    2.  Tasmania
+    #                    3.  South Australia
+    #                    4.  Western Australia
+    #                    5.  Queensland
+    #                    6.  Victoria
+    #                    7.  New South Wales
+    #
+    #                    Classify the State from above 7 names but in case, if in the sentence none of the states are mentioned than classify from the below special cases:
+    #                    1.  Give "Australia" as the result if nothing is mentioned at all.
+    #                    2.  Give "All States" as the result if the sentence is talking about all the states of Australia or all the state governments of Australia.
+    #                    3.  Give "Australian Capital Territory" as the result if the sentence is talking about the Australian Capitals territory.
+    #
+    #
+    #                    Also from the sentence classify the type of tax for which user is talking about from the below list of Tax Type:
+    #                    1. Taxes on employers payroll and labour force, 2. Land taxes, 3. Municipal rates, 4. Other taxes on property, 5. Total taxes on immovable property, 6. Estate, inheritance and gift taxes,
+    #                    7. Total taxes on property, 8. Excises and levies,  9. Taxes on government lotteries, 10. Taxes on private lotteries, 11. Taxes on gambling machines, 12. Casino taxes,
+    #                    13. Race and other sports betting taxes,  14. Other taxes on gambling,  15. Total taxes on gambling,  16. Insurance companies contributions to fire brigades,
+    #                    17. Third party insurance taxes,  18. Other taxes on insurance, 19. Total taxes on insurance, 20. Government borrowing guarantee levies,  21. Stamp duties on conveyances,
+    #                    22. Other taxes on financial and capital transactions, 23. Total taxes on financial and capital transactions, 24. Total taxes on the provision of goods and services,
+    #                    25. Stamp duty on vehicle registration, 26. Other motor vehicle taxes,  27. Total motor vehicle taxes, 28. Franchise taxes, 29. Other taxes on use of goods and performance of activities,
+    #                    30. Total taxes on use of goods and performance of activities, 31. Total Taxation Northern Territory State and Local Government, 32. Taxes received from public corporations,
+    #                    33. Taxes received from other levels of government, 34. Total Taxation Tasmania State and Local Government, 35. Total Taxation South Australia State and Local Government,
+    #                    36. Total Taxation Western Australia State and Local Government, 37. Total Taxation Queensland State and Local Government, 38. Total Taxation Victoria State and Local Government,
+    #                    39. Total Taxation New South Wales State and Local Government, 40. Total Taxation Australian Capital Territory State Government, 41. Personal income tax, 42. Government health insurance levy,
+    #                    43. Fringe benefits tax, 44. Other income tax levied on individuals, 45. Total income taxes levied on individuals, 46. Company income tax,  47. Income tax paid by superannuation funds,
+    #                    48. Other income tax levied on enterprises, 49. Total income taxes levied on enterprises, 50. Dividend withholding tax, 51. Interest withholding tax, 52. Other income taxes levied on non-residents,
+    #                    53. Total income taxes levied on non-residents, 54. Total taxes on income, 55. General taxes (sales taxes), 56. Goods and services tax (GST),
+    #                    57. Crude oil and LPG, 58. Other excises, 59. Agricultural production taxes, 60. Levies on statutory corporations, 61. Total excises and levies, 62. Taxes on international trade,
+    #                    63. Taxes on financial and capital transactions, 64. Taxes on the use of goods and performance of activities, 65. Total Taxation on Commonwealth Government'''},
+    #                                    {'role': 'user', 'content': typed_ques}]
+    #    output_ = struct_answer(openAI_key, engine, message, ndf, typed_ques).lstrip()
+    #    #print(output_)
+    #    return output_, type_of_query
+# Heading text; rendered inside a centered <h1> by the UI code in this file.
+title = 'CDI Citizen Intelligence 360 Tool: Tax Advisory'
+#description = """ Citizen Intelligence 360 sources data from various government agencies and makes it accessible to citizens in a user-friendly format. This can help citizens better understand how their local government works and stay informed about important initiatives and changes in their communities. Citizen Intelligence 360 is a citizen intelligence tool that uses mapping technology to provide citizens with access to government data, performance report and other civic information. It helps to increase transparency and accountability in local government, and empowers citizens to make informed decisions and participate in the democratic process."""
+# Introductory paragraph; passed to gr.Markdown below the banner.
+description = """Welcome to Citizen Intelligence 360 Tool, your intelligent tax companion. Harnessing the power of advanced GEN AI, the tax advisory tool is here to revolutionize your tax experience. With its unrivaled expertise and personalized guidance, the tool simplifies complex tax matters, providing accurate answers to your questions instantly. Say goodbye to confusing endless research and experience the future of tax advisory with Citizen Intelligence 360 Tool - the intelligent solution for your tax needs."""
+# Banner markup: a complete HTML document with an inline stylesheet and one
+# remotely hosted image. NOTE(review): the UI embeds this whole document
+# inside an <h1> in a Markdown component - confirm it renders as intended.
+img_ = '''<html><head><style>
+    .image {
+      width: 500px;
+      height: 150px;
+      position: static;
+    }</style></head><body><img src= https://blog.ipleaders.in/wp-content/uploads/2020/08/HDFC_Life_Filed_Your_Returns_Here%D0%A2s_How_You_Can_Check_to_Be_Sure_Sept19-1.jpg  class=image></body></html>'''
+def gradio_chatbox(input,history):
+    """Gradio click handler: answer ``input`` and append the turn to ``history``.
+
+    ``history`` is the list of ``(question, answer)`` tuples kept in the
+    session state; ``None`` is treated as an empty history. Returns the
+    updated history twice, once for the chatbot display and once for the
+    state. Blank input is ignored without calling the model.
+    """
+    history = history or []
+    typed_ques = input
+    if not typed_ques or not typed_ques.strip():
+        # Nothing to ask: skip the API round-trip and leave the chat as is.
+        return history, history
+    result = output_generation(typed_ques)
+    if result is None:
+        # Defensive: keep the UI alive if no answer tuple was produced.
+        answe = 'Are you sure this is a taxation related query? If not kindly ask taxation related queries'
+    else:
+        answe, _query_type = result
+    history.append((input, f"{answe}"))
+    #print(history)
+    return history, history
+# Build the page. Statement order inside the `with` block is kept as is,
+# since the components are created in the order they appear here.
+block = gr.Blocks()
+with block:
+    # Header: title, banner image markup, then the introductory text.
+    gr.Markdown(f'<center><h1>{title}</h1></center>')
+    gr.Markdown(f'<center><h1>{img_}</h1></center>')
+    gr.Markdown(description)
+    chatbot = gr.Chatbot(label="Tax GenAI")
+    message = gr.Textbox(label='Please ask your question',placeholder = "Welcome! This is Tax GenAI.\nHow can I assist you today?")
+    # Session state that carries the chat history between clicks.
+    state = gr.State()
+    submit = gr.Button("SEND")
+    #submit1 = gr.Button("SEND")
+    #submit2 = gr.Button("SEND")
+    # NOTE(review): Button.style(...) is presumably only available in older
+    # Gradio releases and no Gradio version is pinned - confirm before upgrading.
+    submit.style(full_width=None, size='lg' )
+    #submit1.style(full_width=None, size='lg')
+    #submit2.style(full_width=None, size='lg')
+    # Clicking SEND passes (message, state) to gradio_chatbox and writes its
+    # two return values to the chatbot display and the state.
+    submit.click(gradio_chatbox, inputs=[message, state], outputs=[chatbot, state])
+# Starts the app as soon as this module is executed.
+block.launch()

business_list.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bf7ebb97a4a7e1b3cfec4110a1f379ad03209f66012e7a43b30437209817fe5
+size 3905795

business_numpy.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fd12b4bbb6d106c4906ff2ac11fbc7a969ab2684f32b8d338d5b45dcb9ad1ca
+size 8523904

individual_list.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bacfc786a5aa9560c7b0138a9f52706982ac798860eb4a44fffa44a3492eb864
+size 2236261

individual_numpy.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:610bd857cccf1beeaa962415ba0e809622232901cbfd27f32fb3561aa347cbff
+size 5027968

requirements (1).txt ADDED Viewed

	@@ -0,0 +1,6 @@

+PyMuPDF
+openai
+frontend
+tensorflow==2.9.2
+tensorflow-hub==0.12.0
+scikit-learn==1.0.2