File size: 5,038 Bytes
4846467
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
900df6f
4846467
 
 
 
 
 
 
 
 
 
 
 
900df6f
4846467
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import openai
import gradio as gr
#import nest_asyncio
import time
import asyncio
#nest_asyncio.apply()

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings

# Global llama_index configuration shared by the index, chat engine and
# query engine defined below.
Settings.llm = OpenAI(model="gpt-3.5-turbo-0125")
# TODO: change to a Huggingface embedding model
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
# Split documents into 1024-token chunks with 128 tokens of overlap.
Settings.node_parser = SentenceSplitter(chunk_size=1024, chunk_overlap=128)
# Maximum number of tokens the LLM may generate per answer.
Settings.num_output = 512
# Context-window size the prompt builder assumes for the LLM.
Settings.context_window = 3900


from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    PromptTemplate,
    load_index_from_storage
)

from llama_index.readers.file import PyMuPDFReader

from theme import CustomTheme  

# System persona for the chat engine.
system_prompt = (
    "You are a helpful assistant in the Bavarian ministry of science and education. "
)

# The chat engine's context template and the query engine's QA prompt were two
# verbatim copies of the same 11-line literal; define the text once and alias it
# so future edits cannot drift apart.
_QA_INSTRUCTIONS = (
    "Context information is below. \n"
    "----------------------\n"
    "{context_str}\n"
    "----------------------\n"
    "Given the context information and not prior knowledge, "
    "If you don't know the answer, tell the user that you can't answer the question - DO NOT MAKE UP AN ANSWER. "
    "Do not make up your own answers, refer only from the given information. "
    "Your answers use correct grammar and your texting style is casual. "
    "Always be friendly, always reply in German! "
    "Put the page number of the information that you are referring to in brackets after the answer. "
)

# context_template for the chat engine ({context_str} is filled with retrieved chunks).
context = _QA_INSTRUCTIONS

# text_qa_template string for the query engine.
# NOTE(review): a text_qa_template normally also needs a "{query_str}"
# placeholder so the user's question reaches the LLM — confirm against the
# LlamaIndex prompt documentation before relying on query_engine output.
prompt = _QA_INSTRUCTIONS

prompt_template = PromptTemplate(prompt)



# Build the vector index on first run and persist it to ./storage; on later
# runs reload it from disk so the PDF is not re-parsed and re-embedded.
if not os.path.exists("./storage"):
    # load the documents and create the index
    #documents = SimpleDirectoryReader("data").load_data()
    loader = PyMuPDFReader()
    documents = loader.load(file_path="./data/Rahmenvereinbarung-2023-2027_ohne-Unterschrift.pdf")
    index = VectorStoreIndex.from_documents(documents)
    # store it for later (persists to the default ./storage directory)
    index.storage_context.persist()
else:
    # load the existing index
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

# Retrieval-augmented chat engine; "context" mode injects retrieved chunks via
# the context template on every turn.
chat_engine = index.as_chat_engine(
    chat_mode= "context", system_prompt=system_prompt, context_template=context)

# Single-shot streaming query engine over the same index.
# NOTE(review): query_engine is configured here but never used by the Gradio
# handler below — confirm whether it is still needed.
query_engine = index.as_query_engine(streaming=True)
#query_engine = index.as_query_engine(similarity_top_k=5)
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": prompt_template}
)


# Greeting shown as the bot's first message in the chat window (German).
default_text="Ich beantworte Fragen zur Rahmenvereinbarung Hochschulen 2023 - 2027 gemäß Art. 8 Abs. 1 BayHIG. Wie kann ich helfen?"

# Example questions offered as clickable suggestions below the input box.
bot_examples = [
    "Was sind die 3 zentralen Themen im Text?",
    "Erstelle jeweils eine Zusammenfassung zu den zentralen Themen",
    "Welche Vereinbarungen wurden zwischen den Universitäten und dem Ministerium getroffen?",
    "Wie unterscheiden sich die Vereinbarungen der Universitäten von den Vereinbarungen der Hochschulen für angewandte Wissenschaften?",
    "Welche Maßnahmen sind zum Ausbau der Wissenschaftskommunikation vorgesehen?",
]

# Custom submit button; styled via the "ask-button" class in style.css.
submit_button = gr.Button(
    value="Ask me",
    elem_classes=["ask-button"],
)

def response(message, history):
    """Gradio streaming handler: send *message* to the chat engine and yield
    the growing answer text as each token arrives.

    Args:
        message: The user's message from the chat box.
        history: Gradio's own chat history (unused here; the chat engine
            maintains its own memory).

    Yields:
        str: The accumulated answer text after each new token.
    """
    # Gradio runs this handler in a worker thread that has no asyncio event
    # loop, while llama_index's streaming chat expects one — create it here.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        histories = chat_engine.chat_history
        answer = chat_engine.stream_chat(message, chat_history=histories)

        output_text = ""
        for token in answer.response_gen:
            time.sleep(0.1)  # throttle tokens to produce a visible typing effect

            output_text += token
            yield output_text
    finally:
        # Fix: the loop was previously created on every call and never closed,
        # leaking a loop per message; close it once streaming finishes (or the
        # generator is abandoned by the client).
        loop.close()


def main():
    """Wire up and launch the Gradio chat UI for the Rahmenvereinbarung bot."""
    # The OpenAI key must be present in the environment (raises KeyError otherwise).
    openai.api_key = os.environ["OPENAI_API_KEY"]

    theme = CustomTheme()
    source_link = (
        "[Rahmenvereinbarung Hochschulen 2023 - 2027 gemäß Art. 8 Abs. 1 BayHIG]"
        "(https://www.stmwk.bayern.de/download/22215_Rahmenvereinbarung_inkl_Unterschriften.pdf%C2%A0)"
    )

    # Chat window pre-seeded with the German greeting as the bot's first message.
    bot_window = gr.Chatbot(
        layout='bubbles',
        value=[[None, default_text]],
    )

    ui = gr.ChatInterface(
        fn=response,
        chatbot=bot_window,
        title="MUC.DAI Chatbot",
        description=source_link,
        examples=bot_examples,
        submit_btn=submit_button,
        retry_btn=None,
        undo_btn=None,
        clear_btn=None,
        theme=theme,
        css="style.css",
    )

    ui.launch(inbrowser=True, debug=True)


# Script entry point.
if __name__ == "__main__":
    main()