Upload 3 files
Browse files
app.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import openai
|
| 3 |
+
import gradio as gr
|
| 4 |
+
#import nest_asyncio
|
| 5 |
+
import time
|
| 6 |
+
import asyncio
|
| 7 |
+
#nest_asyncio.apply()
|
| 8 |
+
|
| 9 |
+
from llama_index.embeddings.openai import OpenAIEmbedding
|
| 10 |
+
from llama_index.core.node_parser import SentenceSplitter
|
| 11 |
+
from llama_index.llms.openai import OpenAI
|
| 12 |
+
from llama_index.core import Settings
|
| 13 |
+
|
| 14 |
+
# Global llama_index defaults. They are assigned before any index or engine
# is created further down in this module. The assignment order is kept as-is
# because the Settings properties may depend on one another.

# Chat/completion model used to generate answers.
Settings.llm = OpenAI(model="gpt-3.5-turbo-0125")

# TODO: switch to a Hugging Face embedding model.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Chunking applied to documents when nodes are created.
Settings.node_parser = SentenceSplitter(chunk_overlap=128, chunk_size=1024)

# Output-length and context-window limits.
Settings.num_output = 512
Settings.context_window = 3900
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
from llama_index.core import (
|
| 23 |
+
VectorStoreIndex,
|
| 24 |
+
StorageContext,
|
| 25 |
+
PromptTemplate,
|
| 26 |
+
load_index_from_storage
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
from llama_index.readers.file import PyMuPDFReader
|
| 30 |
+
|
| 31 |
+
from theme import CustomTheme
|
| 32 |
+
|
| 33 |
+
# Persona handed to the chat engine as its system prompt.
system_prompt = (
    "You are a helpful assistant in the Bavarian ministry of science and education. "
)

# Retrieved-context preamble shared by both templates, so the chat path and
# the query path cannot drift apart.
_context_preamble = (
    "Context information is below. \n"
    "----------------------\n"
    "{context_str}\n"
    "----------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the question. "
)

# Behavioural rules appended to every prompt.
_answer_rules = (
    "If you don't know the answer, tell the user that you can't answer the question - DO NOT MAKE UP AN ANSWER. "
    "Do not make up your own answers, refer only from the given information. "
    "Your answers use correct grammar and your texting style is casual. "
    "Always be friendly, always reply in German! "
)

# Context template for the chat engine. Only {context_str} is filled in here;
# the user's message is passed to the chat engine separately.
context = _context_preamble + _answer_rules

# Question-answering template for the query engine. It must carry
# {query_str}, otherwise the user's question never reaches the model.
prompt = context + "\nQuery: {query_str}\nAnswer: "
|
| 61 |
+
|
| 62 |
+
# Wrap the QA prompt text so it can be installed on the query engine below.
prompt_template = PromptTemplate(prompt)


# Reuse a stored index when the storage directory exists; otherwise build the
# index from the PDF and persist it for later runs.
if os.path.exists("./storage"):
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)
else:
    loader = PyMuPDFReader()
    documents = loader.load(
        file_path="./data/Rahmenvereinbarung-2023-2027_ohne-Unterschrift.pdf"
    )
    index = VectorStoreIndex.from_documents(documents)
    # NOTE(review): persist() is called without a directory; presumably its
    # default matches "./storage" checked above - confirm.
    index.storage_context.persist()

# Conversational engine used by the chat UI.
chat_engine = index.as_chat_engine(
    chat_mode="context",
    system_prompt=system_prompt,
    context_template=context,
)

# Streaming query engine with the custom question-answering template.
query_engine = index.as_query_engine(streaming=True)
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": prompt_template}
)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
# Greeting used as the initial bot message of the chatbot component.
default_text = (
    "Ich beantworte Fragen zur Rahmenvereinbarung Hochschulen 2023 - 2027 "
    "gemäß Art. 8 Abs. 1 BayHIG. Wie kann ich helfen?"
)

# Example questions passed to the chat interface.
bot_examples = [
    "Was sind die 3 zentralen Themen im Text?",
    "Erstelle jeweils eine Zusammenfassung zu den zentralen Themen",
    "Welche Vereinbarungen wurden zwischen den Universitäten und dem Ministerium getroffen?",
    "Wie unterscheiden sich die Vereinbarungen der Universitäten von den Vereinbarungen der Hochschulen für angewandte Wissenschaften?",
    "Welche Maßnahmen sind zum Ausbau der Wissenschaftskommunikation vorgesehen?",
]

# Submit button; the "ask-button" class is the styling hook used by style.css.
submit_button = gr.Button(elem_classes=["ask-button"], value="Ask me")
|
| 104 |
+
|
| 105 |
+
def response(message, history):
    """Stream the chat engine's answer to *message* for the chat UI.

    Args:
        message: The user's latest chat input.
        history: Conversation history supplied by the caller. It is not used;
            the history is read from the module-level ``chat_engine``.

    Yields:
        The answer text accumulated so far, extended by one streamed token
        per yield.
    """
    # Install a fresh event loop for the calling thread before the chat
    # engine is used (presumably because the worker thread has none - confirm
    # against the llama_index version in use).
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        # NOTE(review): chat_engine is a module-level object, so its history
        # looks shared across all sessions - confirm this is intended.
        histories = chat_engine.chat_history
        answer = chat_engine.stream_chat(message, chat_history=histories)

        output_text = ""
        for token in answer.response_gen:
            # Short pause between tokens to pace the streamed output.
            time.sleep(0.1)
            output_text += token
            yield output_text
    finally:
        # Close the per-call loop so its resources are released promptly,
        # including when the consumer stops iterating early.
        loop.close()
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def main():
    """Build the Gradio chat interface and launch it.

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
    """
    openai.api_key = os.environ["OPENAI_API_KEY"]
    custom_theme = CustomTheme()

    # Markdown link to the source document, passed as the interface
    # description. The URL must end at ".pdf"; a trailing percent-encoded
    # non-breaking space would point at a different, non-existent file name.
    desc = "[Rahmenvereinbarung Hochschulen 2023 - 2027 gemäß Art. 8 Abs. 1 BayHIG](https://www.stmwk.bayern.de/download/22215_Rahmenvereinbarung_inkl_Unterschriften.pdf)"

    # Chat window pre-filled with the greeting as the first bot message.
    chatbot = gr.Chatbot(
        layout='bubbles',
        value=[[None, default_text]],
    )

    chat_interface = gr.ChatInterface(
        fn=response,
        retry_btn=None,
        undo_btn=None,
        title="MUC.DAI Chatbot",
        submit_btn=submit_button,
        clear_btn=None,
        theme=custom_theme,
        chatbot=chatbot,
        description=desc,
        css="style.css",
        examples=bot_examples,
    )

    chat_interface.launch(inbrowser=True, debug=True)


if __name__ == "__main__":
    main()
|
style.css
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Remove the padding of gap panels. */
.gap.panel {
  padding: 0 !important;
}

/*
Disabled rule, kept for reference: key visual as background of every div.

div {
  background-image: url("https://mediapool.hm.edu/media/mucdai/keyvisuals_1/mucdai_keyvisual_16_portrait_m.jpg") !important;
}
*/

/* Hide ".stretch" children of gap containers. */
div.gap > .stretch {
  display: none !important;
}

/* Anchor the group block to the bottom edge of its containing block. */
div.gap.panel > div.gr-group {
  bottom: 0;
  position: absolute;
}

/* Smaller page title. */
h1 {
  font-size: 24px !important;
}

/* Submit button (elem_classes=["ask-button"] in app.py). */
.ask-button {
  letter-spacing: 0.1rem;
  font-weight: bold;
  background-color: var(--color-accent);
}

/* Title and example items share the block-label text colour. */
.title,
.gallery-item {
  color: var(--block-label-text-color) !important;
}

/* Extra space below the message list. */
div.message-wrap {
  margin-bottom: 32px !important;
}
|
theme.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from gradio.themes.soft import Soft
|
| 2 |
+
from gradio.themes.utils import fonts
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class CustomTheme(Soft):
    """``Soft`` theme variant with custom light- and dark-mode colours."""

    def __init__(self):
        """Initialise the base theme with the Roboto font, then override colours."""
        super().__init__(font=fonts.GoogleFont("Roboto"))

        # Base palette.
        white = "#FFFFFF"
        # NOTE(review): this value has seven hex digits, which is not a valid
        # CSS colour; the intended purple needs to be confirmed.
        purple = "#2F1009D"
        red = "#FC5555"
        light_grey = "#e6e6e6"
        near_black = "#121212"
        dark_grey = "#242424"
        dark_accent_soft = "#101727"

        light_overrides = {
            "body_background_fill": white,
            "background_fill_secondary": white,
            "panel_background_fill": red,
            "border_color_primary": white,
            "block_background_fill": light_grey,
            "block_border_color": white,
            "block_label_background_fill": white,
            "input_background_fill": "#DADFE6",
            "input_border_color": light_grey,
            "button_secondary_background_fill": purple,
            "button_secondary_text_color": white,
            "color_accent_soft": "#49637a28",
            "border_color_accent_subdued": white,
        }

        dark_overrides = {
            "body_background_fill_dark": near_black,
            "background_fill_secondary_dark": dark_grey,
            "panel_background_fill_dark": dark_grey,
            "border_color_primary_dark": near_black,
            "block_background_fill_dark": dark_grey,
            "block_border_color_dark": dark_grey,
            "block_label_background_fill_dark": near_black,
            "block_label_text_color_dark": white,
            "input_background_fill_dark": red,
            "input_border_color_dark": dark_grey,
            "button_primary_background_fill_dark": purple,
            "button_primary_text_color_dark": near_black,
            "color_accent_soft_dark": dark_accent_soft,
            "border_color_accent_subdued_dark": dark_accent_soft,
        }

        # Apply the light-mode overrides first, then the dark-mode ones.
        super().set(**light_overrides, **dark_overrides)
|