import os
import openai
import gradio as gr
#import nest_asyncio
import time
import asyncio
#nest_asyncio.apply()
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
# Global LlamaIndex configuration — must be set before the index below is
# built or loaded, because Settings is read at index-construction time.
Settings.llm = OpenAI(model="gpt-3.5-turbo-0125")
# TODO: change to Huggingface embedding model
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
# 1024-token chunks with 128-token overlap between consecutive chunks.
Settings.node_parser = SentenceSplitter(chunk_size=1024, chunk_overlap=128)
# Cap on generated tokens per response.
Settings.num_output = 512
# Token budget for the model's context window (leaves headroom below 4k).
Settings.context_window = 3900
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    PromptTemplate,
    load_index_from_storage
)
from llama_index.readers.file import PyMuPDFReader
from theme import CustomTheme
# System persona handed to the chat engine.
system_prompt = (
    "You are a helpful assistant in the Bavarian ministry of science and education. "
)

# RAG instructions: ground every answer in the retrieved context, refuse
# instead of inventing an answer, reply casually in German, and cite the
# page number of the source passage.  The same text serves both as the
# chat engine's context template and as the query engine's QA prompt, so
# it is defined exactly once.
context = (
    "Context information is below. \n"
    "----------------------\n"
    "{context_str}\n"
    "----------------------\n"
    "Given the context information and not prior knowledge, "
    "If you don't know the answer, tell the user that you can't answer the question - DO NOT MAKE UP AN ANSWER. "
    "Do not make up your own answers, refer only from the given information. "
    "Your answers use correct grammar and your texting style is casual. "
    "Always be friendly, always reply in German! "
    "Put the page number of the information that you are referring to in brackets after the answer. "
)

# The QA prompt is byte-for-byte the same text as the context template.
prompt = context
# Wrap the QA prompt so it can replace the query engine's default template.
prompt_template = PromptTemplate(prompt)
# check if storage already exists
if not os.path.exists("./storage"):
    # First run: parse the PDF, build the vector index from its pages...
    # load the documents and create the index
    #documents = SimpleDirectoryReader("data").load_data()
    loader = PyMuPDFReader()
    documents = loader.load(file_path="./data/Rahmenvereinbarung-2023-2027_ohne-Unterschrift.pdf")
    index = VectorStoreIndex.from_documents(documents)
    # store it for later
    # ...and persist it to ./storage so later runs skip re-embedding.
    index.storage_context.persist()
else:
    # load the existing index
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)
# "context" chat mode retrieves relevant chunks per message and injects
# them via context_template; the system_prompt sets the persona.
chat_engine = index.as_chat_engine(
    chat_mode= "context", system_prompt=system_prompt, context_template=context)
# Streaming query engine; only the chat engine is used by the UI below,
# but its QA template is overridden here with the detailed prompt.
query_engine = index.as_query_engine(streaming=True)
#query_engine = index.as_query_engine(similarity_top_k=5)
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": prompt_template}
)
# Greeting shown as the bot's opening message in the chat window.
default_text="Ich beantworte Fragen zur Rahmenvereinbarung Hochschulen 2023 - 2027 gemäß Art. 8 Abs. 1 BayHIG. Wie kann ich helfen?"
# Example questions (German) offered as clickable suggestions in the UI.
bot_examples = [
    "Was sind die 3 zentralen Themen im Text?",
    "Erstelle jeweils eine Zusammenfassung zu den zentralen Themen",
    "Welche Vereinbarungen wurden zwischen den Universitäten und dem Ministerium getroffen?",
    "Wie unterscheiden sich die Vereinbarungen der Universitäten von den Vereinbarungen der Hochschulen für angewandte Wissenschaften?",
    "Welche Maßnahmen sind zum Ausbau der Wissenschaftskommunikation vorgesehen?",
]
# Custom submit button styled via the "ask-button" CSS class (style.css).
submit_button = gr.Button(
    value="Ask me",
    elem_classes=["ask-button"],
)
def response(message, history):
    """Stream the chat engine's answer to *message*, token by token.

    Parameters
    ----------
    message : str
        The user's question, forwarded to the LlamaIndex chat engine.
    history : list
        Gradio-supplied chat history (unused here; the engine keeps its
        own ``chat_history``, which is passed through explicitly below).

    Yields
    ------
    str
        The answer accumulated so far, so Gradio renders a typing effect.
    """
    # llama_index needs an asyncio event loop in this (Gradio worker)
    # thread.  Reuse the thread's existing loop when there is one instead
    # of leaking a fresh, never-closed loop on every request, which is
    # what unconditionally calling new_event_loop() did.
    try:
        asyncio.get_event_loop()
    except RuntimeError:
        asyncio.set_event_loop(asyncio.new_event_loop())
    histories = chat_engine.chat_history
    answer = chat_engine.stream_chat(message, chat_history=histories)
    output_text = ""
    for token in answer.response_gen:
        time.sleep(0.1)  # small artificial delay for a smoother typing effect
        output_text += token
        yield output_text
def main():
    """Entry point: configure the Gradio chat UI and launch it."""
    # The OpenAI key must be present in the environment; fail fast if not.
    openai.api_key = os.environ["OPENAI_API_KEY"]

    theme_obj = CustomTheme()
    # Markdown link to the official PDF, shown under the title.
    link_md = (
        "[Rahmenvereinbarung Hochschulen 2023 - 2027 gemäß Art. 8 Abs. 1 BayHIG]"
        "(https://www.stmwk.bayern.de/download/22215_Rahmenvereinbarung_inkl_Unterschriften.pdf%C2%A0)"
    )

    # Seed the conversation with the bot's greeting ([user, bot] pairs;
    # None means there was no user turn before the greeting).
    seeded_chatbot = gr.Chatbot(
        layout='bubbles',
        value=[[None, default_text]],
    )

    gr.ChatInterface(
        fn=response,
        chatbot=seeded_chatbot,
        title="MUC.DAI Chatbot",
        description=link_md,
        examples=bot_examples,
        theme=theme_obj,
        css="style.css",
        submit_btn=submit_button,
        retry_btn=None,
        undo_btn=None,
        clear_btn=None,
    ).launch(inbrowser=True, debug=True)


if __name__ == "__main__":
    main()