"""Gradio app that answers questions about an uploaded EPUB using LangChain.

Pipeline: extract text from the EPUB, split it into overlapping chunks,
embed the chunks into a Chroma vector store, and run a retrieval QA chain
over that store for each question.
"""

import ebooklib
import gradio as gr
from bs4 import BeautifulSoup
from ebooklib import epub
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma


# === Process the EPUB file ===
def load_epub(epub_path):
    """Return the text of all document items in an EPUB, joined by newlines.

    Args:
        epub_path: Path of the EPUB file to read.

    Returns:
        One string containing the HTML-stripped text of every item whose
        type is ``ITEM_DOCUMENT``, in the order the book yields them.
    """
    book = epub.read_epub(epub_path)
    pages = []
    for item in book.get_items():
        # The ITEM_* constants are defined on the top-level ``ebooklib``
        # package; ``ebooklib.epub`` does not export them.
        if item.get_type() == ebooklib.ITEM_DOCUMENT:
            soup = BeautifulSoup(item.get_content(), "html.parser")
            pages.append(soup.get_text())
    return "\n".join(pages)


# === Split the text ===
def split_text(text):
    """Split ``text`` into chunks of up to 500 characters with 50 overlap."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return splitter.split_text(text)


# === Build the vector database ===
def create_vectorstore(texts):
    """Embed ``texts`` with BAAI/bge-small-en and index them in Chroma."""
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
    return Chroma.from_texts(texts, embeddings)


# === LLM (GPT-4 or open source) ===
def load_llm():
    """Return the language model used to generate answers."""
    # Uses GPT-4 (replace with an open-source model if needed).
    # NOTE(review): "gpt-4" is served as a chat model; a chat-model wrapper
    # may be the better fit -- confirm against the installed LangChain version.
    return OpenAI(model_name="gpt-4")


# === Build the Q&A chain ===
def create_qa_chain(llm, vectorstore):
    """Create a "stuff" RetrievalQA chain over ``vectorstore`` using ``llm``."""
    return RetrievalQA.from_chain_type(
        llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
    )


# === Chatbot function ===
def chatbot(epub_file, question):
    """Answer ``question`` using the contents of the uploaded EPUB.

    Args:
        epub_file: Value delivered by the ``gr.File`` input. May be ``None``
            (nothing uploaded), an object exposing the path as ``.name``, or
            the path itself.
        question: The user's question text.

    Returns:
        The answer produced by the QA chain, or a short hint for the user
        when the file, the question, or the book text is missing.
    """
    if epub_file is None:
        return "Bitte lade zuerst eine EPUB-Datei hoch."
    if not question or not question.strip():
        return "Bitte gib eine Frage ein."

    # Depending on the Gradio version/configuration the upload arrives as a
    # temp-file wrapper (path in ``.name``) or as a plain path string.
    epub_path = getattr(epub_file, "name", epub_file)

    text = load_epub(epub_path)
    texts = split_text(text)
    if not texts:
        # Nothing to retrieve from; skip building an index and calling the LLM.
        return "Die EPUB-Datei enthält keinen lesbaren Text."

    # The index is rebuilt for every question, exactly as before.
    vectorstore = create_vectorstore(texts)
    llm = load_llm()
    qa_chain = create_qa_chain(llm, vectorstore)
    return qa_chain.run(question)


# === Gradio UI ===
with gr.Blocks() as demo:
    gr.Markdown("## 📖 E-Book Chatbot mit LangChain")
    epub_input = gr.File(label="Lade eine EPUB-Datei hoch")
    question_input = gr.Textbox(label="Stelle eine Frage zu deinem Buch")
    answer_output = gr.Textbox(label="Antwort", interactive=False)
    submit_button = gr.Button("Frage stellen")
    submit_button.click(
        chatbot,
        inputs=[epub_input, question_input],
        outputs=answer_output,
    )


# === Start the app ===
if __name__ == "__main__":
    demo.launch()