foozy committed on
Commit
638c7af
·
verified ·
1 Parent(s): 06be05f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ebooklib import epub
3
+ from bs4 import BeautifulSoup
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.vectorstores import Chroma
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.chains import RetrievalQA
8
+ from langchain.llms import OpenAI
9
+
10
+ # === EPUB-Datei verarbeiten ===
11
def load_epub(epub_path):
    """Extract the plain text of every document item in an EPUB file.

    Args:
        epub_path: Filesystem path of the EPUB file to read.

    Returns:
        The text of all document items, in the order ``book.get_items()``
        yields them, joined with newlines.
    """
    # ITEM_DOCUMENT is defined on the top-level ``ebooklib`` package, not on
    # the ``ebooklib.epub`` submodule, so ``epub.ITEM_DOCUMENT`` would raise
    # AttributeError.
    import ebooklib

    book = epub.read_epub(epub_path)
    return "\n".join(
        # Strip the XHTML markup and keep only the readable text.
        BeautifulSoup(item.get_content(), "html.parser").get_text()
        for item in book.get_items()
        if item.get_type() == ebooklib.ITEM_DOCUMENT
    )
19
+
20
+ # === Text aufteilen ===
21
def split_text(text):
    """Break a long text into overlapping chunks for embedding.

    Args:
        text: The full text to split.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_text`` for
        ``text``, using a chunk size of 500 and a chunk overlap of 50.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_text(text)
24
+
25
+ # === Vektordatenbank erstellen ===
26
def create_vectorstore(texts, model_name="BAAI/bge-small-en"):
    """Embed text chunks and index them in their own Chroma collection.

    Args:
        texts: The text chunks to index.
        model_name: Name of the Hugging Face embedding model to use.

    Returns:
        The Chroma vector store built from ``texts``.
    """
    import uuid

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    # Without an explicit name every call writes into Chroma's default
    # collection. The in-memory client can be shared within one process, so
    # chunks from earlier calls (or earlier books) would pile up there and
    # leak into later answers. A unique name keeps each store isolated.
    collection_name = "epub-" + uuid.uuid4().hex
    return Chroma.from_texts(texts, embeddings, collection_name=collection_name)
29
+
30
+ # === LLM (GPT-4 oder Open-Source) ===
31
def load_llm(model_name="gpt-4"):
    """Create the language model that answers the questions.

    Args:
        model_name: Name of the OpenAI chat model to use. Uses GPT-4 by
            default; replace this function's body to plug in an open-source
            model instead.

    Returns:
        A ``ChatOpenAI`` instance configured for ``model_name``.
    """
    # GPT-4 is a chat model, so it needs the chat wrapper rather than the
    # completion-style ``OpenAI`` class.
    from langchain.chat_models import ChatOpenAI

    return ChatOpenAI(model_name=model_name)
33
+
34
+ # === Q&A-Kette erstellen ===
35
def create_qa_chain(llm, vectorstore):
    """Build a retrieval question-answering chain over a vector store.

    Args:
        llm: The language model handed to the chain.
        vectorstore: The vector store whose ``as_retriever()`` supplies the
            retriever for the chain.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` with the
        ``"stuff"`` chain type.
    """
    retriever = vectorstore.as_retriever()
    return RetrievalQA.from_chain_type(
        llm,
        chain_type="stuff",
        retriever=retriever,
    )
37
+
38
+ # === Chatbot-Funktion ===
39
def chatbot(epub_file, question):
    """Answer a question about an uploaded EPUB file.

    Args:
        epub_file: The upload handed over by the Gradio file component:
            either a path string or an object exposing the path as ``.name``;
            ``None`` when nothing has been uploaded.
        question: The user's question about the book.

    Returns:
        The answer produced by the QA chain, or a short hint for the user
        when the file, the question or the book text is missing.
    """
    # Guard against the button being pressed before the inputs are filled;
    # otherwise the user only sees an opaque error.
    if epub_file is None:
        return "Bitte lade zuerst eine EPUB-Datei hoch."
    if not question or not question.strip():
        return "Bitte gib eine Frage ein."

    # Depending on the Gradio version the upload arrives as a plain path
    # string or as a temp-file wrapper carrying the path in ``.name``.
    epub_path = epub_file if isinstance(epub_file, str) else epub_file.name

    texts = split_text(load_epub(epub_path))
    if not texts:
        # Nothing to index, so there is nothing the chain could retrieve.
        return "In dieser EPUB-Datei wurde kein Text gefunden."

    # NOTE: the index and the model are rebuilt for every question.
    vectorstore = create_vectorstore(texts)
    qa_chain = create_qa_chain(load_llm(), vectorstore)
    return qa_chain.run(question)
46
+
47
+ # === Gradio UI ===
48
# Lay out the page: a heading, the two inputs, the read-only answer box and
# the button that triggers the question.
with gr.Blocks() as demo:
    gr.Markdown("## 📖 E-Book Chatbot mit LangChain")
    epub_input = gr.File(label="Lade eine EPUB-Datei hoch")
    question_input = gr.Textbox(label="Stelle eine Frage zu deinem Buch")
    answer_output = gr.Textbox(label="Antwort", interactive=False)
    submit_button = gr.Button("Frage stellen")

    # Clicking the button sends the upload and the question to ``chatbot``
    # and shows its return value in the answer box.
    submit_button.click(
        fn=chatbot,
        inputs=[epub_input, question_input],
        outputs=answer_output,
    )

# === Start the app ===
if __name__ == "__main__":
    demo.launch()