Spaces:
Build error
Build error
Commit ·
687bbdc
1
Parent(s): 868a18b
change main.py to FastAPI + Gradio mount point; Test the functions locally and fix some bugs
Browse files- .gitignore +1 -1
- app/apis/generator.py +9 -6
- app/app.py +1 -0
- app/front_end/gradio_ui.py +197 -0
- app/main.py +25 -3
- app/managers/chapter_manager.py +3 -2
- app/managers/summary_manager.py +3 -6
- app/managers/vector_manager.py +21 -15
- app/tests/langchain_test.ipynb +500 -5
- app/tests/summarizer_prompt_engineering.ipynb +0 -0
- app/utils/utils.py +0 -10
.gitignore
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
data/
|
| 6 |
docker/
|
| 7 |
k8s/
|
| 8 |
-
scripts/
|
| 9 |
app/Archived
|
| 10 |
Archived/
|
| 11 |
.env
|
|
|
|
| 5 |
data/
|
| 6 |
docker/
|
| 7 |
k8s/
|
| 8 |
+
# scripts/
|
| 9 |
app/Archived
|
| 10 |
Archived/
|
| 11 |
.env
|
app/apis/generator.py
CHANGED
|
@@ -6,7 +6,7 @@ import subprocess
|
|
| 6 |
import json
|
| 7 |
from ollama import generate
|
| 8 |
import os
|
| 9 |
-
from app.managers.chapter_manager import chapter_chain,
|
| 10 |
from app.managers.summary_manager import summary_chain
|
| 11 |
|
| 12 |
|
|
@@ -32,9 +32,9 @@ class ChapterOutput(BaseModel):
|
|
| 32 |
|
| 33 |
@router.post("/change")
|
| 34 |
def change_chapter(input: ChapterOutput):
|
| 35 |
-
|
| 36 |
summary_chain(input.chapter, chapter_num=input.chapter_num)
|
| 37 |
-
return
|
| 38 |
|
| 39 |
@router.post("/get_all")
|
| 40 |
def get_all_chapters():
|
|
@@ -45,11 +45,14 @@ def get_all_chapters():
|
|
| 45 |
content = f.read()
|
| 46 |
all_chapters.append({"filename": filename, "content": content})
|
| 47 |
all_chapters.sort(key=lambda x: int(x["filename"].split("_")[1].split(".")[0]))
|
| 48 |
-
return all_chapters
|
| 49 |
|
|
|
|
|
|
|
|
|
|
| 50 |
@router.post("/get_one")
|
| 51 |
-
def get_one_chapter(
|
| 52 |
-
filename = f"chapter_{chapter_num:03}.txt"
|
| 53 |
filepath = os.path.join("data/samples/raws", filename)
|
| 54 |
if os.path.exists(filepath):
|
| 55 |
with open(filepath, "r", encoding="utf-8") as f:
|
|
|
|
| 6 |
import json
|
| 7 |
from ollama import generate
|
| 8 |
import os
|
| 9 |
+
from app.managers.chapter_manager import chapter_chain, update_chapter
|
| 10 |
from app.managers.summary_manager import summary_chain
|
| 11 |
|
| 12 |
|
|
|
|
| 32 |
|
| 33 |
@router.post("/change")
|
| 34 |
def change_chapter(input: ChapterOutput):
|
| 35 |
+
update_chapter(input.chapter, chapter_num=input.chapter_num)
|
| 36 |
summary_chain(input.chapter, chapter_num=input.chapter_num)
|
| 37 |
+
return {"message": "Success"}
|
| 38 |
|
| 39 |
@router.post("/get_all")
|
| 40 |
def get_all_chapters():
|
|
|
|
| 45 |
content = f.read()
|
| 46 |
all_chapters.append({"filename": filename, "content": content})
|
| 47 |
all_chapters.sort(key=lambda x: int(x["filename"].split("_")[1].split(".")[0]))
|
| 48 |
+
return {"all_chapters": all_chapters}
|
| 49 |
|
| 50 |
+
class ChapterNumber(BaseModel):
|
| 51 |
+
chapter_num: int
|
| 52 |
+
|
| 53 |
@router.post("/get_one")
|
| 54 |
+
def get_one_chapter(input: ChapterNumber):
|
| 55 |
+
filename = f"chapter_{input.chapter_num:03}.txt"
|
| 56 |
filepath = os.path.join("data/samples/raws", filename)
|
| 57 |
if os.path.exists(filepath):
|
| 58 |
with open(filepath, "r", encoding="utf-8") as f:
|
app/app.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from app.main import app
|
app/front_end/gradio_ui.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import requests
|
| 3 |
+
|
| 4 |
+
API_BASE = "http://localhost:8000"
|
| 5 |
+
|
| 6 |
+
def get_texts(lang):
|
| 7 |
+
if lang == "中文":
|
| 8 |
+
return {
|
| 9 |
+
"title": "小说生成系统",
|
| 10 |
+
"generate_tab": "生成章节",
|
| 11 |
+
"change_tab": "修改章节",
|
| 12 |
+
"get_all_tab": "查看所有章节",
|
| 13 |
+
"get_one_tab": "查阅单章",
|
| 14 |
+
"input_text": "请写下你想要写的新章节包含什么样的故事情节?",
|
| 15 |
+
"change_text": "请输入修改后的内容",
|
| 16 |
+
"chapter_num": "章节编号",
|
| 17 |
+
"generate_button": "生成",
|
| 18 |
+
"change_button": "修改",
|
| 19 |
+
"get_all_button": "查看全部",
|
| 20 |
+
"get_one_button": "查阅",
|
| 21 |
+
"output": "输出结果",
|
| 22 |
+
"lang_label": "选择界面语言",
|
| 23 |
+
"view_one_dropdown": "章节编号",
|
| 24 |
+
"view_one_output": "章节内容",
|
| 25 |
+
"view_one_submit": "提交修改",
|
| 26 |
+
"view_one_edit": "编辑",
|
| 27 |
+
"view_one_status": "修改状态",
|
| 28 |
+
}
|
| 29 |
+
else:
|
| 30 |
+
return {
|
| 31 |
+
"title": "AI Novel Generator",
|
| 32 |
+
"generate_tab": "Generate Chapter",
|
| 33 |
+
"change_tab": "Edit Chapter",
|
| 34 |
+
"get_all_tab": "View All",
|
| 35 |
+
"get_one_tab": "View One",
|
| 36 |
+
"input_text": "Please write down what kind of storyline you would like the new chapter you are writing to contain?",
|
| 37 |
+
"change_text": "Enter new content to replace",
|
| 38 |
+
"chapter_num": "Chapter Number",
|
| 39 |
+
"generate_button": "Generate",
|
| 40 |
+
"change_button": "Update",
|
| 41 |
+
"get_all_button": "Get All",
|
| 42 |
+
"get_one_button": "Get One",
|
| 43 |
+
"output": "Output",
|
| 44 |
+
"lang_label": "Select UI language",
|
| 45 |
+
"view_one_dropdown": "Chapter Number",
|
| 46 |
+
"view_one_output": "Chapter Content",
|
| 47 |
+
"view_one_submit": "Submit Change",
|
| 48 |
+
"view_one_edit": "Edit chapter",
|
| 49 |
+
"view_one_status": "Change Status",
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# ==== API call functions ====
|
| 55 |
+
|
| 56 |
+
def generate(text):
|
| 57 |
+
response = requests.post(f"{API_BASE}/generate", json={"query": text})
|
| 58 |
+
return response.json().get("chapter", "")
|
| 59 |
+
|
| 60 |
+
def change(chapter_num, new_content):
|
| 61 |
+
response = requests.post(f"{API_BASE}/change", json={
|
| 62 |
+
"chapter": new_content,
|
| 63 |
+
"chapter_num": chapter_num
|
| 64 |
+
})
|
| 65 |
+
return response.json().get("message", "Success")
|
| 66 |
+
|
| 67 |
+
def get_all():
|
| 68 |
+
response = requests.post(f"{API_BASE}/get_all")
|
| 69 |
+
all_chapters = response.json().get("all_chapters", [])
|
| 70 |
+
chapter_count = len(all_chapters)
|
| 71 |
+
options = [f"{i+1}" for i in range(chapter_count)]
|
| 72 |
+
return options, all_chapters
|
| 73 |
+
|
| 74 |
+
def get_one(chapter_num):
|
| 75 |
+
response = requests.post(f"{API_BASE}/get_one", json={"chapter_num": chapter_num})
|
| 76 |
+
return response.json().get("content", response.json().get("error", "Chapter not found."))
|
| 77 |
+
|
| 78 |
+
def get_one_cached(chapter_num, cached_chapters):
|
| 79 |
+
try:
|
| 80 |
+
print("load_from_cache")
|
| 81 |
+
return cached_chapters[int(chapter_num) - 1]["content"]
|
| 82 |
+
except (IndexError, ValueError, TypeError):
|
| 83 |
+
print("load_from_backend")
|
| 84 |
+
return get_one(chapter_num)
|
| 85 |
+
|
| 86 |
+
def on_generate(prompt):
|
| 87 |
+
chapter = generate(prompt)
|
| 88 |
+
options, chapters = get_all()
|
| 89 |
+
return chapter, gr.update(choices=options, value=options[-1]), chapters
|
| 90 |
+
|
| 91 |
+
def on_load():
|
| 92 |
+
options, chapters = get_all()
|
| 93 |
+
return gr.update(choices=options, value=options[-1]), chapters
|
| 94 |
+
|
| 95 |
+
def on_select_chapter(chapter_num, cached):
|
| 96 |
+
chapter = get_one_cached(chapter_num, cached)
|
| 97 |
+
return chapter, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
|
| 98 |
+
|
| 99 |
+
def on_edit():
|
| 100 |
+
return gr.update(interactive=True), gr.update(visible=True), gr.update(visible=False)
|
| 101 |
+
|
| 102 |
+
def on_submit_edit(chapter_num, new_content):
|
| 103 |
+
msg = change(chapter_num, new_content)
|
| 104 |
+
return gr.update(interactive=False), gr.update(visible=False), gr.update(visible=True), msg
|
| 105 |
+
|
| 106 |
+
# ==== Gradio UI ====
|
| 107 |
+
def build_ui():
|
| 108 |
+
with gr.Blocks() as demo:
|
| 109 |
+
|
| 110 |
+
def update_labels(lang):
|
| 111 |
+
txt = get_texts(lang)
|
| 112 |
+
return (
|
| 113 |
+
gr.update(value=txt["title"]),
|
| 114 |
+
gr.update(label=txt["input_text"]), gr.update(label=txt["output"]),
|
| 115 |
+
gr.update(value=txt["generate_button"]),
|
| 116 |
+
gr.update(label=txt["lang_label"]),
|
| 117 |
+
gr.update(label=txt["generate_tab"]),
|
| 118 |
+
gr.update(label=txt["view_one_dropdown"]),
|
| 119 |
+
gr.update(label=txt["view_one_output"]),
|
| 120 |
+
gr.update(value=txt["view_one_submit"]),
|
| 121 |
+
gr.update(value=txt["view_one_edit"]),
|
| 122 |
+
gr.update(label=txt["view_one_status"]),
|
| 123 |
+
gr.update(label=txt["get_one_tab"]),
|
| 124 |
+
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
txt = get_texts("English")
|
| 128 |
+
|
| 129 |
+
title = gr.Markdown(f"# {txt['title']}")
|
| 130 |
+
lang_dropdown = gr.Dropdown(choices=["English", "中文"], value="English", label=txt["lang_label"])
|
| 131 |
+
|
| 132 |
+
cached_chapters = gr.State([])
|
| 133 |
+
|
| 134 |
+
with gr.Tab(txt["generate_tab"]) as generate_tab:
|
| 135 |
+
input_box = gr.Textbox(label=txt["input_text"])
|
| 136 |
+
output_box = gr.Textbox(label=txt["output"])
|
| 137 |
+
generate_btn = gr.Button(txt["generate_button"])
|
| 138 |
+
# generate_btn.click(fn=generate, inputs=input_box, outputs=output_box)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
# with gr.Tab(txt["change_tab"]) as change_tab:
|
| 142 |
+
# chapter_num = gr.Number(label=txt["chapter_num"])
|
| 143 |
+
# new_content = gr.Textbox(label=txt["change_text"])
|
| 144 |
+
# result = gr.Textbox(label=txt["output"])
|
| 145 |
+
# change_btn = gr.Button(txt["change_button"])
|
| 146 |
+
# change_btn.click(fn=change, inputs=[chapter_num, new_content], outputs=result)
|
| 147 |
+
|
| 148 |
+
# with gr.Tab(txt["get_all_tab"]) as get_all_tab:
|
| 149 |
+
# result_all = gr.Textbox(label=txt["output"], lines=20)
|
| 150 |
+
# all_btn = gr.Button(txt["get_all_button"])
|
| 151 |
+
# all_btn.click(fn=get_all, outputs=result_all)
|
| 152 |
+
|
| 153 |
+
# with gr.Tab(txt["get_one_tab"]) as get_one_tab:
|
| 154 |
+
# one_chapter_num = gr.Number(label=txt["chapter_num"])
|
| 155 |
+
# one_result = gr.Textbox(label=txt["output"], lines=10)
|
| 156 |
+
# one_btn = gr.Button(txt["get_one_button"])
|
| 157 |
+
# one_btn.click(fn=get_one, inputs=one_chapter_num, outputs=one_result)
|
| 158 |
+
|
| 159 |
+
with gr.Tab("View One") as view_one_tab:
|
| 160 |
+
view_one_dropdown = gr.Dropdown(label=txt["view_one_dropdown"], choices=[], visible=True)
|
| 161 |
+
view_one_output = gr.Textbox(label=txt["view_one_output"], lines=10, interactive=False)
|
| 162 |
+
view_one_submit = gr.Button(txt["view_one_submit"], visible=False)
|
| 163 |
+
view_one_edit = gr.Button(txt["view_one_edit"], visible=True)
|
| 164 |
+
view_one_status = gr.Textbox(label=txt["view_one_status"], visible=False)
|
| 165 |
+
|
| 166 |
+
view_one_dropdown.change(on_select_chapter, inputs=[view_one_dropdown, cached_chapters], outputs=[view_one_output, view_one_submit, view_one_edit, view_one_status])
|
| 167 |
+
view_one_edit.click(on_edit, outputs=[view_one_output, view_one_submit, view_one_edit])
|
| 168 |
+
view_one_submit.click(on_submit_edit, inputs=[view_one_dropdown, view_one_output], outputs=[view_one_output, view_one_submit, view_one_edit, view_one_status])
|
| 169 |
+
|
| 170 |
+
generate_btn.click(fn=on_generate, inputs=input_box,
|
| 171 |
+
outputs=[output_box, view_one_dropdown, cached_chapters])
|
| 172 |
+
|
| 173 |
+
demo.load(lambda: on_load(), outputs=[view_one_dropdown, cached_chapters])
|
| 174 |
+
|
| 175 |
+
lang_dropdown.change(
|
| 176 |
+
fn=update_labels,
|
| 177 |
+
inputs=lang_dropdown,
|
| 178 |
+
outputs=[
|
| 179 |
+
title,
|
| 180 |
+
input_box, output_box, generate_btn,
|
| 181 |
+
lang_dropdown,
|
| 182 |
+
generate_tab,
|
| 183 |
+
view_one_dropdown,
|
| 184 |
+
view_one_output,
|
| 185 |
+
view_one_submit,
|
| 186 |
+
view_one_edit,
|
| 187 |
+
view_one_status,
|
| 188 |
+
view_one_tab,
|
| 189 |
+
]
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
# demo.launch()
|
| 193 |
+
return demo
|
| 194 |
+
|
| 195 |
+
if __name__ == "__main__":
|
| 196 |
+
ui = build_ui()
|
| 197 |
+
ui.launch()
|
app/main.py
CHANGED
|
@@ -1,13 +1,35 @@
|
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
-
|
|
|
|
|
|
|
| 3 |
from app.apis import generator, extractor
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
app = FastAPI(title="AI Novelist RAG")
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
app.include_router(generator.router)
|
| 8 |
app.include_router(extractor.router)
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
@app.get("/")
|
| 12 |
-
def
|
| 13 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/main.py
|
| 2 |
from fastapi import FastAPI
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from starlette.responses import RedirectResponse
|
| 5 |
+
|
| 6 |
from app.apis import generator, extractor
|
| 7 |
+
from app.front_end.gradio_ui import build_ui
|
| 8 |
+
|
| 9 |
+
import gradio as gr
|
| 10 |
+
import uvicorn
|
| 11 |
|
| 12 |
app = FastAPI(title="AI Novelist RAG")
|
| 13 |
|
| 14 |
+
# 允许 CORS(可选)
|
| 15 |
+
app.add_middleware(
|
| 16 |
+
CORSMiddleware,
|
| 17 |
+
allow_origins=["*"],
|
| 18 |
+
allow_methods=["*"],
|
| 19 |
+
allow_headers=["*"],
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
# 后端 API
|
| 23 |
app.include_router(generator.router)
|
| 24 |
app.include_router(extractor.router)
|
| 25 |
|
| 26 |
+
# 前端(Gradio UI)
|
| 27 |
+
demo = build_ui()
|
| 28 |
+
app = gr.mount_gradio_app(app, demo, path="/ui") # 访问 /ui 查看 Gradio 页面
|
| 29 |
|
| 30 |
@app.get("/")
|
| 31 |
+
def root():
|
| 32 |
+
return RedirectResponse(url="/ui") # 访问根页面自动跳转到 Gradio UI
|
| 33 |
+
|
| 34 |
+
if __name__ == "__main__":
|
| 35 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|
app/managers/chapter_manager.py
CHANGED
|
@@ -52,13 +52,15 @@ def add_chapter(chapter: str, chapter_num: int):
|
|
| 52 |
content=chapter,
|
| 53 |
metadata={"type": "chapter", "chapter": chapter_num}
|
| 54 |
)
|
|
|
|
| 55 |
|
| 56 |
def update_chapter(chapter: str, chapter_num: int):
|
| 57 |
vm.update_document(
|
| 58 |
store_type="chapter",
|
| 59 |
-
|
| 60 |
new_content=chapter
|
| 61 |
)
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
|
|
@@ -68,7 +70,6 @@ def chapter_chain(query):
|
|
| 68 |
prompt = setup_prompt(query, context_info)
|
| 69 |
chapter = generate_chapter(prompt)
|
| 70 |
new_chapter_num = context_info[0] + 1
|
| 71 |
-
save_chapter_to_file(chapter, new_chapter_num)
|
| 72 |
add_chapter(chapter, new_chapter_num)
|
| 73 |
return chapter
|
| 74 |
|
|
|
|
| 52 |
content=chapter,
|
| 53 |
metadata={"type": "chapter", "chapter": chapter_num}
|
| 54 |
)
|
| 55 |
+
save_chapter_to_file(chapter, chapter_num)
|
| 56 |
|
| 57 |
def update_chapter(chapter: str, chapter_num: int):
|
| 58 |
vm.update_document(
|
| 59 |
store_type="chapter",
|
| 60 |
+
chapter_num=chapter_num,
|
| 61 |
new_content=chapter
|
| 62 |
)
|
| 63 |
+
save_chapter_to_file(chapter, chapter_num)
|
| 64 |
|
| 65 |
|
| 66 |
|
|
|
|
| 70 |
prompt = setup_prompt(query, context_info)
|
| 71 |
chapter = generate_chapter(prompt)
|
| 72 |
new_chapter_num = context_info[0] + 1
|
|
|
|
| 73 |
add_chapter(chapter, new_chapter_num)
|
| 74 |
return chapter
|
| 75 |
|
app/managers/summary_manager.py
CHANGED
|
@@ -25,15 +25,17 @@ def add_summary(summary: str, chapter_num: int):
|
|
| 25 |
content=summary,
|
| 26 |
metadata={"type": "summary", "chapter": chapter_num}
|
| 27 |
)
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
def update_summary(summary: str, chapter_num: int):
|
| 31 |
|
| 32 |
vm.update_document(
|
| 33 |
store_type="summary",
|
| 34 |
-
|
| 35 |
new_content=summary
|
| 36 |
)
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
def get_relevant_summaries(query: str, top_k: int = 10):
|
|
@@ -46,16 +48,11 @@ def get_relevant_summaries(query: str, top_k: int = 10):
|
|
| 46 |
|
| 47 |
def summary_chain(text: str, chapter_num: int = None) -> str:
|
| 48 |
new_chapter_num = get_latest_chapter_num() + 1
|
| 49 |
-
print("I'm here 1")
|
| 50 |
if chapter_num is None or chapter_num >= new_chapter_num:
|
| 51 |
chapter_num = new_chapter_num
|
| 52 |
-
print("I'm here 2")
|
| 53 |
summary = generate_summary(text)
|
| 54 |
-
print("I'm here 3")
|
| 55 |
-
save_summary_to_file(summary, chapter_num)
|
| 56 |
add_summary(summary, chapter_num)
|
| 57 |
else:
|
| 58 |
summary = generate_summary(text)
|
| 59 |
-
save_summary_to_file(summary, chapter_num) # cover old file
|
| 60 |
update_summary(summary, chapter_num)
|
| 61 |
return summary
|
|
|
|
| 25 |
content=summary,
|
| 26 |
metadata={"type": "summary", "chapter": chapter_num}
|
| 27 |
)
|
| 28 |
+
save_summary_to_file(summary, chapter_num)
|
| 29 |
|
| 30 |
|
| 31 |
def update_summary(summary: str, chapter_num: int):
|
| 32 |
|
| 33 |
vm.update_document(
|
| 34 |
store_type="summary",
|
| 35 |
+
chapter_num=chapter_num,
|
| 36 |
new_content=summary
|
| 37 |
)
|
| 38 |
+
save_summary_to_file(summary, chapter_num)
|
| 39 |
|
| 40 |
|
| 41 |
def get_relevant_summaries(query: str, top_k: int = 10):
|
|
|
|
| 48 |
|
| 49 |
def summary_chain(text: str, chapter_num: int = None) -> str:
|
| 50 |
new_chapter_num = get_latest_chapter_num() + 1
|
|
|
|
| 51 |
if chapter_num is None or chapter_num >= new_chapter_num:
|
| 52 |
chapter_num = new_chapter_num
|
|
|
|
| 53 |
summary = generate_summary(text)
|
|
|
|
|
|
|
| 54 |
add_summary(summary, chapter_num)
|
| 55 |
else:
|
| 56 |
summary = generate_summary(text)
|
|
|
|
| 57 |
update_summary(summary, chapter_num)
|
| 58 |
return summary
|
app/managers/vector_manager.py
CHANGED
|
@@ -53,26 +53,32 @@ def add_document(store_type: str, content: str, metadata: dict):
|
|
| 53 |
save_vectorstore(vs, store_type)
|
| 54 |
|
| 55 |
|
| 56 |
-
def update_document(store_type: str,
|
|
|
|
|
|
|
| 57 |
vs = load_vectorstore(store_type)
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
if doc.metadata.get("chapter") ==
|
| 62 |
]
|
| 63 |
-
|
| 64 |
-
|
|
|
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
new_vs = create_new_vectorstore(Embedding_model)
|
| 74 |
-
new_vs.add_documents(docs)
|
| 75 |
-
save_vectorstore(new_vs, store_type)
|
| 76 |
|
| 77 |
|
| 78 |
def get_relevant_documents(store_type: str, query: str, top_k: int = 10) -> List[Document]:
|
|
|
|
| 53 |
save_vectorstore(vs, store_type)
|
| 54 |
|
| 55 |
|
| 56 |
+
def update_document(store_type: str, chapter_num: str, new_content: str):
|
| 57 |
+
print(f"Updating chapter {chapter_num} in store: {store_type}")
|
| 58 |
+
|
| 59 |
vs = load_vectorstore(store_type)
|
| 60 |
|
| 61 |
+
target_doc_ids = [
|
| 62 |
+
doc_id for doc_id, doc in vs.docstore._dict.items()
|
| 63 |
+
if doc.metadata.get("chapter") == chapter_num
|
| 64 |
]
|
| 65 |
+
|
| 66 |
+
if not target_doc_ids:
|
| 67 |
+
raise ValueError(f"No document found for chapter: {chapter_num}")
|
| 68 |
|
| 69 |
+
for doc_id in target_doc_ids:
|
| 70 |
+
vs.delete([doc_id])
|
| 71 |
+
|
| 72 |
+
new_doc = Document(
|
| 73 |
+
page_content=new_content,
|
| 74 |
+
metadata={"type": store_type, "chapter": chapter_num}
|
| 75 |
+
)
|
| 76 |
+
vs.add_documents([new_doc])
|
| 77 |
+
|
| 78 |
+
save_vectorstore(vs, store_type)
|
| 79 |
+
|
| 80 |
+
print(f"Chapter {chapter_num} successfully updated.")
|
| 81 |
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def get_relevant_documents(store_type: str, query: str, top_k: int = 10) -> List[Document]:
|
app/tests/langchain_test.ipynb
CHANGED
|
@@ -2,20 +2,515 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"id": "16ea7d3d",
|
| 7 |
"metadata": {},
|
| 8 |
"outputs": [],
|
| 9 |
"source": [
|
| 10 |
-
"
|
| 11 |
-
"from langchain.
|
| 12 |
-
"from langchain.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
]
|
| 14 |
}
|
| 15 |
],
|
| 16 |
"metadata": {
|
| 17 |
"kernelspec": {
|
| 18 |
-
"display_name": "
|
| 19 |
"language": "python",
|
| 20 |
"name": "python3"
|
| 21 |
},
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
"id": "16ea7d3d",
|
| 7 |
"metadata": {},
|
| 8 |
"outputs": [],
|
| 9 |
"source": [
|
| 10 |
+
"import os\n",
|
| 11 |
+
"from langchain.vectorstores import FAISS\n",
|
| 12 |
+
"from langchain.docstore import InMemoryDocstore\n",
|
| 13 |
+
"from langchain_huggingface import HuggingFaceEmbeddings\n",
|
| 14 |
+
"from langchain.schema import Document\n",
|
| 15 |
+
"import faiss\n",
|
| 16 |
+
"\n"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "code",
|
| 21 |
+
"execution_count": 2,
|
| 22 |
+
"id": "31e17418",
|
| 23 |
+
"metadata": {},
|
| 24 |
+
"outputs": [
|
| 25 |
+
{
|
| 26 |
+
"data": {
|
| 27 |
+
"text/plain": [
|
| 28 |
+
"True"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
"execution_count": 2,
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"output_type": "execute_result"
|
| 34 |
+
}
|
| 35 |
+
],
|
| 36 |
+
"source": [
|
| 37 |
+
"os.path.exists(\"conda_env_test.ipynb\")"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": 3,
|
| 43 |
+
"id": "cb6dccc3",
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [],
|
| 46 |
+
"source": [
|
| 47 |
+
"\n",
|
| 48 |
+
"os.chdir('..\\..')"
|
| 49 |
+
]
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"cell_type": "code",
|
| 53 |
+
"execution_count": 4,
|
| 54 |
+
"id": "f15a094d",
|
| 55 |
+
"metadata": {},
|
| 56 |
+
"outputs": [
|
| 57 |
+
{
|
| 58 |
+
"name": "stderr",
|
| 59 |
+
"output_type": "stream",
|
| 60 |
+
"text": [
|
| 61 |
+
"c:\\Users\\nanfangwuyu\\.conda\\envs\\novel_rag\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 62 |
+
" from .autonotebook import tqdm as notebook_tqdm\n",
|
| 63 |
+
"Device set to use cuda:0\n"
|
| 64 |
+
]
|
| 65 |
+
}
|
| 66 |
+
],
|
| 67 |
+
"source": [
|
| 68 |
+
"from app.models.bart_large_cnn import BartSummaryModel\n",
|
| 69 |
+
"Summary_Model = BartSummaryModel()"
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "code",
|
| 74 |
+
"execution_count": 5,
|
| 75 |
+
"id": "d5b232b0",
|
| 76 |
+
"metadata": {},
|
| 77 |
+
"outputs": [
|
| 78 |
+
{
|
| 79 |
+
"data": {
|
| 80 |
+
"text/plain": [
|
| 81 |
+
"'Jack is a young mage with a spirit as untamed as the tempest itself. He has a rare affinity for all five elements that set him apart from others. Despite his burgeoning power, he felt the weight of an uncertain destiny resting heavily on his shoulders. Little did he know, the world was on the brink of change.'"
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
"execution_count": 5,
|
| 85 |
+
"metadata": {},
|
| 86 |
+
"output_type": "execute_result"
|
| 87 |
+
}
|
| 88 |
+
],
|
| 89 |
+
"source": [
|
| 90 |
+
"text = \"\"\"\n",
|
| 91 |
+
"**Chapter 1: Embers of Destiny**\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"In the realm of Eldoria, where the five elements danced in a delicate balance, the air crackled with a potential that whispered of ancient magics. Fire, fierce and unyielding, flickered in the heart of the volcanoes; water flowed with grace through the rivers and oceans, nurturing life in its embrace; wind roamed free, a playful spirit that caressed the mountains; electricity surged in the stormy skies, a wild tempest of power; and earth, the steadfast guardian, cradled the very foundations of existence. Here, in this vibrant tapestry woven by elemental forces, the fate of kingdoms hung by a thread.\n",
|
| 94 |
+
"\n",
|
| 95 |
+
"Amidst the verdant valleys of Eldoria stood Jack, a young mage with a spirit as untamed as the tempest itself. He was a figure of quiet strength, with tousled chestnut hair that caught the light of the setting sun and eyes as deep as the ocean’s depths—reflecting a soul that yearned for adventure. Jack had long been aware of his gifts, a rare affinity for all five elements that set him apart from others. Yet, despite his burgeoning power, he felt the weight of an uncertain destiny resting heavily on his shoulders.\n",
|
| 96 |
+
"\n",
|
| 97 |
+
"As he stood atop the cliffs of Varyn, the wind tousling his hair, Jack gazed out at the swirling mists of the horizon. The clouds, tinged with the colors of twilight, whispered promises of journeys yet to be taken. Today, a flicker of intuition ignited within him, a stirring that foretold an awakening—a call to harness the elemental forces that lay dormant, waiting for the right moment to erupt. With a heart full of determination and a mind sharpened by dreams, Jack took a deep breath, ready to carve his path through the ever-shifting tides of fate. Little did he know, the world was on the brink of change, and he was destined to be its catalyst.\n",
|
| 98 |
+
"\"\"\"\n",
|
| 99 |
+
"Summary_Model.summarize(text)"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"cell_type": "code",
|
| 104 |
+
"execution_count": 4,
|
| 105 |
+
"id": "864c3438",
|
| 106 |
+
"metadata": {},
|
| 107 |
+
"outputs": [
|
| 108 |
+
{
|
| 109 |
+
"name": "stderr",
|
| 110 |
+
"output_type": "stream",
|
| 111 |
+
"text": [
|
| 112 |
+
"c:\\Users\\nanfangwuyu\\.conda\\envs\\novel_rag\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
| 113 |
+
" from .autonotebook import tqdm as notebook_tqdm\n",
|
| 114 |
+
"Device set to use cuda:0\n"
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"name": "stdout",
|
| 119 |
+
"output_type": "stream",
|
| 120 |
+
"text": [
|
| 121 |
+
"Reload existing faiss\n"
|
| 122 |
+
]
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"data": {
|
| 126 |
+
"text/plain": [
|
| 127 |
+
"0"
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
"execution_count": 4,
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"output_type": "execute_result"
|
| 133 |
+
}
|
| 134 |
+
],
|
| 135 |
+
"source": [
|
| 136 |
+
"\n",
|
| 137 |
+
"from app.managers import vector_manager as vm\n",
|
| 138 |
+
"import os\n",
|
| 139 |
+
"\n",
|
| 140 |
+
"SAVE_DIR = \"data/samples/raws\"\n",
|
| 141 |
+
"os.makedirs(SAVE_DIR, exist_ok=True)\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"\n",
|
| 144 |
+
"\n",
|
| 145 |
+
"def get_latest_chapter_num(store_type=\"summary\"):\n",
|
| 146 |
+
" vectorstore = vm.load_vectorstore(store_type)\n",
|
| 147 |
+
" all_docs = vectorstore.docstore._dict.values()\n",
|
| 148 |
+
" chapter_numbers = [doc.metadata.get(\"chapter\", 0) for doc in all_docs if isinstance(doc.metadata.get(\"chapter\", 0), int)]\n",
|
| 149 |
+
" latest_chapter_num = max(chapter_numbers) if chapter_numbers else 0\n",
|
| 150 |
+
" return latest_chapter_num\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"get_latest_chapter_num()"
|
| 153 |
+
]
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"cell_type": "code",
|
| 157 |
+
"execution_count": 4,
|
| 158 |
+
"id": "2421634b",
|
| 159 |
+
"metadata": {},
|
| 160 |
+
"outputs": [],
|
| 161 |
+
"source": [
|
| 162 |
+
"def create_new_vectorstore(embedding_model):\n",
|
| 163 |
+
" index = faiss.IndexFlatL2(len(embedding_model.embed_query(\"hello world\")))\n",
|
| 164 |
+
" vectorstore = vector_store = FAISS(\n",
|
| 165 |
+
" embedding_function=embedding_model,\n",
|
| 166 |
+
" index=index,\n",
|
| 167 |
+
" docstore=InMemoryDocstore(),\n",
|
| 168 |
+
" index_to_docstore_id={},\n",
|
| 169 |
+
" )\n",
|
| 170 |
+
" return vectorstore"
|
| 171 |
+
]
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"cell_type": "code",
|
| 175 |
+
"execution_count": 5,
|
| 176 |
+
"id": "e42f4e7b",
|
| 177 |
+
"metadata": {},
|
| 178 |
+
"outputs": [
|
| 179 |
+
{
|
| 180 |
+
"name": "stdout",
|
| 181 |
+
"output_type": "stream",
|
| 182 |
+
"text": [
|
| 183 |
+
"True\n"
|
| 184 |
+
]
|
| 185 |
+
}
|
| 186 |
+
],
|
| 187 |
+
"source": [
|
| 188 |
+
"VECTORSTORE_PATH = \"data/vectorstore/summary_index\"\n",
|
| 189 |
+
"embedding_model = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
|
| 190 |
+
"\n",
|
| 191 |
+
"# Step 0: 加载或初始化向量库\n",
|
| 192 |
+
"if os.path.exists(VECTORSTORE_PATH):\n",
|
| 193 |
+
" print(True)\n",
|
| 194 |
+
" vectorstore = FAISS.load_local(VECTORSTORE_PATH, embedding_model, allow_dangerous_deserialization=True)\n",
|
| 195 |
+
"else:\n",
|
| 196 |
+
" vectorstore = create_new_vectorstore(embedding_model)\n",
|
| 197 |
+
" "
|
| 198 |
+
]
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"cell_type": "code",
|
| 202 |
+
"execution_count": 6,
|
| 203 |
+
"id": "3ccd8712",
|
| 204 |
+
"metadata": {},
|
| 205 |
+
"outputs": [
|
| 206 |
+
{
|
| 207 |
+
"data": {
|
| 208 |
+
"text/plain": [
|
| 209 |
+
"{}"
|
| 210 |
+
]
|
| 211 |
+
},
|
| 212 |
+
"execution_count": 6,
|
| 213 |
+
"metadata": {},
|
| 214 |
+
"output_type": "execute_result"
|
| 215 |
+
}
|
| 216 |
+
],
|
| 217 |
+
"source": [
|
| 218 |
+
"vectorstore = create_new_vectorstore(embedding_model)\n",
|
| 219 |
+
"vectorstore.docstore._dict"
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"cell_type": "code",
|
| 224 |
+
"execution_count": 28,
|
| 225 |
+
"id": "5f179882",
|
| 226 |
+
"metadata": {},
|
| 227 |
+
"outputs": [],
|
| 228 |
+
"source": [
|
| 229 |
+
"# Step 1: Get all documents & the latest chapter number\n",
|
| 230 |
+
"all_docs = vectorstore.docstore._dict.values()\n",
|
| 231 |
+
"chapter_numbers = [doc.metadata.get(\"chapter\", 0) for doc in all_docs if isinstance(doc.metadata.get(\"chapter\", 0), int)]\n",
|
| 232 |
+
"latest_chapter = max(chapter_numbers) if chapter_numbers else 0\n"
|
| 233 |
+
]
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"cell_type": "code",
|
| 237 |
+
"execution_count": 29,
|
| 238 |
+
"id": "c219e6ef",
|
| 239 |
+
"metadata": {},
|
| 240 |
+
"outputs": [
|
| 241 |
+
{
|
| 242 |
+
"data": {
|
| 243 |
+
"text/plain": [
|
| 244 |
+
"(dict_values([Document(id='e948cd24-4678-436f-b98f-9514db311f42', metadata={'chapter': 1}, page_content='In the magical realm of Eldoria, where fire, water, wind, electricity, and earth govern existence, Jack, a unique mage cloaked in indigo, possesses the rare ability to master all five elements. Revered and envied by townsfolk, he grapples with the loneliness of his gifts. As he stands at the edge of the Whispering Woods one evening, feeling the elemental energies around him, he senses a change in the air. Unbeknownst to him, the universe is weaving a fate that will challenge the core of his powers and alter his destiny.'), Document(id='27fff7cd-20be-4b44-be42-50af318b6e80', metadata={'chapter': 2}, page_content='In the twilight of Eldoria, Jack stands at the edge of the Whispering Woods, feeling a deep connection to the ancient trees. Drawn by an electric anticipation, he ventures into the forest, where reality blurs and whispers grow louder. Amidst the scents of earth and nightshade, he senses a change marked by a foreign smoke. Following a flickering glow, he discovers a clearing with a stone pedestal radiating energy. Compelled by an inner voice, Jack realizes that his destiny is intertwined with the secrets of this sacred place.')]),\n",
|
| 245 |
+
" 2)"
|
| 246 |
+
]
|
| 247 |
+
},
|
| 248 |
+
"execution_count": 29,
|
| 249 |
+
"metadata": {},
|
| 250 |
+
"output_type": "execute_result"
|
| 251 |
+
}
|
| 252 |
+
],
|
| 253 |
+
"source": [
|
| 254 |
+
"all_docs, latest_chapter"
|
| 255 |
+
]
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"cell_type": "code",
|
| 259 |
+
"execution_count": 30,
|
| 260 |
+
"id": "3319424c",
|
| 261 |
+
"metadata": {},
|
| 262 |
+
"outputs": [],
|
| 263 |
+
"source": [
|
| 264 |
+
"# Step 2: Get the summary of the latest chapter\n",
|
| 265 |
+
"latest_summary_docs = [doc for doc in all_docs if doc.metadata.get(\"chapter\") == latest_chapter]\n",
|
| 266 |
+
"latest_summary_doc = latest_summary_docs[0] if latest_summary_docs else \"\"\n",
|
| 267 |
+
"\n"
|
| 268 |
+
]
|
| 269 |
+
},
|
| 270 |
+
{
|
| 271 |
+
"cell_type": "code",
|
| 272 |
+
"execution_count": 31,
|
| 273 |
+
"id": "ea1a62ac",
|
| 274 |
+
"metadata": {},
|
| 275 |
+
"outputs": [],
|
| 276 |
+
"source": [
|
| 277 |
+
"# Step 3: User topic input\n",
|
| 278 |
+
"if latest_chapter == 0:\n",
|
| 279 |
+
" user_query = \"Help me write a fantasy novel. Introduces the worldview and the male protagonist. The world has five elements: fire, water, wind, electricity, and earth. The male protagonist names Jack, is a powerful mage.\" \n",
|
| 280 |
+
"else:\n",
|
| 281 |
+
" user_query = \"Continue writing.\"\n",
|
| 282 |
+
"\n",
|
| 283 |
+
"# Retrieve the summaries of related chapters\n",
|
| 284 |
+
"retriever = vectorstore.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": 3})\n",
|
| 285 |
+
"related_docs = retriever.get_relevant_documents(user_query)\n",
|
| 286 |
+
"# related_chapter_numbers = [doc.metadata.get(\"chapter\", 0) for doc in related_docs if isinstance(doc.metadata.get(\"chapter\", 0), int)]\n",
|
| 287 |
+
"if latest_summary_doc in related_docs:\n",
|
| 288 |
+
" related_docs.remove(latest_summary_doc)\n",
|
| 289 |
+
"related_summaries = \"\\n\\n\".join([\"Summary of Chapter {}:\\n\".format(doc.metadata.get(\"chapter\")) + doc.page_content for doc in related_docs])"
|
| 290 |
+
]
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"cell_type": "code",
|
| 294 |
+
"execution_count": 32,
|
| 295 |
+
"id": "b39d9d19",
|
| 296 |
+
"metadata": {},
|
| 297 |
+
"outputs": [
|
| 298 |
+
{
|
| 299 |
+
"data": {
|
| 300 |
+
"text/plain": [
|
| 301 |
+
"([Document(id='e948cd24-4678-436f-b98f-9514db311f42', metadata={'chapter': 1}, page_content='In the magical realm of Eldoria, where fire, water, wind, electricity, and earth govern existence, Jack, a unique mage cloaked in indigo, possesses the rare ability to master all five elements. Revered and envied by townsfolk, he grapples with the loneliness of his gifts. As he stands at the edge of the Whispering Woods one evening, feeling the elemental energies around him, he senses a change in the air. Unbeknownst to him, the universe is weaving a fate that will challenge the core of his powers and alter his destiny.')],\n",
|
| 302 |
+
" 'Summary of Chapter 1:\\nIn the magical realm of Eldoria, where fire, water, wind, electricity, and earth govern existence, Jack, a unique mage cloaked in indigo, possesses the rare ability to master all five elements. Revered and envied by townsfolk, he grapples with the loneliness of his gifts. As he stands at the edge of the Whispering Woods one evening, feeling the elemental energies around him, he senses a change in the air. Unbeknownst to him, the universe is weaving a fate that will challenge the core of his powers and alter his destiny.')"
|
| 303 |
+
]
|
| 304 |
+
},
|
| 305 |
+
"execution_count": 32,
|
| 306 |
+
"metadata": {},
|
| 307 |
+
"output_type": "execute_result"
|
| 308 |
+
}
|
| 309 |
+
],
|
| 310 |
+
"source": [
|
| 311 |
+
"related_docs, related_summaries"
|
| 312 |
+
]
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"cell_type": "code",
|
| 316 |
+
"execution_count": 33,
|
| 317 |
+
"id": "f37f3f8d",
|
| 318 |
+
"metadata": {},
|
| 319 |
+
"outputs": [
|
| 320 |
+
{
|
| 321 |
+
"name": "stdout",
|
| 322 |
+
"output_type": "stream",
|
| 323 |
+
"text": [
|
| 324 |
+
"You are a novelist working on a long novel.\n",
|
| 325 |
+
"\n",
|
| 326 |
+
"Here is a summary of the previously on few related chapters:\n",
|
| 327 |
+
"\n",
|
| 328 |
+
"\"Summary of Chapter 1:\n",
|
| 329 |
+
"In the magical realm of Eldoria, where fire, water, wind, electricity, and earth govern existence, Jack, a unique mage cloaked in indigo, possesses the rare ability to master all five elements. Revered and envied by townsfolk, he grapples with the loneliness of his gifts. As he stands at the edge of the Whispering Woods one evening, feeling the elemental energies around him, he senses a change in the air. Unbeknownst to him, the universe is weaving a fate that will challenge the core of his powers and alter his destiny.\"\n",
|
| 330 |
+
"\n",
|
| 331 |
+
"And here is a summary of the previously on the last chapter (Chapter 2): \n",
|
| 332 |
+
"\n",
|
| 333 |
+
"\"In the twilight of Eldoria, Jack stands at the edge of the Whispering Woods, feeling a deep connection to the ancient trees. Drawn by an electric anticipation, he ventures into the forest, where reality blurs and whispers grow louder. Amidst the scents of earth and nightshade, he senses a change marked by a foreign smoke. Following a flickering glow, he discovers a clearing with a stone pedestal radiating energy. Compelled by an inner voice, Jack realizes that his destiny is intertwined with the secrets of this sacred place.\"\n",
|
| 334 |
+
"\n",
|
| 335 |
+
"Please continue to WRITE the CONTENT of the NEXT CHAPTER - chapter 3, keeping it logically coherent, within about 300 words. \n",
|
| 336 |
+
"\n",
|
| 337 |
+
"Here is what the user expects to see in the next chapter: \"Continue writing.\"\n",
|
| 338 |
+
"\n",
|
| 339 |
+
" \n"
|
| 340 |
+
]
|
| 341 |
+
}
|
| 342 |
+
],
|
| 343 |
+
"source": [
|
| 344 |
+
"# Step 4: Build the prompt used to generate the content\n",
|
| 345 |
+
"if latest_chapter > 1:\n",
|
| 346 |
+
"\n",
|
| 347 |
+
" prompt = f\"\"\"You are a novelist working on a long novel.\n",
|
| 348 |
+
"\n",
|
| 349 |
+
"Here is a summary of the previously on few related chapters:\n",
|
| 350 |
+
"\n",
|
| 351 |
+
"\"{related_summaries}\"\n",
|
| 352 |
+
"\n",
|
| 353 |
+
"And here is a summary of the previously on the last chapter (Chapter {latest_summary_doc.metadata.get(\"chapter\")}): \n",
|
| 354 |
+
"\n",
|
| 355 |
+
"\"{latest_summary_doc.page_content}\"\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"Please continue to WRITE the CONTENT of the NEXT CHAPTER - chapter {latest_chapter + 1}, keeping it logically coherent, within about 300 words. \n",
|
| 358 |
+
"\n",
|
| 359 |
+
"Here is what the user expects to see in the next chapter: \"{user_query}\"\n",
|
| 360 |
+
"\n",
|
| 361 |
+
" \"\"\"\n",
|
| 362 |
+
"elif latest_chapter == 1:\n",
|
| 363 |
+
" prompt = f\"\"\"You are a novelist working on a long novel.\n",
|
| 364 |
+
"\n",
|
| 365 |
+
"Here is a summary of the previously on the first chapter (Chapter {latest_summary_doc.metadata.get(\"chapter\")}): \n",
|
| 366 |
+
"\n",
|
| 367 |
+
"\"{latest_summary_doc.page_content}\"\n",
|
| 368 |
+
"\n",
|
| 369 |
+
"Please continue to WRITE the CONTENT of the NEXT CHAPTER - chapter {latest_chapter + 1}, keeping it logically coherent, within about 300 words. \n",
|
| 370 |
+
"\n",
|
| 371 |
+
"Here is what the user expects to see in the next chapter: \"{user_query}\"\n",
|
| 372 |
+
"\"\"\"\n",
|
| 373 |
+
"else:\n",
|
| 374 |
+
" prompt = f\"\"\"You are a novelist working on a long novel.\n",
|
| 375 |
+
"\n",
|
| 376 |
+
"Please WRITE the content of the chapter 1 within about 300 words. \n",
|
| 377 |
+
"\n",
|
| 378 |
+
"Here is what the user expects to see in the chapter 1: \"{user_query}\"\n",
|
| 379 |
+
"\n",
|
| 380 |
+
" \"\"\"\n",
|
| 381 |
+
"\n",
|
| 382 |
+
"# # Your wrapped model class, which provides generate & summarize methods\n",
|
| 383 |
+
"\n",
|
| 384 |
+
"print(prompt)"
|
| 385 |
+
]
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"cell_type": "code",
|
| 389 |
+
"execution_count": null,
|
| 390 |
+
"id": "93d0b566",
|
| 391 |
+
"metadata": {},
|
| 392 |
+
"outputs": [],
|
| 393 |
+
"source": [
|
| 394 |
+
"from app.models.model import LLM\n",
|
| 395 |
+
"generated_chapter = LLM.generate(prompt, max_tokens=512)\n",
|
| 396 |
+
"generated_summary = LLM.summarize(generated_chapter)"
|
| 397 |
+
]
|
| 398 |
+
},
|
| 399 |
+
{
|
| 400 |
+
"cell_type": "code",
|
| 401 |
+
"execution_count": 35,
|
| 402 |
+
"id": "1b51a228",
|
| 403 |
+
"metadata": {},
|
| 404 |
+
"outputs": [
|
| 405 |
+
{
|
| 406 |
+
"name": "stdout",
|
| 407 |
+
"output_type": "stream",
|
| 408 |
+
"text": [
|
| 409 |
+
"Generated Chapter:\n",
|
| 410 |
+
"\n",
|
| 411 |
+
"**Chapter 3: The Awakening of Shadows**\n",
|
| 412 |
+
"\n",
|
| 413 |
+
"Jack stepped into the clearing, the air crackling with an otherworldly energy that hummed beneath his skin. The stone pedestal, a relic of ages past, loomed before him, its surface etched with runes that glowed faintly in the twilight. The whispers of the forest grew louder, transforming into a symphony of voices, each one tugging at the corners of his mind, urging him to unlock the secrets hidden within the stone.\n",
|
| 414 |
+
"\n",
|
| 415 |
+
"As he approached, the flickering glow intensified, casting dancing shadows across the trees. Jack could feel the weight of history pressing upon him, as if the very essence of the forest had gathered to witness this moment. A sense of purpose surged within him, igniting a flicker of hope amidst the loneliness he had often felt. Was this the connection he had long sought—the intertwining of his fate with something greater than himself?\n",
|
| 416 |
+
"\n",
|
| 417 |
+
"Tentatively, he reached out, his fingers brushing against the cool stone. Instantly, a jolt of energy coursed through him, igniting every element he commanded. Fire sparked to life in his fingertips, water swirled around his ankles, earth pulsed beneath his feet, wind whipped through his hair, and electricity crackled in the air. In that moment, Jack felt not like an outsider but rather a conduit of the very forces that shaped Eldoria.\n",
|
| 418 |
+
"\n",
|
| 419 |
+
"The runes flared brighter, revealing a vision—a shadowy figure cloaked in darkness, standing at the precipice of chaos. Jack’s heart raced as he realized that this figure held the key to his destiny. The whispers coalesced into a single voice, resonating deep within him: \"You must choose, Jack. Embrace the light or succumb to the shadows.\"\n",
|
| 420 |
+
"\n",
|
| 421 |
+
"With his heart pounding, Jack stood at the crossroads of fate, poised to confront the challenges that lay ahead. The echoes of the forest faded into silence, leaving him alone with the weight of his choice. \n",
|
| 422 |
+
"\n",
|
| 423 |
+
"Generated Summary:\n",
|
| 424 |
+
"\n",
|
| 425 |
+
"In Chapter 3, \"The Awakening of Shadows,\" Jack enters a mystical clearing in the forest of Eldoria, where he discovers a glowing stone pedestal inscribed with ancient runes. As he approaches, he feels a surge of energy and connection to the elemental forces around him. The whispers of the forest reveal a shadowy figure representing chaos, urging Jack to choose between light and darkness. Empowered and filled with purpose, Jack stands at a pivotal moment in his destiny, ready to confront the challenges that await him.\n"
|
| 426 |
+
]
|
| 427 |
+
}
|
| 428 |
+
],
|
| 429 |
+
"source": [
|
| 430 |
+
"print(f\"Generated Chapter:\\n\\n{generated_chapter} \\n\\nGenerated Summary:\\n\\n{generated_summary}\")"
|
| 431 |
+
]
|
| 432 |
+
},
|
| 433 |
+
{
|
| 434 |
+
"cell_type": "code",
|
| 435 |
+
"execution_count": null,
|
| 436 |
+
"id": "16893feb",
|
| 437 |
+
"metadata": {},
|
| 438 |
+
"outputs": [],
|
| 439 |
+
"source": [
|
| 440 |
+
"# generated_summary = \"Jack and Jill went up the hill to fetch a pail of water. Jack fell down and broke his crown, and Jill came tumbling after.\" # 👈 replace with the model-generated content\n",
|
| 441 |
+
"# generated_summary = \"Lucy and her brother went to the market to buy some groceries. They met a friendly dog on the way.\" # 👈 replace with the model-generated content\n",
|
| 442 |
+
"# generated_summary = \"The sun was shining brightly as they walked home. They had a great time at the market.\" # 👈 replace with the model-generated content"
|
| 443 |
+
]
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"cell_type": "code",
|
| 447 |
+
"execution_count": 36,
|
| 448 |
+
"id": "90e8904e",
|
| 449 |
+
"metadata": {},
|
| 450 |
+
"outputs": [
|
| 451 |
+
{
|
| 452 |
+
"data": {
|
| 453 |
+
"text/plain": [
|
| 454 |
+
"['a855b3b5-75ab-4e3b-88c7-81763ee7fd7f']"
|
| 455 |
+
]
|
| 456 |
+
},
|
| 457 |
+
"execution_count": 36,
|
| 458 |
+
"metadata": {},
|
| 459 |
+
"output_type": "execute_result"
|
| 460 |
+
}
|
| 461 |
+
],
|
| 462 |
+
"source": [
|
| 463 |
+
"# Step 5: Build and add the new document\n",
|
| 464 |
+
"latest_chapter += 1\n",
|
| 465 |
+
"new_doc = Document(\n",
|
| 466 |
+
" page_content=generated_summary,\n",
|
| 467 |
+
" metadata={\"chapter\": latest_chapter}\n",
|
| 468 |
+
")\n",
|
| 469 |
+
"vectorstore.add_documents([new_doc])\n"
|
| 470 |
+
]
|
| 471 |
+
},
|
| 472 |
+
{
|
| 473 |
+
"cell_type": "code",
|
| 474 |
+
"execution_count": 37,
|
| 475 |
+
"id": "7260c9ef",
|
| 476 |
+
"metadata": {},
|
| 477 |
+
"outputs": [
|
| 478 |
+
{
|
| 479 |
+
"data": {
|
| 480 |
+
"text/plain": [
|
| 481 |
+
"{'e948cd24-4678-436f-b98f-9514db311f42': Document(id='e948cd24-4678-436f-b98f-9514db311f42', metadata={'chapter': 1}, page_content='In the magical realm of Eldoria, where fire, water, wind, electricity, and earth govern existence, Jack, a unique mage cloaked in indigo, possesses the rare ability to master all five elements. Revered and envied by townsfolk, he grapples with the loneliness of his gifts. As he stands at the edge of the Whispering Woods one evening, feeling the elemental energies around him, he senses a change in the air. Unbeknownst to him, the universe is weaving a fate that will challenge the core of his powers and alter his destiny.'),\n",
|
| 482 |
+
" '27fff7cd-20be-4b44-be42-50af318b6e80': Document(id='27fff7cd-20be-4b44-be42-50af318b6e80', metadata={'chapter': 2}, page_content='In the twilight of Eldoria, Jack stands at the edge of the Whispering Woods, feeling a deep connection to the ancient trees. Drawn by an electric anticipation, he ventures into the forest, where reality blurs and whispers grow louder. Amidst the scents of earth and nightshade, he senses a change marked by a foreign smoke. Following a flickering glow, he discovers a clearing with a stone pedestal radiating energy. Compelled by an inner voice, Jack realizes that his destiny is intertwined with the secrets of this sacred place.'),\n",
|
| 483 |
+
" 'a855b3b5-75ab-4e3b-88c7-81763ee7fd7f': Document(id='a855b3b5-75ab-4e3b-88c7-81763ee7fd7f', metadata={'chapter': 3}, page_content='In Chapter 3, \"The Awakening of Shadows,\" Jack enters a mystical clearing in the forest of Eldoria, where he discovers a glowing stone pedestal inscribed with ancient runes. As he approaches, he feels a surge of energy and connection to the elemental forces around him. The whispers of the forest reveal a shadowy figure representing chaos, urging Jack to choose between light and darkness. Empowered and filled with purpose, Jack stands at a pivotal moment in his destiny, ready to confront the challenges that await him.')}"
|
| 484 |
+
]
|
| 485 |
+
},
|
| 486 |
+
"execution_count": 37,
|
| 487 |
+
"metadata": {},
|
| 488 |
+
"output_type": "execute_result"
|
| 489 |
+
}
|
| 490 |
+
],
|
| 491 |
+
"source": [
|
| 492 |
+
"vectorstore.docstore._dict"
|
| 493 |
+
]
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"cell_type": "code",
|
| 497 |
+
"execution_count": 20,
|
| 498 |
+
"id": "cd92e6e8",
|
| 499 |
+
"metadata": {},
|
| 500 |
+
"outputs": [],
|
| 501 |
+
"source": [
|
| 502 |
+
"# Step 6: Save the vector store\n",
|
| 503 |
+
"vectorstore.save_local(VECTORSTORE_PATH)\n",
|
| 504 |
+
"\n",
|
| 505 |
+
"# Step 7: Save the chapter text and summary locally (you already have this logic)\n",
|
| 506 |
+
"# save_chapter_text(latest_chapter + 1, generated_chapter)\n",
|
| 507 |
+
"# save_summary_text(latest_chapter + 1, generated_summary)\n"
|
| 508 |
]
|
| 509 |
}
|
| 510 |
],
|
| 511 |
"metadata": {
|
| 512 |
"kernelspec": {
|
| 513 |
+
"display_name": "novel_rag",
|
| 514 |
"language": "python",
|
| 515 |
"name": "python3"
|
| 516 |
},
|
app/tests/summarizer_prompt_engineering.ipynb
ADDED
|
File without changes
|
app/utils/utils.py
CHANGED
|
@@ -1,11 +1 @@
|
|
| 1 |
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
# def summarize_a_chapter(text: str) -> str:
|
| 5 |
-
# """
|
| 6 |
-
# Summarize a chapter using the model.
|
| 7 |
-
# """
|
| 8 |
-
# # Assuming `model` is a pre-defined object that can summarize text
|
| 9 |
-
# summary = model.summarize(text)
|
| 10 |
-
# return summary
|
| 11 |
-
|
|
|
|
| 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|