yuran986 committed on
Commit
d933810
·
verified ·
1 Parent(s): 9822afa

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +83 -13
src/streamlit_app.py CHANGED
@@ -1,20 +1,90 @@
 
1
  import streamlit as st
2
- from transformers import pipeline
3
- from PIL import Image
4
 
5
@st.cache_resource
def _load_classifier():
    """Build the hot-dog image classifier once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    caching the pipeline avoids rebuilding the model on each rerun.
    """
    return pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")


# Bind the model to its own name instead of rebinding ``pipeline``, so the
# imported factory function is not shadowed by its own return value.
classifier = _load_classifier()

st.title("Hot Dog? Or Not?")

file_name = st.file_uploader("Upload a hot dog candidate image")

if file_name is not None:
    col1, col2 = st.columns(2)

    # Left column: the uploaded image.
    image = Image.open(file_name)
    col1.image(image, use_column_width=True)
    predictions = classifier(image)

    # Right column: one line per predicted label, score shown as a percentage.
    col2.header("Probabilities")
    for p in predictions:
        col2.subheader(f"{p['label']}: {round(p['score'] * 100, 1)}%")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import streamlit as st
 
 
3
 
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain_community.vectorstores import Chroma
7
+ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
8
+ from langchain.chains import RetrievalQA
9
 
10
# 1. Read the API key from the environment (on a Hugging Face Space, add
#    OPENAI_API_KEY as an environment variable / secret).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

st.set_page_config(page_title="RAG 文档问答 Demo", layout="wide")
st.title("📄 RAG 文档问答 Demo (LangChain + Chroma + Streamlit)")

st.markdown(
    """
说明:
1. 上传一份 PDF 文件(例如论文、说明文档)。
2. 稍等系统构建索引。
3. 在下方输入问题,将基于文档内容进行回答。
"""
)


@st.cache_resource(show_spinner=False, max_entries=4)
def _build_qa_chain(pdf_bytes: bytes):
    """Parse, split, embed and index a PDF, then wrap it in a RetrievalQA chain.

    The result is cached on the PDF bytes, so Streamlit reruns (one per widget
    interaction) reuse the existing index instead of re-embedding the whole
    document through the OpenAI API every time.

    Args:
        pdf_bytes: Raw content of the uploaded PDF file.

    Returns:
        A ``(qa_chain, page_count, chunk_count)`` tuple. ``qa_chain`` is
        ``None`` when no text chunks could be produced from the PDF.
    """
    import tempfile
    import uuid

    # PyPDFLoader needs a file path. Use a unique temporary file rather than
    # a fixed "temp.pdf" so concurrent sessions cannot overwrite each other.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(pdf_bytes)
        temp_pdf_path = tmp.name
    try:
        # 3. Load the PDF (one document per page).
        pages = PyPDFLoader(temp_pdf_path).load()
    finally:
        os.remove(temp_pdf_path)

    # 4. Split the pages into overlapping chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    documents = text_splitter.split_documents(pages)
    if not documents:
        # Nothing to index; let the caller report it instead of failing
        # inside the vector store.
        return None, len(pages), 0

    # 5. Build the vector store (Chroma). A fresh collection name per build
    #    keeps chunks from different uploads apart; with the default name,
    #    repeated builds in one process may land in the same collection.
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
    vectorstore = Chroma.from_documents(
        documents,
        embedding=embeddings,
        collection_name=f"pdf-{uuid.uuid4().hex}",
    )

    # 6. Build the RAG QA chain.
    llm = ChatOpenAI(
        temperature=0.1,
        model="gpt-4o-mini",  # or gpt-4o / gpt-3.5-turbo, etc.
        openai_api_key=OPENAI_API_KEY,
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
        chain_type="stuff",  # simply concatenates the retrieved chunks
        return_source_documents=True,
    )
    return qa_chain, len(pages), len(documents)


# 2. Sidebar: file upload.
uploaded_file = st.sidebar.file_uploader("上传 PDF 文件", type=["pdf"])

if not OPENAI_API_KEY:
    st.error("请在环境变量中设置 OPENAI_API_KEY 才能调用 OpenAI 接口。")
    st.stop()

if uploaded_file:
    with st.spinner("正在构建索引,请稍候..."):
        qa_chain, page_count, chunk_count = _build_qa_chain(uploaded_file.getvalue())

    st.sidebar.write(f"文档页数: {page_count}")
    st.sidebar.write(f"切分后的文本块数: {chunk_count}")

    if qa_chain is None:
        st.warning("未能从该 PDF 中提取到可用文本,无法构建索引。")
        st.stop()

    # 7. User question.
    user_question = st.text_input("在此输入关于文档的问题:", "")

    if st.button("生成回答") and user_question.strip():
        with st.spinner("正在检索并生成回答,请稍候..."):
            # ``invoke`` replaces the deprecated ``Chain.__call__``;
            # RetrievalQA reads the question from the "query" key.
            result = qa_chain.invoke({"query": user_question})
            answer = result["result"]
            source_docs = result["source_documents"]

        st.subheader("回答:")
        st.write(answer)

        with st.expander("查看参考片段(检索到的文档内容)"):
            for i, doc in enumerate(source_docs):
                st.markdown(f"**片段 {i+1}:**")
                st.write(doc.page_content)
                st.markdown("---")

else:
    st.info("请先在左侧上传一个 PDF 文件。")