udituen committed on
Commit
231aa86
·
1 Parent(s): 5499943

fix document upload

Browse files
Files changed (2) hide show
  1. README.md +1 -5
  2. src/streamlit_app.py +5 -55
README.md CHANGED
@@ -11,9 +11,5 @@ pinned: false
11
  short_description: Upload a document and ask questions based on its content
12
  ---
13
 
14
- # Welcome to Streamlit!
15
 
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
-
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
11
  short_description: Upload a document and ask questions based on its content
12
  ---
13
 
14
+ # Welcome to DocsQA!
15
 
 
 
 
 
src/streamlit_app.py CHANGED
@@ -5,52 +5,6 @@ from langchain.chains import RetrievalQA
5
  from langchain_community.llms import HuggingFacePipeline
6
  from transformers import pipeline
7
 
8
- # # ----------------------
9
- # # Helper: Load and process uploaded file
10
- # # ----------------------
11
- # def read_uploaded_file(uploaded_file):
12
- # text = uploaded_file.read().decode("utf-8")
13
- # docs = text.split("\n")
14
- # return docs
15
-
16
- # # ----------------------
17
- # # Load lightweight LLM
18
- # # ----------------------e
19
- # @st.cache_resource
20
- # def load_llm():
21
- # pipe = pipeline("text-generation", model="google/flan-t5-small", max_new_tokens=256)
22
- # return HuggingFacePipeline(pipeline=pipe)
23
-
24
- # # ----------------------
25
- # # Build retriever from uploaded content
26
- # # ----------------------
27
- # def build_retriever(docs):
28
- # embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
29
- # db = FAISS.from_texts(docs, embeddings)
30
- # return db.as_retriever()
31
-
32
- # # ----------------------
33
- # # Streamlit UI
34
- # # ----------------------
35
-
36
- # uploaded_file = st.file_uploader("Upload a `.txt` file with agricultural content", type=["txt"])
37
- # query = st.text_input("Ask a question based on your uploaded file:")
38
-
39
- # # Check if user uploaded a file
40
- # if uploaded_file:
41
- # docs = read_uploaded_file(uploaded_file)
42
- # retriever = build_retriever(docs)
43
- # llm = load_llm()
44
- # qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
45
-
46
- # if query:
47
- # with st.spinner("Generating answer..."):
48
- # result = qa_chain.run(query)
49
- # st.success(result)
50
- # else:
51
- # st.info("Please upload a `.txt` file to begin.")
52
-
53
-
54
  # ----------------------
55
  # Sample Text Content
56
  # ----------------------
@@ -61,23 +15,20 @@ Composting is an organic way to enrich the soil.
61
  Weed management is essential for higher productivity."""
62
 
63
  EXAMPLE_QUESTIONS = [
64
- "What is this document about?"
65
  "What is the role of fertilizers in agriculture?",
66
  "Why is crop rotation important?",
67
  "How does composting help farming?",
68
  ]
69
 
70
- # ----------------------
71
  # Helper: Read uploaded file
72
- # ----------------------
73
  def read_uploaded_file(uploaded_file):
74
  text = uploaded_file.read().decode("utf-8")
75
  docs = text.split("\n")
76
  return docs
77
 
78
- # ----------------------
79
  # Load lightweight LLM
80
- # ----------------------
81
  @st.cache_resource
82
  def load_llm():
83
  pipe = pipeline("text-generation", model="google/flan-t5-small", max_new_tokens=256)
@@ -85,9 +36,8 @@ def load_llm():
85
 
86
  # extract
87
 
88
- # ----------------------
89
  # Build retriever from uploaded content
90
- # ----------------------
91
  def build_retriever(docs):
92
  # if docs.type == pdf
93
  # use langchain pymupdf to extract the text from the document
@@ -96,9 +46,8 @@ def build_retriever(docs):
96
  db = FAISS.from_texts(docs, embeddings)
97
  return db.as_retriever()
98
 
99
- # ----------------------
100
  # Streamlit UI
101
- # ----------------------
102
  st.title("DocsQA: Upload & Ask")
103
 
104
  st.markdown("Upload a text file and ask questions about its contents.")
@@ -120,6 +69,7 @@ uploaded_file = st.file_uploader("Upload your `.txt` file", type=["txt"])
120
  query = st.text_input("Ask a question:")
121
 
122
  if uploaded_file:
 
123
  docs = read_uploaded_file(uploaded_file)
124
  retriever = build_retriever(docs)
125
  llm = load_llm()
 
5
  from langchain_community.llms import HuggingFacePipeline
6
  from transformers import pipeline
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  # ----------------------
9
  # Sample Text Content
10
  # ----------------------
 
15
  Weed management is essential for higher productivity."""
16
 
17
  EXAMPLE_QUESTIONS = [
18
+ "What is this document about?",
19
  "What is the role of fertilizers in agriculture?",
20
  "Why is crop rotation important?",
21
  "How does composting help farming?",
22
  ]
23
 
24
+
25
  # Helper: Read uploaded file
 
26
  def read_uploaded_file(uploaded_file):
27
  text = uploaded_file.read().decode("utf-8")
28
  docs = text.split("\n")
29
  return docs
30
 
 
31
  # Load lightweight LLM
 
32
  @st.cache_resource
33
  def load_llm():
34
  pipe = pipeline("text-generation", model="google/flan-t5-small", max_new_tokens=256)
 
36
 
37
  # extract
38
 
39
+
40
  # Build retriever from uploaded content
 
41
  def build_retriever(docs):
42
  # if docs.type == pdf
43
  # use langchain pymupdf to extract the text from the document
 
46
  db = FAISS.from_texts(docs, embeddings)
47
  return db.as_retriever()
48
 
49
+
50
  # Streamlit UI
 
51
  st.title("DocsQA: Upload & Ask")
52
 
53
  st.markdown("Upload a text file and ask questions about its contents.")
 
69
  query = st.text_input("Ask a question:")
70
 
71
  if uploaded_file:
72
+ st.success("file uploaded")
73
  docs = read_uploaded_file(uploaded_file)
74
  retriever = build_retriever(docs)
75
  llm = load_llm()