udituen commited on
Commit
bc491de
·
1 Parent(s): 6b794dc

initial commit

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. Dockerfile +21 -0
  3. README.md +19 -2
  4. requirements.txt +10 -0
  5. src/streamlit_app.py +133 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python base image keeps the final image small.
FROM python:3.9-slim

WORKDIR /app

# Build tools and git are needed so pip can compile/install some wheels.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    software-properties-common \
    git \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): src/ is copied before `pip install`, so every source change
# invalidates the dependency layer cache — consider installing requirements
# before copying src/.
COPY requirements.txt ./
COPY src/ ./src/

RUN pip3 install -r requirements.txt

# Streamlit's default serving port.
EXPOSE 8501

# curl is installed above; Streamlit exposes this built-in health endpoint.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,2 +1,19 @@
1
- # tasktrack
2
- Task tracker application written in Python with ML functionalities
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: DocsQA
3
+ emoji: 📚
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: docker
7
+ app_port: 8501
8
+ tags:
9
+ - streamlit
10
+ pinned: false
11
+ short_description: Upload a document and ask questions based on its content
12
+ ---
13
+
14
+ # Welcome to Streamlit!
15
+
16
+ Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
+
18
+ If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
19
+ forums](https://discuss.streamlit.io).
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# UI / app framework
altair
pandas
streamlit
# API stack — not imported by src/streamlit_app.py at present; confirm before removing
fastapi
uvicorn
# RAG stack: chain orchestration, models, embeddings, vector store
langchain
transformers
sentence-transformers
faiss-cpu
langchain-community
src/streamlit_app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain_community.vectorstores import FAISS
3
+ from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain.chains import RetrievalQA
5
+ from langchain_community.llms import HuggingFacePipeline
6
+ from transformers import pipeline
7
+
8
+ # # ----------------------
9
+ # # Helper: Load and process uploaded file
10
+ # # ----------------------
11
+ # def read_uploaded_file(uploaded_file):
12
+ # text = uploaded_file.read().decode("utf-8")
13
+ # docs = text.split("\n")
14
+ # return docs
15
+
16
+ # # ----------------------
17
+ # # Load lightweight LLM
18
+ # # ----------------------e
19
+ # @st.cache_resource
20
+ # def load_llm():
21
+ # pipe = pipeline("text-generation", model="google/flan-t5-small", max_new_tokens=256)
22
+ # return HuggingFacePipeline(pipeline=pipe)
23
+
24
+ # # ----------------------
25
+ # # Build retriever from uploaded content
26
+ # # ----------------------
27
+ # def build_retriever(docs):
28
+ # embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
29
+ # db = FAISS.from_texts(docs, embeddings)
30
+ # return db.as_retriever()
31
+
32
+ # # ----------------------
33
+ # # Streamlit UI
34
+ # # ----------------------
35
+
36
+ # uploaded_file = st.file_uploader("Upload a `.txt` file with agricultural content", type=["txt"])
37
+ # query = st.text_input("Ask a question based on your uploaded file:")
38
+
39
+ # # Check if user uploaded a file
40
+ # if uploaded_file:
41
+ # docs = read_uploaded_file(uploaded_file)
42
+ # retriever = build_retriever(docs)
43
+ # llm = load_llm()
44
+ # qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
45
+
46
+ # if query:
47
+ # with st.spinner("Generating answer..."):
48
+ # result = qa_chain.run(query)
49
+ # st.success(result)
50
+ # else:
51
+ # st.info("Please upload a `.txt` file to begin.")
52
+
53
+
54
# ----------------------
# Sample Text Content
# ----------------------
# Small built-in corpus so users can try the app without uploading a file.
SAMPLE_TEXT = """Fertilizers help improve soil nutrients and crop yield.
Irrigation methods vary depending on climate and crop type.
Crop rotation can enhance soil health and reduce pests.
Composting is an organic way to enrich the soil.
Weed management is essential for higher productivity."""

# Example prompts shown in the UI expander.
# FIX: the original list was missing a comma after the first entry, so
# implicit string concatenation silently fused the first two questions
# into one string (list had 3 elements instead of 4).
EXAMPLE_QUESTIONS = [
    "What is this document about?",
    "What is the role of fertilizers in agriculture?",
    "Why is crop rotation important?",
    "How does composting help farming?",
]
69
+
70
# ----------------------
# Helper: Read uploaded file
# ----------------------
def read_uploaded_file(uploaded_file):
    """Decode an uploaded file as UTF-8 and split it into lines.

    Args:
        uploaded_file: file-like object whose ``read()`` returns bytes
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        list[str]: the content split on ``"\\n"`` — note a trailing
        newline yields a final empty string, matching ``str.split``.
    """
    raw_bytes = uploaded_file.read()
    decoded = raw_bytes.decode("utf-8")
    return decoded.split("\n")
77
+
78
# ----------------------
# Load lightweight LLM
# ----------------------
@st.cache_resource  # cached across Streamlit reruns so the model loads once
def load_llm():
    """Build and cache the answer-generation LLM.

    Returns:
        HuggingFacePipeline wrapping google/flan-t5-small, capped at
        256 new tokens per generation.

    Note:
        FIX: flan-t5 is an encoder-decoder (seq2seq) checkpoint, so the
        correct pipeline task is "text2text-generation"; the original
        "text-generation" task targets decoder-only models and is wrong
        for T5-family checkpoints.
    """
    pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-small",
        max_new_tokens=256,
    )
    return HuggingFacePipeline(pipeline=pipe)
85
+
86
+ # extract
87
+
88
# ----------------------
# Build retriever from uploaded content
# ----------------------
def build_retriever(docs):
    """Embed *docs* and return a retriever over an in-memory FAISS index.

    Args:
        docs: list of text chunks (here: one string per line of the
            uploaded file, as produced by read_uploaded_file).

    Returns:
        A LangChain retriever backed by FAISS.
    """
    # TODO(review): PDF support sketched below is not implemented.
    # if docs.type == pdf
    # use langchain pymupdf to extract the text from the document

    # all-MiniLM-L6-v2: small sentence-transformers model; presumably
    # downloaded from the HF hub on first run — confirm in deployment.
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    db = FAISS.from_texts(docs, embeddings)
    return db.as_retriever()
98
+
99
# ----------------------
# Streamlit UI
# ----------------------
st.title("DocsQA: Upload & Ask")

st.markdown("Upload a text file and ask questions about its contents.")

# Sample file download button so users can try the app without their own data
# (SAMPLE_TEXT is defined above in this module).
st.download_button(
    label="📄 Download Sample File",
    data=SAMPLE_TEXT,
    file_name="sample_agri.txt",
    mime="text/plain"
)

# Show example questions
with st.expander("💡 Try example questions"):
    for q in EXAMPLE_QUESTIONS:
        st.markdown(f"- {q}")

uploaded_file = st.file_uploader("Upload your `.txt` file", type=["txt"])
query = st.text_input("Ask a question:")

if uploaded_file:
    # NOTE(review): the FAISS index and QA chain are rebuilt on every
    # Streamlit rerun (each widget interaction) — only load_llm() is cached.
    # Consider caching build_retriever keyed on the file contents.
    docs = read_uploaded_file(uploaded_file)
    retriever = build_retriever(docs)
    llm = load_llm()
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    if query:
        with st.spinner("Generating answer..."):
            # NOTE(review): chain.run() is deprecated in newer langchain
            # releases and requirements.txt is unpinned — invoke() is the
            # successor; verify against the installed version.
            result = qa_chain.run(query)
            st.success(result)
else:
    st.info("Please upload a `.txt` file or use the sample provided.")