udituen commited on
Commit
4c9a8f9
·
unverified ·
1 Parent(s): c1d3591

Delete src directory

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +0 -164
src/streamlit_app.py DELETED
@@ -1,164 +0,0 @@
1
- import streamlit as st
2
- from langchain_community.vectorstores import FAISS
3
- from langchain_huggingface.embeddings import HuggingFaceEmbeddings
4
- from langchain.chains import RetrievalQA
5
- from langchain_community.llms import HuggingFacePipeline
6
- from transformers import pipeline
7
- from langchain.prompts import PromptTemplate
8
- from langchain.chains.combine_documents import create_stuff_documents_chain
9
- from langchain_text_splitters.character import RecursiveCharacterTextSplitter
10
- from langchain.docstore.document import Document
11
- from langchain.chains import create_retrieval_chain
12
- from langchain_community.llms import Ollama
13
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
14
- import os
15
- import itertools
16
- from langchain_community.document_loaders import PyMuPDFLoader
17
-
18
# ----------------------
# Sample Text Content
# ----------------------
# Tiny built-in corpus so users can try the app without uploading a file.
SAMPLE_TEXT = """Fertilizers help improve soil nutrients and crop yield.
Irrigation methods vary depending on climate and crop type.
Crop rotation can enhance soil health and reduce pests.
Composting is an organic way to enrich the soil.
Weed management is essential for higher productivity."""

# Suggested prompts surfaced in the UI expander.
EXAMPLE_QUESTIONS = [
    "What is this document about?",
    "What is the role of fertilizers in agriculture?",
    "Why is crop rotation important?",
    "How does composting help farming?",
]

# Hugging Face token comes from Streamlit secrets; exported to the environment
# so the transformers/langchain stack can authenticate gated model downloads.
HF_TOKEN = st.secrets["HF_TOKEN"]
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN

# Prompt for the stuff-documents chain.
# BUG FIX: the template's placeholders are {context} and {input}
# (create_retrieval_chain supplies the question under the "input" key),
# but input_variables listed "question", which the template never uses.
# The declared variables must match the placeholders.
prompt = PromptTemplate(
    input_variables=["context", "input"],
    template=(
        "You are a document question and answer expert.\n"
        "Use the context below to answer the question.\n"
        "Context:\n{context}\n\n"
        "Question: {input}\n"
    )
)
46
-
47
# Helper: Read uploaded file
def read_uploaded_file(uploaded_file):
    """Return the full contents of *uploaded_file* decoded as UTF-8 text."""
    raw_bytes = uploaded_file.read()
    return raw_bytes.decode("utf-8")
50
-
51
def ingest_pdf(file_name):
    """
    Load the content of a PDF file using PyMuPDF.

    Args:
        file_name (str): path to the PDF file on disk.

    Returns:
        list: one Document per page of the PDF.
    """
    loader = PyMuPDFLoader(file_name)
    # BUG FIX: the original looped over loader.alazy_load() with a plain
    # synchronous `for`, but alazy_load() returns an *async* iterator and
    # raises TypeError outside `async for`. lazy_load() is the synchronous
    # page-by-page equivalent.
    return list(loader.lazy_load())
64
-
65
# Load lightweight LLM
@st.cache_resource
def load_llm():
    """Build and cache a text-generation LLM wrapped for LangChain.

    Decorated with st.cache_resource so the model weights are loaded once
    per Streamlit session rather than on every script rerun.

    NOTE(review): despite the "lightweight" comment, this loads an 8B
    parameter model — confirm the deployment target has the memory for it.
    """
    model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
    # model_name = "meta-llama/Llama-2-7b-chat-hf"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, dtype="auto", device_map="auto"
    )
    generation_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
    )
    return HuggingFacePipeline(pipeline=generation_pipeline)
    # pipe = pipeline("text-generation", model="google/flan-t5-small", max_new_tokens=256)
    # return HuggingFacePipeline(pipeline=pipe)
78
-
79
# extract
def get_chunks(file_content):
    """
    Split a list of page documents into small overlapping text chunks.

    Each page is split independently (chunk_size=100, chunk_overlap=10)
    and the per-page chunk lists are flattened into a single list.
    """
    # initialise the recursive method
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=100,
        chunk_overlap=10
    )
    # Lazily split page-by-page, then flatten into one list of chunks.
    per_page_chunks = (
        splitter.split_documents([Document(page_content=page.page_content)])
        for page in file_content
    )
    return list(itertools.chain.from_iterable(per_page_chunks))
97
-
98
-
99
# Build retriever from uploaded content
# @st.cache_resource
def build_retriever(docs):
    """Embed *docs* with MiniLM and return a FAISS-backed retriever.

    NOTE(review): FAISS.from_texts expects an iterable of strings — passing
    a single string indexes it character-by-character; callers should pass
    a list of chunk strings. Verify call sites.
    """
    embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vector_store = FAISS.from_texts(docs, embedding_model)
    return vector_store.as_retriever()
106
-
107
-
108
# Streamlit UI
st.title("DocsQA: Upload & Ask")

st.markdown("Upload a text file and ask questions about its contents.")

# Add sample file download button
st.download_button(
    label="📄 Download Sample File",
    data=SAMPLE_TEXT,
    file_name="sample_agri.txt",
    mime="text/plain"
)

# Show example questions
with st.expander("Try example questions"):
    for q in EXAMPLE_QUESTIONS:
        st.markdown(f"- {q}")

uploaded_file = st.file_uploader("Upload your file", type=["txt", "pdf"])

if uploaded_file is not None:
    st.write("Filename:", uploaded_file.name)
    # Read the bytes exactly once; the underlying stream is exhausted after
    # this, so everything below works from `data`. (The original called
    # read_uploaded_file() on the already-consumed stream, yielding "".)
    data = uploaded_file.read()

    if uploaded_file.type == "text/plain":
        st.info("Uploaded txt file")
        text = data.decode("utf-8")
    else:
        # BUG FIX: report len(data), not the raw bytes object, and extract
        # PDF text with PyMuPDFLoader — the original called st.pdf(), which
        # is not a text-extraction API.
        st.info(f"Uploaded {len(data)} bytes (PDF or other format)")
        import tempfile
        # PyMuPDFLoader loads from a filesystem path, so persist the bytes.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(data)
            pdf_path = tmp.name
        pages = PyMuPDFLoader(pdf_path).load()
        text = "\n".join(page.page_content for page in pages)

    query = st.text_input("Ask a question ")

    # BUG FIX: the original passed one big string to build_retriever, so
    # FAISS.from_texts indexed it character-by-character (and crashed on
    # PDF bytes). Split into chunk strings first.
    splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
    chunks = splitter.split_text(text)
    retriever = build_retriever(chunks)
    llm = load_llm()

    # qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    combine_docs_chain = create_stuff_documents_chain(llm, prompt)
    qa_chain = create_retrieval_chain(retriever, combine_docs_chain)

    if query:
        with st.spinner("Generating answer..."):
            result = qa_chain.invoke({"input": query})
            # The HF pipeline echoes the prompt; keep only the text after
            # the last "Answer:" marker when one is present (no-op otherwise).
            answer = result["answer"].split("\nAnswer:")[-1].strip()
            st.success(answer)

else:
    st.info("Please upload a `.txt or .pdf` file or use the sample provided.")