Rahul Bhoyar committed · Commit 08728cc · 1 Parent(s): 4ddfb35

Updated files

Files changed:
- .gitignore +2 -1
- app.py +101 -51
- requirements.txt +6 -14
.gitignore
CHANGED
@@ -1 +1,2 @@
-venv/
+venv/
+data/*
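Since data/* is now ignored, a fresh checkout of the Space will not contain the data/ directory that the updated app.py writes uploads into. A minimal, hypothetical guard (not part of this commit) that creates the directory before any write could look like this:

import os

# Hypothetical guard: make sure the gitignored upload directory exists
# before the app tries to save an uploaded PDF into it.
os.makedirs("data", exist_ok=True)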
app.py
CHANGED
@@ -1,60 +1,110 @@
-import streamlit as st
-from PyPDF2 import PdfReader
-from llama_index.llms import HuggingFaceInferenceAPI
-from llama_index import VectorStoreIndex
-from llama_index.embeddings import HuggingFaceEmbedding
-from llama_index import ServiceContext
-from llama_index.schema import Document
-
-
-def read_pdf(uploaded_file):
-    pdf_reader = PdfReader(uploaded_file)
-    text = ""
-    for page_num in range(len(pdf_reader.pages)):
-        text += pdf_reader.pages[page_num].extract_text()
-    return text
-
-def querying(query_engine):
-    query = st.text_input("Enter the Query for PDF:")
-    submit = st.button("Generate The response for the query")
-    if submit:
-        with st.spinner("Fetching the response..."):
-            response = query_engine.query(query)
-            st.write(f"**Response:** {response}")
-
-    # docs = document_search.similarity_search(query_text)
-    # output = chain.run(input_documents=docs, question=query_text)
-    # st.write(output)
-
-def main():
-    st.title("PdfQuerier using LLAMA by Rahul Bhoyar")
-    hf_token = st.text_input("Enter your Hugging Face token:")
-    llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
-    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
-
-    if uploaded_file is not None:
-        file_contents = read_pdf(uploaded_file)
-        documents = Document(text=file_contents)
-        documents = [documents]
-        st.success("Documents loaded successfully!")
-
-        with st.spinner("Created Embedding model..."):
-            embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
-            service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae)
-            st.success("Embedding model created successfully!")
-
-        # Download embeddings from OpenAI
-        with st.spinner("Created VectorStoreIndex..."):
-            index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
-            index.storage_context.persist()
-            query_engine = index.as_query_engine()
-            st.success("VectorStoreIndex created successfully!")
-
-        querying(query_engine)
-
-
-if __name__ == "__main__":
-    main()
+# import streamlit as st
+# from PyPDF2 import PdfReader
+# from llama_index.llms import HuggingFaceInferenceAPI
+# from llama_index import VectorStoreIndex
+# from llama_index.embeddings import HuggingFaceEmbedding
+# from llama_index import ServiceContext
+# from llama_index.schema import Document
+
+
+# def read_pdf(uploaded_file):
+#     pdf_reader = PdfReader(uploaded_file)
+#     text = ""
+#     for page_num in range(len(pdf_reader.pages)):
+#         text += pdf_reader.pages[page_num].extract_text()
+#     return text
+
+# def querying(query_engine):
+#     query = st.text_input("Enter the Query for PDF:")
+#     submit = st.button("Generate The response for the query")
+#     if submit:
+#         with st.spinner("Fetching the response..."):
+#             response = query_engine.query(query)
+#             st.write(f"**Response:** {response}")
+
+# def main():
+#     st.title("PdfQuerier using LLAMA by Rahul Bhoyar")
+#     hf_token = st.text_input("Enter your Hugging Face token:")
+#     llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
+#     uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
+
+#     if uploaded_file is not None:
+#         file_contents = read_pdf(uploaded_file)
+#         documents = Document(text=file_contents)
+#         documents = [documents]
+#         st.success("Documents loaded successfully!")
+
+#         with st.spinner("Created Embedding model..."):
+#             embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
+#             service_context = ServiceContext.from_defaults(llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae)
+#             st.success("Embedding model created successfully!")
+
+#         # Download embeddings from OpenAI
+#         with st.spinner("Created VectorStoreIndex..."):
+#             index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
+#             index.storage_context.persist()
+#             query_engine = index.as_query_engine()
+#             st.success("VectorStoreIndex created successfully!")
+
+#         querying(query_engine)
+
+
+# if __name__ == "__main__":
+#     main()
+
+
+
+
+import streamlit as st
+from llama_index import SimpleDirectoryReader, VectorStoreIndex
+from llama_index import ServiceContext
+from llama_index.embeddings import HuggingFaceEmbedding
+from llama_index.llms import HuggingFaceInferenceAPI
+import os
+
+# os.environ["GOOGLE_API_KEY"]="AIzaSyBYrZpUdTc4rumhdHajlKfwY4Kq0u6vFDs"
+
+# Streamlit title and description
+st.title("Gemini-File with Llama-Index by Rahul Bhoyar")
+st.write("This app allows you to upload your own Pdf and query your document, Powered By Gemini")
+
+hf_token = st.text_input("Enter your Hugging Face token:")
+
+
+#function to save a file
+def save_uploadedfile(uploadedfile):
+    with open(os.path.join("data",uploadedfile.name),"wb") as f:
+        f.write(uploadedfile.getbuffer())
+    return st.success("Saved File:{} to directory".format(uploadedfile.name))
+
+# Streamlit input for user file upload
+uploaded_pdf = st.file_uploader("Upload your PDF", type=['pdf'])
+
+# Load data and configure the index
+if uploaded_pdf is not None:
+    input_file = save_uploadedfile(uploaded_pdf)
+    st.write("File uploaded successfully!")
+    documents = SimpleDirectoryReader("data").load_data()
+    llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-alpha", token=hf_token)
+    embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
+
+    # Configure Service Context
+    service_context = ServiceContext.from_defaults(
+        llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae
+    )
+    index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
+    index.storage_context.persist()
+    query_engine = index.as_query_engine()
+
+    # Streamlit input for user query
+    user_query = st.text_input("Enter your query:")
+
+    # Query engine with user input
+    if user_query:
+        response = query_engine.query(user_query)
+        st.markdown(f"**Response:** {response}")
+else:
+    st.write("Please upload a file first.")
+
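The new app.py persists the index with index.storage_context.persist() but still rebuilds it from scratch on every upload. A rough sketch of how the persisted index could be reloaded instead, assuming the same pre-0.10 llama_index API used above and its default ./storage persist directory:

from llama_index import StorageContext, load_index_from_storage

# Sketch only: reload a previously persisted index instead of re-embedding.
# service_context is assumed to be the same ServiceContext built in app.py.
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context, service_context=service_context)
query_engine = index.as_query_engine()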
requirements.txt
CHANGED
@@ -1,15 +1,7 @@
-langchain
-openai
-PyPDF2
-faiss-cpu
-tiktoken
-watchdog
-streamlit
-fitz
 llama-index
-
-
-
-
-
-
+pypdf
+streamlit
+huggingface_hub[inference]>=0.19.0
+transformers
+torch
+watchdog
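requirements.txt now pulls in huggingface_hub[inference]>=0.19.0, which the HuggingFaceInferenceAPI wrapper uses for remote inference. A small, illustrative check (not part of this commit) to verify a token before pasting it into the app's text input:

from huggingface_hub import whoami

# Illustrative only: whoami() returns account details for a valid token
# and raises for an invalid or expired one.
try:
    info = whoami(token="hf_xxx")  # replace with a real token
    print("Token OK, authenticated as:", info["name"])
except Exception as exc:
    print("Token check failed:", exc)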