Spaces:

Seyfelislem
/

Document_QA_Assistant

Sleeping

App Files Files Community

Seif-aber committed on Jan 15, 2025

Commit

355fe19

1 Parent(s): c77e641

document q&a assistant with Gemini & RAG

Browse files

Files changed (3) hide show

app.py +50 -0
requirements.txt +7 -0
utils.py +64 -0

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import streamlit as st
+import os
+from utils import load_data, get_gemini_embedding
+def process_document(doc, question):
+    """Process document and return response to question."""
+    temp_path = os.path.join("data", doc.name)
+    try:
+        with open(temp_path, "wb") as f:
+            f.write(doc.getbuffer())
+        documents = load_data("data")
+        query_engine = get_gemini_embedding(documents)
+        return query_engine.query(question)
+    finally:
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+def main():
+    st.set_page_config(page_title="Document Q&A Assistant")
+    st.title("Smart Document Question-Answering")
+    # Create data directory if not exists
+    os.makedirs("data", exist_ok=True)
+    doc = st.file_uploader(
+        "Upload your document (PDF, CSV, or TXT)", type=["pdf", "csv", "txt"]
+    )
+    question = st.text_input(
+        "What would you like to know about your document?",
+        placeholder="Enter your question here...",
+    )
+    if st.button("Get Answer"):
+        if not doc:
+            st.error("Please upload a document first.")
+            return
+        if not question:
+            st.error("Please enter a question.")
+            return
+        with st.spinner("Analyzing your document..."):
+            response = process_document(doc, question)
+            st.write(response.response)
+# Start the UI only when this file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+llama-index
+google-generativeai
+llama-index-llms-gemini
+pypdf
+python-dotenv
+llama-index-embeddings-gemini
+streamlit

utils.py ADDED Viewed

	@@ -0,0 +1,64 @@

+from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.embeddings.gemini import GeminiEmbedding
+from llama_index.llms.gemini import Gemini
+import logging
+import os
+# Gemini API key read from the environment; None when GEMINI_API_KEY is unset.
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+# Configure logging: module-level logger, root logging set to INFO at import time
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+def load_data(data_path: str) -> list[str]:
+    """
+    Load documents from a directory.
+    Args:
+        data_path (str): Path to the directory containing documents
+    Returns:
+        list[str]: List of loaded documents or False if loading fails
+    """
+    try:
+        logger.info(f"Loading documents from {data_path}")
+        loader = SimpleDirectoryReader(data_path)
+        documents = loader.load_data()
+        logger.info(f"Successfully loaded {len(documents)} documents")
+        return documents
+    except Exception as e:
+        logger.error(f"Failed to load data: {str(e)}")
+        return False
+def get_gemini_embedding(documents: str):
+    """
+    Create a query engine using Gemini embeddings.
+    Args:
+        documents (str): Documents to process
+    Returns:
+        QueryEngine: Configured query engine or False if setup fails
+    """
+    try:
+        logger.info("Initializing Gemini embedding model and LLM")
+        gemini_embedding_model = GeminiEmbedding(model_name="models/embedding-001")
+        llm = Gemini(model="models/gemini-1.5-flash", api_key=GEMINI_API_KEY)
+        # Configure global settings
+        Settings.llm = llm
+        Settings.embed_model = gemini_embedding_model
+        Settings.node_parser = SentenceSplitter(chunk_size=1000, chunk_overlap=20)
+        logger.info("Creating vector store index")
+        index = VectorStoreIndex.from_documents(
+            documents=documents,
+            embed_model=gemini_embedding_model
+        )
+        logger.info("Creating query engine")
+        return index.as_query_engine()
+    except Exception as e:
+        logger.error(f"Failed to setup Gemini embedding: {str(e)}")
+        return False