stevafernandes committed on
Commit
d96618a
·
verified ·
1 Parent(s): cd18249

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -59
app.py CHANGED
@@ -1,72 +1,86 @@
1
- import os
2
- import asyncio
3
- import gradio as gr
4
  from PyPDF2 import PdfReader
 
5
 
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
8
  from langchain_community.vectorstores import FAISS
9
- from langchain.chains import RetrievalQA
 
 
10
 
11
- # ---- ensure asyncio loop ----
12
- try:
13
- asyncio.get_running_loop()
14
- except RuntimeError:
15
- asyncio.set_event_loop(asyncio.new_event_loop())
16
 
17
- # load key
18
- API_KEY = os.getenv("GOOGLE_API_KEY", "").strip()
19
- if not API_KEY:
20
- raise RuntimeError("Set the GOOGLE_API_KEY env var")
21
-
22
- # 1) build FAISS index over librarianship.pdf
23
- vector_store = None
24
- def build_index():
25
- global vector_store
26
- reader = PdfReader("librarianship.pdf")
27
- full_text = ""
28
  for page in reader.pages:
29
- txt = page.extract_text() or ""
30
- full_text += txt + "\n"
31
- splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
32
- chunks = splitter.split_text(full_text)
33
- embeds = GoogleGenerativeAIEmbeddings(
34
- model="models/embedding-001", google_api_key=API_KEY
35
- )
36
- vector_store = FAISS.from_texts(chunks, embedding=embeds)
37
- print(f"Indexed {len(chunks)} chunks from librarianship.pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- build_index()
 
 
 
40
 
41
- # 2) set up a RetrievalQA chain
42
- llm = ChatGoogleGenerativeAI(
43
- model="gemini-2.0-flash-exp",
44
- temperature=0,
45
- google_api_key=API_KEY
46
- )
47
- qa = RetrievalQA.from_chain_type(
48
- llm=llm,
49
- chain_type="stuff",
50
- retriever=vector_store.as_retriever()
51
- )
52
 
53
- # 3) Gradio interface
54
- def answer(question, chat_history):
55
- if not question.strip():
56
- return chat_history, ""
57
- result = qa.run(question)
58
- chat_history.append({"role": "user", "content": question})
59
- chat_history.append({"role": "assistant", "content": result})
60
- return chat_history, ""
61
 
62
- with gr.Blocks() as demo:
63
- gr.Markdown("## 📚 Chat over **librarianship.pdf** with Gemini AI")
64
- chatbot = gr.Chatbot(type="messages")
65
- user_input = gr.Textbox(placeholder="Ask anything about librarianship…")
66
- user_input.submit(answer, [user_input, chatbot], [chatbot, user_input])
67
 
68
  if __name__ == "__main__":
69
- demo.launch(
70
- server_name="0.0.0.0",
71
- server_port=int(os.environ.get("PORT", 7860))
72
- )
 
1
+ import streamlit as st
 
 
2
  from PyPDF2 import PdfReader
3
+ import os
4
 
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langchain.chains.question_answering import load_qa_chain
10
+ from langchain.prompts import PromptTemplate
11
 
12
+ # --- Get API key from environment variable (set in Hugging Face Secrets or .env file) ---
13
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
14
+ PDF_PATH = "librarianship.pdf"
15
+ INDEX_PATH = "/tmp/faiss_index"
 
16
 
17
def get_pdf_text(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    Pages for which PyPDF2 extracts nothing (scanned images, empty pages —
    `extract_text()` returns None/"" for those) contribute no text.
    """
    reader = PdfReader(pdf_path)
    # Join at C speed instead of repeated `+=` on a growing string.
    return "".join(page.extract_text() or "" for page in reader.pages)
25
+
26
def get_text_chunks(text):
    """Split *text* into overlapping chunks sized for embedding.

    10k-character chunks with 1k overlap so context is not lost at
    chunk boundaries.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    return splitter.split_text(text)
29
+
30
def build_and_save_vector_store(text_chunks, api_key):
    """Embed *text_chunks* with Gemini embeddings and persist a FAISS index.

    The index is written to the module-level INDEX_PATH so later runs can
    reload it instead of re-embedding the PDF.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=api_key
    )
    FAISS.from_texts(text_chunks, embedding=embedder).save_local(INDEX_PATH)
34
+
35
def load_vector_store(api_key):
    """Reload the FAISS index persisted at INDEX_PATH.

    Must use the same embedding model the index was built with.
    `allow_dangerous_deserialization=True` is required because the index
    is pickle-backed; safe here since we only load our own local file.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", google_api_key=api_key
    )
    store = FAISS.load_local(INDEX_PATH, embedder, allow_dangerous_deserialization=True)
    return store
38
+
39
def get_conversational_chain(api_key):
    # Build a "stuff"-type QA chain: all retrieved documents are stuffed
    # into a single prompt alongside the user's question.
    # {context} and {question} are filled in by the chain at call time.
    prompt_template = """
    You are a helpful assistant that only answers based on the context provided from the PDF document.
    Do not use any external knowledge or assumptions. If the answer is not found in the context below, reply with "I don't know."
    Context:
    {context}
    Question:
    {question}
    Answer:
    """
    # temperature=0 for deterministic, grounded answers.
    model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0, google_api_key=api_key)
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain
53
+
54
def user_input(user_question, api_key):
    """Answer *user_question* from the persisted index and render the reply.

    Retrieves the most similar chunks from the FAISS store, runs them
    through the QA chain, and writes the model's answer to the Streamlit page.
    """
    index = load_vector_store(api_key)
    matching_docs = index.similarity_search(user_question)
    qa_chain = get_conversational_chain(api_key)
    result = qa_chain(
        {"input_documents": matching_docs, "question": user_question},
        return_only_outputs=True,
    )
    st.write("**Reply:**", result["output_text"])
60
 
61
def main():
    """Streamlit entry point.

    Verifies the API key, builds the FAISS index on first run (persisted
    to INDEX_PATH so reruns reuse it), then serves a single-question QA box.
    """
    st.set_page_config(page_title="Chat librarianship.pdf")
    st.header("RAG: Chat with librarianship.pdf using Gemini 2.0")
    st.markdown("---")

    # Ensure API key is present before doing any work.
    if not GOOGLE_API_KEY:
        st.error("Please set the GOOGLE_API_KEY environment variable in your Hugging Face Space secrets or .env file.")
        st.stop()

    # Build the FAISS index only if it is not already persisted.
    # BUG FIX: FAISS.save_local(INDEX_PATH) writes a *directory* containing
    # index.faiss and index.pkl — no "<INDEX_PATH>.index" file ever exists,
    # so the old check `os.path.exists(INDEX_PATH + ".index")` was always
    # False and the PDF was re-embedded on every Streamlit rerun (slow and
    # costly). Check for the actual saved artifact instead.
    if not os.path.exists(os.path.join(INDEX_PATH, "index.faiss")):
        with st.spinner(f"Indexing {PDF_PATH}..."):
            raw_text = get_pdf_text(PDF_PATH)
            text_chunks = get_text_chunks(raw_text)
            build_and_save_vector_store(text_chunks, GOOGLE_API_KEY)
        st.success(f"Indexed {PDF_PATH}. You can now ask questions.")

    # Simple chat UI: one text box, answer rendered below on submit.
    st.subheader("Ask a question about librarianship.pdf")
    user_question = st.text_input("Ask a question", key="user_question")
    if user_question:
        user_input(user_question, GOOGLE_API_KEY)


if __name__ == "__main__":
    main()