Spaces:

Narayana02
/

IntelliLearn

Sleeping

App Files Files Community

Narayana02 committed on Dec 23, 2024

Commit

f390c59

verified ·

1 Parent(s): 7eab4a2

Upload 4 files

Browse files

Files changed (4) hide show

.env +1 -0
app.py +59 -0
requirements.txt +6 -0
utilities.py +65 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ OPENAI_API_KEY = <REDACTED — secret removed; revoke this key and provide it through a deployment secret rather than a committed .env file>

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import streamlit as st
+import os
+from dotenv import load_dotenv
+from utils import (
+    extract_text_from_pdf,
+    build_hierarchical_tree,
+    save_tree,
+    hybrid_retrieval,
+    rag_answer,
+)
+# Load API key from .env
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+# Create necessary directories
+os.makedirs("uploaded_textbooks", exist_ok=True)
+os.makedirs("hierarchical_trees", exist_ok=True)
+os.makedirs("retrieved_contexts", exist_ok=True)
+# Streamlit UI
+st.title("Hierarchical Question-Answering System 📚🤖")
+st.markdown(
+    "Upload textbooks, explore their structure, and ask questions powered by AI."
+)
+# Upload PDF section
+uploaded_files = st.file_uploader("Upload Textbooks (PDF)", type=["pdf"], accept_multiple_files=True)
+if uploaded_files:
+    for uploaded_file in uploaded_files:
+        file_path = os.path.join("uploaded_textbooks", uploaded_file.name)
+        with open(file_path, "wb") as f:
+            f.write(uploaded_file.read())
+        # Extract text
+        st.write(f"Processing: {uploaded_file.name}")
+        extracted_text = extract_text_from_pdf(file_path)
+        # Build hierarchical tree
+        tree = build_hierarchical_tree(extracted_text, textbook_title=uploaded_file.name)
+        tree_path = os.path.join("hierarchical_trees", f"{uploaded_file.name}_tree.json")
+        save_tree(tree, tree_path)
+        st.success(f"Processed and indexed: {uploaded_file.name}")
+# Query Section
+query = st.text_input("Ask a question:")
+if query:
+    st.write("Retrieving relevant information...")
+    relevant_text = hybrid_retrieval(query, OPENAI_API_KEY)
+    if relevant_text:
+        st.write("Generating an answer...")
+        answer = rag_answer(query, relevant_text, OPENAI_API_KEY)
+        st.write(f"**Answer:** {answer}")
+        st.write("**Relevant Context:**")
+        st.write(relevant_text)
+    else:
+        st.write("No relevant information found.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+PyPDF2
+networkx
+sentence-transformers
+openai
+transformers

utilities.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import PyPDF2
+import json
+import networkx as nx
+from sentence_transformers import SentenceTransformer, util
+import openai
+# Model for embeddings
+model = SentenceTransformer("all-MiniLM-L6-v2")
+# 1. Extract Text from PDF
+def extract_text_from_pdf(file_path):
+    """Extract text from a PDF."""
+    text = ""
+    with open(file_path, "rb") as f:
+        reader = PyPDF2.PdfReader(f)
+        for page in reader.pages:
+            text += page.extract_text()
+    return text
+# 2. Build Hierarchical Tree
+def build_hierarchical_tree(text, textbook_title):
+    """Create a hierarchical tree structure."""
+    lines = text.split("\n")
+    tree = {"title": textbook_title, "chapters": []}
+    current_chapter = None
+    for line in lines:
+        if line.strip().startswith("Chapter"):
+            current_chapter = {"title": line.strip(), "sections": []}
+            tree["chapters"].append(current_chapter)
+        elif current_chapter and line.strip():
+            current_chapter["sections"].append(line.strip())
+    return tree
+def save_tree(tree, path):
+    """Save the hierarchical tree."""
+    with open(path, "w") as f:
+        json.dump(tree, f, indent=4)
+# 3. Hybrid Retrieval
+def hybrid_retrieval(query, openai_api_key):
+    """Retrieve relevant text using hybrid methods."""
+    with open("hierarchical_trees/example_tree.json") as f:  # Adjust file path as needed
+        tree = json.load(f)
+    all_sections = [
+        section for chapter in tree["chapters"] for section in chapter["sections"]
+    ]
+    query_embedding = model.encode(query, convert_to_tensor=True)
+    section_embeddings = model.encode(all_sections, convert_to_tensor=True)
+    similarities = util.pytorch_cos_sim(query_embedding, section_embeddings)
+    top_indices = similarities[0].topk(3).indices.tolist()
+    return " ".join([all_sections[i] for i in top_indices])
+# 4. RAG Answer Generation
+def rag_answer(query, context, openai_api_key):
+    """Generate an answer using Retrieval-Augmented Generation."""
+    openai.api_key = openai_api_key
+    response = openai.Completion.create(
+        engine="text-davinci-003",
+        prompt=f"Answer the question based on the context below:\n\nContext: {context}\n\nQuestion: {query}\n\nAnswer:",
+        max_tokens=150,
+    )
+    return response.choices[0].text.strip()