Spaces:

heerjtdev
/

answer

Sleeping

App Files Files Community

heerjtdev committed 23 days ago

Commit

d443dc8

verified ·

1 Parent(s): 8207b6c

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -24

app.py CHANGED Viewed

@@ -1,3 +1,115 @@
 import gradio as gr
 import fitz  # PyMuPDF
 from langchain_text_splitters import RecursiveCharacterTextSplitter
@@ -10,22 +122,32 @@ import os
 class VectorSystem:
     def __init__(self):
         self.vector_store = None
-        # Use a lightweight CPU-friendly model
         self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-    def process_pdf(self, file_obj):
-        """Extracts text from PDF and builds the Vector Index"""
         if file_obj is None:
             return "No file uploaded."
         try:
-            # 1. Extract Text
-            doc = fitz.open(file_obj.name)
             text = ""
-            for page in doc:
-                text += page.get_text()
-            # 2. Split Text into Chunks
             text_splitter = RecursiveCharacterTextSplitter(
                 chunk_size=800,
                 chunk_overlap=150,
@@ -34,28 +156,25 @@ class VectorSystem:
             chunks = text_splitter.split_text(text)
             if not chunks:
-                return "Could not extract text. Is the PDF scanned images?"
             # 3. Build Vector Index (FAISS)
             self.vector_store = FAISS.from_texts(chunks, self.embeddings)
-            return f"✅ Success! Indexed {len(chunks)} text chunks from the PDF."
         except Exception as e:
-            return f"Error processing PDF: {str(e)}"
     def retrieve_evidence(self, question, student_answer):
-        """Finds relevant text chunks based on the Question"""
         if not self.vector_store:
-            return "⚠️ Please upload and process a PDF first."
         if not question:
             return "⚠️ Please enter a Question."
-        # We search primarily using the Question to find the 'Ground Truth' in the text.
         docs = self.vector_store.similarity_search(question, k=3)
-        # Format the output
         output_text = "### 🔍 Relevant Context Found:\n\n"
         for i, doc in enumerate(docs):
             output_text += f"**Chunk {i+1}:**\n> {doc.page_content}\n\n"
@@ -69,28 +188,26 @@ system = VectorSystem()
 # --- Gradio UI ---
 with gr.Blocks(title="EduGenius Context Retriever") as demo:
-    gr.Markdown("# 🎓 EduGenius: PDF Context Retriever")
-    gr.Markdown("Upload a chapter, ask a question, and see exactly which part of the text proves the answer right or wrong.")
     with gr.Row():
         with gr.Column(scale=1):
-            # Step 1: Upload
-            pdf_input = gr.File(label="1. Upload PDF Chapter", file_types=[".pdf"])
-            upload_btn = gr.Button("Process PDF", variant="primary")
             upload_status = gr.Textbox(label="Status", interactive=False)
         with gr.Column(scale=2):
-            # Step 2: Query
             question_input = gr.Textbox(label="2. Question", placeholder="e.g., What causes the chemical reaction?")
             answer_input = gr.Textbox(label="Student Answer (Optional Context)", placeholder="e.g., The heat causes it...")
             search_btn = gr.Button("Find Relevant Evidence", variant="secondary")
-            # Output
             evidence_output = gr.Markdown(label="Relevant Text Chunks")
     # Event Handlers
     upload_btn.click(
-        fn=system.process_pdf,
         inputs=[pdf_input],
         outputs=[upload_status]
     )
@@ -101,6 +218,5 @@ with gr.Blocks(title="EduGenius Context Retriever") as demo:
         outputs=[evidence_output]
     )
-# Launch
 if __name__ == "__main__":
     demo.launch()

+# import gradio as gr
+# import fitz  # PyMuPDF
+# from langchain_text_splitters import RecursiveCharacterTextSplitter
+# from langchain_community.vectorstores import FAISS
+# from langchain_huggingface import HuggingFaceEmbeddings
+# import os
+# # --- Backend Logic ---
+# class VectorSystem:
+#     def __init__(self):
+#         self.vector_store = None
+#         # Use a lightweight CPU-friendly model
+#         self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+#     def process_pdf(self, file_obj):
+#         """Extracts text from PDF and builds the Vector Index"""
+#         if file_obj is None:
+#             return "No file uploaded."
+#         try:
+#             # 1. Extract Text
+#             doc = fitz.open(file_obj.name)
+#             text = ""
+#             for page in doc:
+#                 text += page.get_text()
+#             # 2. Split Text into Chunks
+#             text_splitter = RecursiveCharacterTextSplitter(
+#                 chunk_size=800,
+#                 chunk_overlap=150,
+#                 separators=["\n\n", "\n", ".", " ", ""]
+#             )
+#             chunks = text_splitter.split_text(text)
+#             if not chunks:
+#                 return "Could not extract text. Is the PDF scanned images?"
+#             # 3. Build Vector Index (FAISS)
+#             self.vector_store = FAISS.from_texts(chunks, self.embeddings)
+#             return f"✅ Success! Indexed {len(chunks)} text chunks from the PDF."
+#         except Exception as e:
+#             return f"Error processing PDF: {str(e)}"
+#     def retrieve_evidence(self, question, student_answer):
+#         """Finds relevant text chunks based on the Question"""
+#         if not self.vector_store:
+#             return "⚠️ Please upload and process a PDF first."
+#         if not question:
+#             return "⚠️ Please enter a Question."
+#         # We search primarily using the Question to find the 'Ground Truth' in the text.
+#         docs = self.vector_store.similarity_search(question, k=3)
+#         # Format the output
+#         output_text = "### 🔍 Relevant Context Found:\n\n"
+#         for i, doc in enumerate(docs):
+#             output_text += f"**Chunk {i+1}:**\n> {doc.page_content}\n\n"
+#         output_text += "---\n*These are the most relevant segments to grade the answer against.*"
+#         return output_text
+# # Initialize System
+# system = VectorSystem()
+# # --- Gradio UI ---
+# with gr.Blocks(title="EduGenius Context Retriever") as demo:
+#     gr.Markdown("# 🎓 EduGenius: PDF Context Retriever")
+#     gr.Markdown("Upload a chapter, ask a question, and see exactly which part of the text proves the answer right or wrong.")
+#     with gr.Row():
+#         with gr.Column(scale=1):
+#             # Step 1: Upload
+#             pdf_input = gr.File(label="1. Upload PDF Chapter", file_types=[".pdf"])
+#             upload_btn = gr.Button("Process PDF", variant="primary")
+#             upload_status = gr.Textbox(label="Status", interactive=False)
+#         with gr.Column(scale=2):
+#             # Step 2: Query
+#             question_input = gr.Textbox(label="2. Question", placeholder="e.g., What causes the chemical reaction?")
+#             answer_input = gr.Textbox(label="Student Answer (Optional Context)", placeholder="e.g., The heat causes it...")
+#             search_btn = gr.Button("Find Relevant Evidence", variant="secondary")
+#             # Output
+#             evidence_output = gr.Markdown(label="Relevant Text Chunks")
+#     # Event Handlers
+#     upload_btn.click(
+#         fn=system.process_pdf,
+#         inputs=[pdf_input],
+#         outputs=[upload_status]
+#     )
+#     search_btn.click(
+#         fn=system.retrieve_evidence,
+#         inputs=[question_input, answer_input],
+#         outputs=[evidence_output]
+#     )
+# # Launch
+# if __name__ == "__main__":
+#     demo.launch()
 import gradio as gr
 import fitz  # PyMuPDF
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 class VectorSystem:
     def __init__(self):
         self.vector_store = None
         self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+    def process_file(self, file_obj):
+        """Extracts text from PDF OR TXT and builds the Vector Index"""
         if file_obj is None:
             return "No file uploaded."
         try:
             text = ""
+            file_path = file_obj.name
+            # --- LOGIC BRANCH: Detect File Type ---
+            if file_path.lower().endswith('.pdf'):
+                # Handle PDF
+                doc = fitz.open(file_path)
+                for page in doc:
+                    text += page.get_text()
+            elif file_path.lower().endswith('.txt'):
+                # Handle Text File
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    text = f.read()
+            else:
+                return "❌ Error: Only .pdf and .txt files are supported."
+            # --------------------------------------
+            # 2. Split Text into Chunks (Logic is identical for both)
             text_splitter = RecursiveCharacterTextSplitter(
                 chunk_size=800,
                 chunk_overlap=150,
             chunks = text_splitter.split_text(text)
             if not chunks:
+                return "Could not extract text. Is the file empty?"
             # 3. Build Vector Index (FAISS)
             self.vector_store = FAISS.from_texts(chunks, self.embeddings)
+            return f"✅ Success! Indexed {len(chunks)} text chunks."
         except Exception as e:
+            return f"Error processing file: {str(e)}"
     def retrieve_evidence(self, question, student_answer):
         if not self.vector_store:
+            return "⚠️ Please upload and process a file first."
         if not question:
             return "⚠️ Please enter a Question."
         docs = self.vector_store.similarity_search(question, k=3)
         output_text = "### 🔍 Relevant Context Found:\n\n"
         for i, doc in enumerate(docs):
             output_text += f"**Chunk {i+1}:**\n> {doc.page_content}\n\n"
 # --- Gradio UI ---
 with gr.Blocks(title="EduGenius Context Retriever") as demo:
+    gr.Markdown("# 🎓 EduGenius: Context Retriever")
+    gr.Markdown("Upload a Chapter (PDF or TXT), ask a question, and see exactly which part of the text proves the answer right or wrong.")
     with gr.Row():
         with gr.Column(scale=1):
+            # UPDATED: Added ".txt" to file_types and changed label
+            pdf_input = gr.File(label="1. Upload File (PDF or TXT)", file_types=[".pdf", ".txt"])
+            upload_btn = gr.Button("Process File", variant="primary")
             upload_status = gr.Textbox(label="Status", interactive=False)
         with gr.Column(scale=2):
             question_input = gr.Textbox(label="2. Question", placeholder="e.g., What causes the chemical reaction?")
             answer_input = gr.Textbox(label="Student Answer (Optional Context)", placeholder="e.g., The heat causes it...")
             search_btn = gr.Button("Find Relevant Evidence", variant="secondary")
             evidence_output = gr.Markdown(label="Relevant Text Chunks")
     # Event Handlers
     upload_btn.click(
+        fn=system.process_file,  # Note: Function name changed
         inputs=[pdf_input],
         outputs=[upload_status]
     )
         outputs=[evidence_output]
     )
 if __name__ == "__main__":
     demo.launch()