heerjtdev committed on
Commit
d443dc8
·
verified ·
1 Parent(s): 8207b6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +140 -24
app.py CHANGED
@@ -1,3 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import fitz # PyMuPDF
3
  from langchain_text_splitters import RecursiveCharacterTextSplitter
@@ -10,22 +122,32 @@ import os
10
  class VectorSystem:
11
  def __init__(self):
12
  self.vector_store = None
13
- # Use a lightweight CPU-friendly model
14
  self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
15
 
16
- def process_pdf(self, file_obj):
17
- """Extracts text from PDF and builds the Vector Index"""
18
  if file_obj is None:
19
  return "No file uploaded."
20
 
21
  try:
22
- # 1. Extract Text
23
- doc = fitz.open(file_obj.name)
24
  text = ""
25
- for page in doc:
26
- text += page.get_text()
27
 
28
- # 2. Split Text into Chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  text_splitter = RecursiveCharacterTextSplitter(
30
  chunk_size=800,
31
  chunk_overlap=150,
@@ -34,28 +156,25 @@ class VectorSystem:
34
  chunks = text_splitter.split_text(text)
35
 
36
  if not chunks:
37
- return "Could not extract text. Is the PDF scanned images?"
38
 
39
  # 3. Build Vector Index (FAISS)
40
  self.vector_store = FAISS.from_texts(chunks, self.embeddings)
41
 
42
- return f"βœ… Success! Indexed {len(chunks)} text chunks from the PDF."
43
 
44
  except Exception as e:
45
- return f"Error processing PDF: {str(e)}"
46
 
47
  def retrieve_evidence(self, question, student_answer):
48
- """Finds relevant text chunks based on the Question"""
49
  if not self.vector_store:
50
- return "⚠️ Please upload and process a PDF first."
51
 
52
  if not question:
53
  return "⚠️ Please enter a Question."
54
 
55
- # We search primarily using the Question to find the 'Ground Truth' in the text.
56
  docs = self.vector_store.similarity_search(question, k=3)
57
 
58
- # Format the output
59
  output_text = "### πŸ” Relevant Context Found:\n\n"
60
  for i, doc in enumerate(docs):
61
  output_text += f"**Chunk {i+1}:**\n> {doc.page_content}\n\n"
@@ -69,28 +188,26 @@ system = VectorSystem()
69
  # --- Gradio UI ---
70
 
71
  with gr.Blocks(title="EduGenius Context Retriever") as demo:
72
- gr.Markdown("# πŸŽ“ EduGenius: PDF Context Retriever")
73
- gr.Markdown("Upload a chapter, ask a question, and see exactly which part of the text proves the answer right or wrong.")
74
 
75
  with gr.Row():
76
  with gr.Column(scale=1):
77
- # Step 1: Upload
78
- pdf_input = gr.File(label="1. Upload PDF Chapter", file_types=[".pdf"])
79
- upload_btn = gr.Button("Process PDF", variant="primary")
80
  upload_status = gr.Textbox(label="Status", interactive=False)
81
 
82
  with gr.Column(scale=2):
83
- # Step 2: Query
84
  question_input = gr.Textbox(label="2. Question", placeholder="e.g., What causes the chemical reaction?")
85
  answer_input = gr.Textbox(label="Student Answer (Optional Context)", placeholder="e.g., The heat causes it...")
86
  search_btn = gr.Button("Find Relevant Evidence", variant="secondary")
87
 
88
- # Output
89
  evidence_output = gr.Markdown(label="Relevant Text Chunks")
90
 
91
  # Event Handlers
92
  upload_btn.click(
93
- fn=system.process_pdf,
94
  inputs=[pdf_input],
95
  outputs=[upload_status]
96
  )
@@ -101,6 +218,5 @@ with gr.Blocks(title="EduGenius Context Retriever") as demo:
101
  outputs=[evidence_output]
102
  )
103
 
104
- # Launch
105
  if __name__ == "__main__":
106
  demo.launch()
 
1
+ # import gradio as gr
2
+ # import fitz # PyMuPDF
3
+ # from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ # from langchain_community.vectorstores import FAISS
5
+ # from langchain_huggingface import HuggingFaceEmbeddings
6
+ # import os
7
+
8
+ # # --- Backend Logic ---
9
+
10
+ # class VectorSystem:
11
+ # def __init__(self):
12
+ # self.vector_store = None
13
+ # # Use a lightweight CPU-friendly model
14
+ # self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
15
+
16
+ # def process_pdf(self, file_obj):
17
+ # """Extracts text from PDF and builds the Vector Index"""
18
+ # if file_obj is None:
19
+ # return "No file uploaded."
20
+
21
+ # try:
22
+ # # 1. Extract Text
23
+ # doc = fitz.open(file_obj.name)
24
+ # text = ""
25
+ # for page in doc:
26
+ # text += page.get_text()
27
+
28
+ # # 2. Split Text into Chunks
29
+ # text_splitter = RecursiveCharacterTextSplitter(
30
+ # chunk_size=800,
31
+ # chunk_overlap=150,
32
+ # separators=["\n\n", "\n", ".", " ", ""]
33
+ # )
34
+ # chunks = text_splitter.split_text(text)
35
+
36
+ # if not chunks:
37
+ # return "Could not extract text. Is the PDF scanned images?"
38
+
39
+ # # 3. Build Vector Index (FAISS)
40
+ # self.vector_store = FAISS.from_texts(chunks, self.embeddings)
41
+
42
+ # return f"βœ… Success! Indexed {len(chunks)} text chunks from the PDF."
43
+
44
+ # except Exception as e:
45
+ # return f"Error processing PDF: {str(e)}"
46
+
47
+ # def retrieve_evidence(self, question, student_answer):
48
+ # """Finds relevant text chunks based on the Question"""
49
+ # if not self.vector_store:
50
+ # return "⚠️ Please upload and process a PDF first."
51
+
52
+ # if not question:
53
+ # return "⚠️ Please enter a Question."
54
+
55
+ # # We search primarily using the Question to find the 'Ground Truth' in the text.
56
+ # docs = self.vector_store.similarity_search(question, k=3)
57
+
58
+ # # Format the output
59
+ # output_text = "### πŸ” Relevant Context Found:\n\n"
60
+ # for i, doc in enumerate(docs):
61
+ # output_text += f"**Chunk {i+1}:**\n> {doc.page_content}\n\n"
62
+
63
+ # output_text += "---\n*These are the most relevant segments to grade the answer against.*"
64
+ # return output_text
65
+
66
+ # # Initialize System
67
+ # system = VectorSystem()
68
+
69
+ # # --- Gradio UI ---
70
+
71
+ # with gr.Blocks(title="EduGenius Context Retriever") as demo:
72
+ # gr.Markdown("# πŸŽ“ EduGenius: PDF Context Retriever")
73
+ # gr.Markdown("Upload a chapter, ask a question, and see exactly which part of the text proves the answer right or wrong.")
74
+
75
+ # with gr.Row():
76
+ # with gr.Column(scale=1):
77
+ # # Step 1: Upload
78
+ # pdf_input = gr.File(label="1. Upload PDF Chapter", file_types=[".pdf"])
79
+ # upload_btn = gr.Button("Process PDF", variant="primary")
80
+ # upload_status = gr.Textbox(label="Status", interactive=False)
81
+
82
+ # with gr.Column(scale=2):
83
+ # # Step 2: Query
84
+ # question_input = gr.Textbox(label="2. Question", placeholder="e.g., What causes the chemical reaction?")
85
+ # answer_input = gr.Textbox(label="Student Answer (Optional Context)", placeholder="e.g., The heat causes it...")
86
+ # search_btn = gr.Button("Find Relevant Evidence", variant="secondary")
87
+
88
+ # # Output
89
+ # evidence_output = gr.Markdown(label="Relevant Text Chunks")
90
+
91
+ # # Event Handlers
92
+ # upload_btn.click(
93
+ # fn=system.process_pdf,
94
+ # inputs=[pdf_input],
95
+ # outputs=[upload_status]
96
+ # )
97
+
98
+ # search_btn.click(
99
+ # fn=system.retrieve_evidence,
100
+ # inputs=[question_input, answer_input],
101
+ # outputs=[evidence_output]
102
+ # )
103
+
104
+ # # Launch
105
+ # if __name__ == "__main__":
106
+ # demo.launch()
107
+
108
+
109
+
110
+
111
+
112
+
113
  import gradio as gr
114
  import fitz # PyMuPDF
115
  from langchain_text_splitters import RecursiveCharacterTextSplitter
 
122
  class VectorSystem:
123
  def __init__(self):
124
  self.vector_store = None
 
125
  self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
126
 
127
+ def process_file(self, file_obj):
128
+ """Extracts text from PDF OR TXT and builds the Vector Index"""
129
  if file_obj is None:
130
  return "No file uploaded."
131
 
132
  try:
 
 
133
  text = ""
134
+ file_path = file_obj.name
 
135
 
136
+ # --- LOGIC BRANCH: Detect File Type ---
137
+ if file_path.lower().endswith('.pdf'):
138
+ # Handle PDF
139
+ doc = fitz.open(file_path)
140
+ for page in doc:
141
+ text += page.get_text()
142
+ elif file_path.lower().endswith('.txt'):
143
+ # Handle Text File
144
+ with open(file_path, 'r', encoding='utf-8') as f:
145
+ text = f.read()
146
+ else:
147
+ return "❌ Error: Only .pdf and .txt files are supported."
148
+ # --------------------------------------
149
+
150
+ # 2. Split Text into Chunks (Logic is identical for both)
151
  text_splitter = RecursiveCharacterTextSplitter(
152
  chunk_size=800,
153
  chunk_overlap=150,
 
156
  chunks = text_splitter.split_text(text)
157
 
158
  if not chunks:
159
+ return "Could not extract text. Is the file empty?"
160
 
161
  # 3. Build Vector Index (FAISS)
162
  self.vector_store = FAISS.from_texts(chunks, self.embeddings)
163
 
164
+ return f"βœ… Success! Indexed {len(chunks)} text chunks."
165
 
166
  except Exception as e:
167
+ return f"Error processing file: {str(e)}"
168
 
169
  def retrieve_evidence(self, question, student_answer):
 
170
  if not self.vector_store:
171
+ return "⚠️ Please upload and process a file first."
172
 
173
  if not question:
174
  return "⚠️ Please enter a Question."
175
 
 
176
  docs = self.vector_store.similarity_search(question, k=3)
177
 
 
178
  output_text = "### πŸ” Relevant Context Found:\n\n"
179
  for i, doc in enumerate(docs):
180
  output_text += f"**Chunk {i+1}:**\n> {doc.page_content}\n\n"
 
188
  # --- Gradio UI ---
189
 
190
  with gr.Blocks(title="EduGenius Context Retriever") as demo:
191
+ gr.Markdown("# πŸŽ“ EduGenius: Context Retriever")
192
+ gr.Markdown("Upload a Chapter (PDF or TXT), ask a question, and see exactly which part of the text proves the answer right or wrong.")
193
 
194
  with gr.Row():
195
  with gr.Column(scale=1):
196
+ # UPDATED: Added ".txt" to file_types and changed label
197
+ pdf_input = gr.File(label="1. Upload File (PDF or TXT)", file_types=[".pdf", ".txt"])
198
+ upload_btn = gr.Button("Process File", variant="primary")
199
  upload_status = gr.Textbox(label="Status", interactive=False)
200
 
201
  with gr.Column(scale=2):
 
202
  question_input = gr.Textbox(label="2. Question", placeholder="e.g., What causes the chemical reaction?")
203
  answer_input = gr.Textbox(label="Student Answer (Optional Context)", placeholder="e.g., The heat causes it...")
204
  search_btn = gr.Button("Find Relevant Evidence", variant="secondary")
205
 
 
206
  evidence_output = gr.Markdown(label="Relevant Text Chunks")
207
 
208
  # Event Handlers
209
  upload_btn.click(
210
+ fn=system.process_file, # Note: Function name changed
211
  inputs=[pdf_input],
212
  outputs=[upload_status]
213
  )
 
218
  outputs=[evidence_output]
219
  )
220
 
 
221
  if __name__ == "__main__":
222
  demo.launch()