BasitAliii committed on
Commit
b3fa4cf
·
verified ·
1 Parent(s): ddec509

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -20
app.py CHANGED
@@ -11,7 +11,7 @@ import numpy as np
11
  from sklearn.feature_extraction.text import TfidfVectorizer
12
 
13
  # ==========================================================
14
- # 🧠 NLTK Setup (Fix for punkt_tab)
15
  # ==========================================================
16
  for pkg in ["punkt", "punkt_tab"]:
17
  try:
@@ -22,7 +22,7 @@ for pkg in ["punkt", "punkt_tab"]:
22
  # ==========================================================
23
  # ⚙️ Model Setup
24
  # ==========================================================
25
- DEVICE = -1 # CPU (-1), 0 for GPU if available
26
  SUMMARIZER_MODEL = "facebook/bart-large-cnn"
27
  QA_MODEL = "deepset/roberta-base-squad2"
28
 
@@ -94,15 +94,13 @@ def extract_keywords_tfidf(text: str, top_k=8):
94
 
95
 
96
  # ==========================================================
97
- # ✍️ Summarization
98
  # ==========================================================
99
  def summarize_long_text(text: str) -> str:
100
  if summarizer is None:
101
  return "Summarization model unavailable."
102
-
103
  text = clean_text(text)
104
  L = len(text)
105
-
106
  if L < 1500:
107
  max_len, min_len, chunk_size = 180, 60, 1400
108
  elif L < 5000:
@@ -129,7 +127,7 @@ def summarize_long_text(text: str) -> str:
129
 
130
 
131
  # ==========================================================
132
- # 🔊 Text to Speech
133
  # ==========================================================
134
  def text_to_speech(text):
135
  if not text:
@@ -143,7 +141,7 @@ def text_to_speech(text):
143
 
144
 
145
  # ==========================================================
146
- # 💬 Q&A Generation
147
  # ==========================================================
148
  def generate_auto_questions(text: str, n=5):
149
  sents = sentence_tokenize(text)
@@ -186,13 +184,12 @@ def process_pdf(pdf_file):
186
 
187
 
188
  # ==========================================================
189
- # 🎨 Gradio Interface
190
  # ==========================================================
191
  with gr.Blocks(title="AI PDF Assistant", theme=gr.themes.Soft()) as demo:
192
  gr.Markdown("# πŸ“˜ AI PDF Assistant β€” Smart Chat & Summarizer")
193
  gr.Markdown("Easily extract, summarize, and chat with your PDFs using AI.")
194
 
195
- # --- Analyze PDF Tab ---
196
  with gr.Tab("πŸ“„ Analyze PDF"):
197
  with gr.Row():
198
  with gr.Column(scale=1):
@@ -204,33 +201,30 @@ with gr.Blocks(title="AI PDF Assistant", theme=gr.themes.Soft()) as demo:
204
  audio_box = gr.Audio(label="Summary Audio", interactive=False)
205
  keywords_box = gr.Textbox(label="Top Keywords", lines=2, interactive=False)
206
 
207
- # --- Chat with PDF Tab ---
208
  with gr.Tab("πŸ’¬ Chat with PDF"):
209
  gr.Markdown("### Auto-Generated Questions")
210
- auto_q_box = gr.Textbox(label="Generated Questions", lines=6, interactive=False)
211
  gr.Markdown("### Ask Your Own Question")
212
  user_q = gr.Textbox(label="Your Question", placeholder="Type your question here...")
213
  ask_btn = gr.Button("Ask", variant="primary")
214
  answer_box = gr.Textbox(label="Answer", lines=4, interactive=False)
215
 
216
- # --- About Tab ---
217
  with gr.Tab("ℹ️ About"):
218
  gr.Markdown("""
219
  ## πŸ“˜ About AI PDF Assistant
220
- **AI PDF Assistant** helps you understand and interact with PDFs effortlessly.
221
 
222
  ### Features
223
- - Extracts and cleans text
224
- - Generates adaptive summaries
225
- - Identifies keywords
226
- - Creates audio summaries
227
- - Auto-generates Q&A
228
- - Lets you chat with your PDF content
229
 
230
  Built with ❀️ using Hugging Face Transformers, gTTS, and Gradio.
231
  """)
232
 
233
- # --- Event Connections ---
234
  process_btn.click(
235
  process_pdf,
236
  inputs=[pdf_input],
 
11
  from sklearn.feature_extraction.text import TfidfVectorizer
12
 
13
  # ==========================================================
14
+ # 🧠 NLTK Setup (Fixed punkt_tab Issue)
15
  # ==========================================================
16
  for pkg in ["punkt", "punkt_tab"]:
17
  try:
 
22
  # ==========================================================
23
  # ⚙️ Model Setup
24
  # ==========================================================
25
+ DEVICE = -1 # CPU (-1), use 0 for GPU if available
26
  SUMMARIZER_MODEL = "facebook/bart-large-cnn"
27
  QA_MODEL = "deepset/roberta-base-squad2"
28
 
 
94
 
95
 
96
  # ==========================================================
97
+ # ✍️ Adaptive Summarization
98
  # ==========================================================
99
  def summarize_long_text(text: str) -> str:
100
  if summarizer is None:
101
  return "Summarization model unavailable."
 
102
  text = clean_text(text)
103
  L = len(text)
 
104
  if L < 1500:
105
  max_len, min_len, chunk_size = 180, 60, 1400
106
  elif L < 5000:
 
127
 
128
 
129
  # ==========================================================
130
+ # 🔊 Text-to-Speech
131
  # ==========================================================
132
  def text_to_speech(text):
133
  if not text:
 
141
 
142
 
143
  # ==========================================================
144
+ # 🧠 Q&A
145
  # ==========================================================
146
  def generate_auto_questions(text: str, n=5):
147
  sents = sentence_tokenize(text)
 
184
 
185
 
186
  # ==========================================================
187
+ # 🎨 Gradio UI
188
  # ==========================================================
189
  with gr.Blocks(title="AI PDF Assistant", theme=gr.themes.Soft()) as demo:
190
  gr.Markdown("# πŸ“˜ AI PDF Assistant β€” Smart Chat & Summarizer")
191
  gr.Markdown("Easily extract, summarize, and chat with your PDFs using AI.")
192
 
 
193
  with gr.Tab("πŸ“„ Analyze PDF"):
194
  with gr.Row():
195
  with gr.Column(scale=1):
 
201
  audio_box = gr.Audio(label="Summary Audio", interactive=False)
202
  keywords_box = gr.Textbox(label="Top Keywords", lines=2, interactive=False)
203
 
 
204
  with gr.Tab("πŸ’¬ Chat with PDF"):
205
  gr.Markdown("### Auto-Generated Questions")
206
+ auto_q_box = gr.Textbox(label="Generated Questions", lines=6, interactive=False, placeholder="Questions will appear after PDF is processed.")
207
  gr.Markdown("### Ask Your Own Question")
208
  user_q = gr.Textbox(label="Your Question", placeholder="Type your question here...")
209
  ask_btn = gr.Button("Ask", variant="primary")
210
  answer_box = gr.Textbox(label="Answer", lines=4, interactive=False)
211
 
 
212
  with gr.Tab("ℹ️ About"):
213
  gr.Markdown("""
214
  ## πŸ“˜ About AI PDF Assistant
215
+ **AI PDF Assistant** helps you understand and interact with PDFs effortlessly.
216
 
217
  ### Features
218
+ - Extracts and cleans text
219
+ - Generates adaptive summaries
220
+ - Identifies keywords
221
+ - Creates audio summaries
222
+ - Auto-generates Q&A
223
+ - Lets you chat with your PDF content
224
 
225
  Built with ❀️ using Hugging Face Transformers, gTTS, and Gradio.
226
  """)
227
 
 
228
  process_btn.click(
229
  process_pdf,
230
  inputs=[pdf_input],