rbbist committed on
Commit
5434acc
·
verified ·
1 Parent(s): 349be24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -20
app.py CHANGED
@@ -1,26 +1,77 @@
1
- with st.spinner("Processing paper..."):
2
- try:
3
- temp_dir = tempfile.mkdtemp()
4
- file_path = os.path.join(temp_dir, uploaded_file.name)
5
 
6
- with open(file_path, "wb") as f:
7
- f.write(uploaded_file.read())
 
8
 
9
- text = extract_text_from_pdf(file_path)
10
- topic_list = [t.strip() for t in topic_input.split(",") if t.strip()]
11
- classified_topic = classify_topic(text, topic_list)
12
- summary = summarize_text(text)
 
13
 
14
- st.markdown(f"### 🧠 Classified Topic: `{classified_topic}`")
15
- st.markdown("### ✍️ Summary:")
16
- st.write(summary)
 
17
 
18
- audio_path = os.path.join(temp_dir, "summary.mp3")
19
- generate_audio(summary, audio_path)
 
 
 
 
20
 
21
- st.markdown("### πŸ”Š Audio Summary")
22
- st.audio(audio_path)
23
- st.success("Done! Audio summary is ready.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- except Exception as e:
26
- st.error(f"❌ Error: {str(e)}")
 
1
+ # app.py (Streamlit-only version for Hugging Face Spaces)
 
 
 
2
 
3
+ import os
4
+ import tempfile
5
+ from typing import List
6
 
7
+ import fitz # PyMuPDF
8
+ import requests
9
+ from transformers import pipeline
10
+ from gtts import gTTS
11
+ import streamlit as st
12
 
13
+ # ---------- HELPERS ----------
14
def summarize_text(text: str) -> str:
    """Summarize *text* with the ``sshleifer/distilbart-cnn-12-6`` model.

    Args:
        text: Plain text to summarize.

    Returns:
        The ``summary_text`` field of the first result returned by the
        summarization pipeline.

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Fail before the (slow) model load: there is nothing to summarize,
        # e.g. when a PDF contained no extractable text.
        raise ValueError("No text to summarize.")

    # NOTE(review): the pipeline is rebuilt on every call; consider caching it
    # if model loading time becomes a problem.
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

    # truncation=True clips input that exceeds the model's maximum sequence
    # length instead of letting the model fail on over-long input.
    result = summarizer(
        text,
        max_length=200,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]
17
 
18
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The page texts joined in page order, with no separator added
        between pages.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
24
 
25
def classify_topic(text: str, topics: List[str]) -> str:
    """Pick the candidate topic that best matches *text*.

    Runs zero-shot classification with ``valhalla/distilbart-mnli-12-3`` on
    the first 1000 characters of *text*.

    Args:
        text: Document text to classify.
        topics: Candidate topic labels.

    Returns:
        The first entry of the ``labels`` list in the pipeline result.
        Per the transformers documentation, labels are sorted by descending
        score, so this is the best match.

    Raises:
        ValueError: If *topics* is empty, or *text* is empty or contains
            only whitespace.
    """
    # Validate before loading the model so bad input fails fast and with a
    # clear message.
    if not topics:
        raise ValueError("At least one candidate topic is required.")
    if not text or not text.strip():
        raise ValueError("No text to classify.")

    classifier = pipeline(
        "zero-shot-classification", model="valhalla/distilbart-mnli-12-3"
    )
    # Only the beginning of the document is classified, which keeps the input
    # short.
    result = classifier(text[:1000], candidate_labels=topics)
    return result["labels"][0]
29
+
30
def generate_audio(text: str, output_path: str):
    """Convert *text* to speech with gTTS and save the audio to *output_path*."""
    gTTS(text).save(output_path)
33
+
34
# ---------- STREAMLIT UI ----------
# Page-level configuration, followed by the title and a short usage guide.
st.set_page_config(page_title="Research Paper Summarizer", layout="centered")
st.title("📄 AI Research Paper Summarizer")

st.markdown("""
Upload a research paper (PDF) and a list of topics. The app will:
1. Extract and summarize the paper
2. Classify it into a topic
3. Generate an audio summary 🎧
""")
44
+
45
# Collect the PDF and the candidate topics in a single form so that nothing
# is processed until the user presses the submit button.
with st.form("upload_form"):
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    topic_input = st.text_input("Enter comma-separated topics")
    submitted = st.form_submit_button("Summarize and Generate Audio")

if submitted:
    # Parse the topics first: input such as " , " is non-empty text but
    # contains no usable topic.
    topic_list = [t.strip() for t in topic_input.split(",") if t.strip()]

    if uploaded_file is None:
        st.warning("Please upload a PDF file.")
    elif not topic_list:
        st.warning("Please enter at least one topic.")
    else:
        with st.spinner("Processing paper..."):
            try:
                # TemporaryDirectory removes the uploaded PDF and generated
                # audio when the block exits, even if processing fails.
                with tempfile.TemporaryDirectory() as temp_dir:
                    # Use a fixed file name rather than the client-supplied
                    # one so the upload cannot influence the path on disk.
                    file_path = os.path.join(temp_dir, "paper.pdf")
                    with open(file_path, "wb") as f:
                        f.write(uploaded_file.read())

                    text = extract_text_from_pdf(file_path)
                    classified_topic = classify_topic(text, topic_list)
                    summary = summarize_text(text)

                    st.markdown(f"### 🧠 Classified Topic: `{classified_topic}`")
                    st.markdown("### ✍️ Summary:")
                    st.write(summary)

                    audio_path = os.path.join(temp_dir, "summary.mp3")
                    generate_audio(summary, audio_path)

                    # Read the audio into memory so playback does not depend
                    # on the temporary file, which is deleted below.
                    with open(audio_path, "rb") as audio_file:
                        audio_bytes = audio_file.read()

                st.markdown("### 🔊 Audio Summary")
                st.audio(audio_bytes, format="audio/mpeg")
                st.success("Done! Audio summary is ready.")

            except Exception as e:
                # Top-level UI boundary: show any failure to the user instead
                # of crashing the app.
                st.error(f"❌ Error: {str(e)}")
77