rbbist committed on
Commit
f771a3a
·
verified ·
1 Parent(s): 307dfc2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import os
4
+ import uuid
5
+ import tempfile
6
+ from typing import List
7
+ from fastapi import FastAPI, UploadFile, File, Form
8
+ from fastapi.responses import FileResponse
9
+ from pydantic import BaseModel
10
+ import fitz # PyMuPDF
11
+ import requests
12
+ import openai
13
+ from transformers import pipeline
14
+ import torch
15
+ from gtts import gTTS
16
+ import shutil
17
+
18
+ # ---------- CONFIG ----------
19
# The OpenAI key is read from the environment (configured as a Hugging Face
# secret); it is None when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
20
+
21
# Lazily-created summarization pipeline, shared by every call so the model
# weights are loaded once per process instead of once per request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared BART summarization pipeline, loading it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def summarize_text(text: str) -> str:
    """Summarize *text* with the facebook/bart-large-cnn model.

    Args:
        text: Plain text to condense.

    Returns:
        The generated summary, or an empty string when *text* is empty or
        contains only whitespace (the model is not invoked in that case).
    """
    if not text or not text.strip():
        return ""

    summarizer = _get_summarizer()
    # truncation=True clips inputs longer than the model's input window
    # instead of failing on long documents such as full papers.
    outputs = summarizer(
        text,
        max_length=200,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]["summary_text"]
24
+
25
+ # ---------- FASTAPI SETUP ----------
26
# Application object that the route decorators below register against.
app = FastAPI(
    title="Research Paper Summarization App",
)
27
+
28
class SummaryRequest(BaseModel):
    """Request body for the /summarize_urls endpoint."""

    # Label echoed back in the response next to the combined summary.
    topic: str
    # Pages to download and summarize; defaults to an empty list.
    urls: List[str] = []
31
+
32
+ # ---------- HELPERS ----------
33
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the concatenated text of every page in the document at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the document to open with PyMuPDF.

    Returns:
        The text of all pages joined in page order (empty if no page yields
        any text).
    """
    # The context manager closes the document handle even if a page fails
    # to extract; the original left it open.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
39
+
40
# Lazily-created zero-shot classifier, shared by every call so the model
# weights are loaded once per process instead of once per request.
_TOPIC_CLASSIFIER = None


def _get_topic_classifier():
    """Return the shared zero-shot classification pipeline, loading it on first use."""
    global _TOPIC_CLASSIFIER
    if _TOPIC_CLASSIFIER is None:
        _TOPIC_CLASSIFIER = pipeline(
            "zero-shot-classification", model="facebook/bart-large-mnli"
        )
    return _TOPIC_CLASSIFIER


def classify_topic(text: str, topics: List[str]) -> str:
    """Pick the candidate topic that best matches *text*.

    Only the first 1000 characters of *text* are classified.

    Args:
        text: Document text to classify.
        topics: Candidate labels; blank entries are ignored.

    Returns:
        The first label in the classifier's ``labels`` result.

    Raises:
        ValueError: If *topics* contains no non-blank label.
    """
    labels = [label for label in topics if label and label.strip()]
    if not labels:
        # Fail early with a clear message instead of loading the model and
        # failing inside the pipeline.
        raise ValueError("classify_topic requires at least one non-empty topic label")

    classifier = _get_topic_classifier()
    result = classifier(text[:1000], candidate_labels=labels)
    return result["labels"][0]
44
+
45
def generate_audio(text: str, output_path: str):
    """Convert *text* to speech with gTTS and save the result at *output_path*."""
    gTTS(text).save(output_path)
48
+
49
+ # ---------- ROUTES ----------
50
+
51
@app.post("/upload")
def upload_paper(file: UploadFile = File(...), topics: str = Form(...)):
    """Summarize an uploaded paper and return the summary as an MP3 download.

    Args:
        file: The uploaded document.
        topics: Comma-separated candidate topic labels.

    Returns:
        A ``FileResponse`` streaming ``summary.mp3``; the per-request temp
        directory is removed once the response has been sent.

    Raises:
        HTTPException: 422 when no usable topic is supplied or when no text
            can be extracted from the upload.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import BackgroundTasks, HTTPException

    topic_list = [t.strip() for t in topics.split(",") if t.strip()]
    if not topic_list:
        raise HTTPException(status_code=422, detail="At least one topic is required.")

    temp_dir = tempfile.mkdtemp()
    try:
        # Never join the client-supplied filename into a path: it may hold
        # "../" segments or be absolute. Only its extension is kept, on a
        # server-generated name.
        client_name = os.path.basename(file.filename or "")
        extension = os.path.splitext(client_name)[1] or ".pdf"
        file_path = os.path.join(temp_dir, f"{uuid.uuid4().hex}{extension}")

        # Stream to disk in chunks rather than reading the whole upload
        # into memory.
        with open(file_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        text = extract_text_from_pdf(file_path)
        if not text.strip():
            raise HTTPException(
                status_code=422,
                detail="No extractable text found in the uploaded file.",
            )

        # NOTE(review): the classified topic is computed but is not part of
        # the audio response; the call is kept so topic validation behaves as
        # before. Confirm whether it should be exposed or dropped.
        classify_topic(text, topic_list)
        summary = summarize_text(text)

        audio_path = os.path.join(temp_dir, "summary.mp3")
        generate_audio(summary, audio_path)
    except Exception:
        # Do not leak the temp directory when any processing step fails.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # The file must still exist while it is streamed, so removal is deferred
    # to a background task that runs after the response is sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(shutil.rmtree, temp_dir, ignore_errors=True)
    return FileResponse(
        audio_path,
        media_type="audio/mpeg",
        filename="summary.mp3",
        background=cleanup,
    )
73
+
74
@app.post("/summarize_urls")
def summarize_from_urls(req: SummaryRequest):
    """Download each URL in the request and summarize its body text.

    Processing is best-effort per URL: a non-OK HTTP response is skipped,
    and any exception is recorded in place as an ``"Error: ..."`` entry so
    one bad URL does not fail the whole request.

    Args:
        req: Request body carrying the topic label and the URLs to fetch.

    Returns:
        A dict with the request's ``topic`` and ``combined_summary``, the
        per-URL results joined with newlines.
    """
    # NOTE(review): the URLs come straight from the client and are fetched
    # server-side (SSRF exposure). Consider restricting schemes/hosts.
    summaries = []
    for url in req.urls:
        try:
            # (connect, read) timeout so an unresponsive host cannot hang
            # the worker indefinitely; requests has no default timeout.
            response = requests.get(url, timeout=(5, 30))
            if not response.ok:
                continue
            summaries.append(summarize_text(response.text))
        except Exception as e:  # per-URL boundary: report and continue
            summaries.append(f"Error: {str(e)}")

    return {"topic": req.topic, "combined_summary": "\n".join(summaries)}
90
+
91
@app.get("/")
def root():
    """Return the static welcome payload for the API root."""
    greeting = "Welcome to the Research Paper Summarization API!"
    return {"message": greeting}