Asanaly committed on
Commit
6744f2c
·
verified ·
1 Parent(s): 522bcab

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +36 -37
main.py CHANGED
@@ -1,43 +1,42 @@
1
- from fastapi import FastAPI, UploadFile, File, Form
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from fastapi.responses import FileResponse
4
- from fastapi.staticfiles import StaticFiles
5
- from summarizer import generate_summary
6
- from pdf_reader import extract_text_from_pdf
7
 
8
# Application instance that every route below is registered on.
app = FastAPI()

# CORS is left fully open: every origin, method and header is accepted.
# NOTE(review): wildcard CORS is convenient for a demo page -- confirm it is
# intended before exposing this service more widely.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# Serve the process's working directory under /static.
# NOTE(review): directory="." publishes everything in that directory --
# confirm nothing sensitive lives there.
app.mount("/static", StaticFiles(directory="."), name="static")
18
-
19
@app.get("/")
async def root():
    """Return ``index.html`` as a file response for the site root."""
    landing_page = FileResponse("index.html")
    return landing_page
22
-
23
-
24
@app.post("/summarize/text")
async def summarize_text(data: dict):
    """Summarize raw text posted as a JSON object.

    Args:
        data: Request body. ``text`` holds the text to summarize and
            ``lang`` the language code forwarded to ``generate_summary``
            (``"en"`` when the key is absent).

    Returns:
        A dict with a single ``summary`` key. When ``text`` is missing,
        is not a string, or is blank, the summary is the fixed message
        ``"No text provided."``.
    """
    text = data.get("text", "")
    lang = data.get("lang", "en")

    # The body is an arbitrary JSON object, so ``text`` may be null, a number
    # or a list; none of those have ``.strip()``. Treat them as "no text"
    # instead of failing the request with an AttributeError.
    if not isinstance(text, str) or not text.strip():
        return {"summary": "No text provided."}

    summary = generate_summary(text, lang=lang)
    return {"summary": summary}
33
 
 
 
 
34
 
35
@app.post("/summarize/pdf")
async def summarize_pdf(file: UploadFile = File(...), lang: str = Form("en")):
    """Summarize the text extracted from an uploaded PDF.

    Args:
        file: Uploaded file whose bytes are handed to
            ``extract_text_from_pdf``.
        lang: Language code forwarded to ``generate_summary``; the form
            field defaults to ``"en"``.

    Returns:
        A dict with a single ``summary`` key. If the extracted text is
        blank, the summary is a fixed explanatory message instead.
    """
    raw_pdf = await file.read()
    extracted = extract_text_from_pdf(raw_pdf)

    # Only call the summarizer when the extraction produced real content.
    if extracted.strip():
        return {"summary": generate_summary(extracted, lang=lang)}
    return {"summary": "PDF is empty or could not extract text."}
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from transformers import BartForConditionalGeneration, BartTokenizer
4
+ import torch
 
 
5
 
6
app = FastAPI(title="Multilingual Text Summarizer")

# English summarization checkpoint (a BART model).
MODEL_NAME = "facebook/bart-large-cnn"
tokenizer_en = BartTokenizer.from_pretrained(MODEL_NAME)
model_en = BartForConditionalGeneration.from_pretrained(MODEL_NAME)

# Russian summarization checkpoint. This is an mBART model: it uses a
# SentencePiece vocabulary and language-code tokens that the plain BART
# tokenizer/model classes do not handle, so it must be loaded with the
# MBart classes.
from transformers import MBartForConditionalGeneration, MBartTokenizer  # noqa: E402

MODEL_NAME_RU = "IlyaGusev/mbart_ru_sum_gazeta"
tokenizer_ru = MBartTokenizer.from_pretrained(MODEL_NAME_RU)
model_ru = MBartForConditionalGeneration.from_pretrained(MODEL_NAME_RU)
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
class TextRequest(BaseModel):
    """Request body for the summarization endpoint."""

    # Text to be summarized.
    text: str
    # Language code: "ru" selects the Russian model, anything else the
    # English one. Defaults to "en" so clients may omit the field.
    lang: str = "en"
20
 
21
@app.get("/")
def root():
    """Return a fixed message confirming the service is running."""
    status = {"message": "Multilingual Text Summarizer is running!"}
    return status
24
+
25
@app.post("/summarize/")
def summarize(request: TextRequest):
    """Summarize ``request.text`` with the model matching ``request.lang``.

    Args:
        request: Parsed request body. ``lang`` equal to ``"ru"`` (case
            insensitive) selects the Russian tokenizer/model; any other
            value selects the English pair.

    Returns:
        A dict with a single ``summary`` key. Blank input yields the fixed
        message ``"No text provided."`` without running a model.
    """
    # Skip the beam search entirely when there is nothing to summarize.
    if not request.text.strip():
        return {"summary": "No text provided."}

    if request.lang.lower() == "ru":
        tokenizer, model = tokenizer_ru, model_ru
    else:
        tokenizer, model = tokenizer_en, model_en

    # Inputs longer than 1024 tokens are truncated before generation.
    inputs = tokenizer(
        [request.text],
        max_length=1024,
        return_tensors="pt",
        truncation=True,
    )
    summary_ids = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so generation does not have to infer it.
        attention_mask=inputs["attention_mask"],
        num_beams=4,
        max_length=150,
        early_stopping=True,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return {"summary": summary}