Spaces:

mrsk1883
/

testing

Sleeping

App Files Files Community

mrsk1883 committed on Dec 9, 2023

Commit

17cf0e6

1 Parent(s): 3e0f5bd

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -76

app.py DELETED Viewed

@@ -1,76 +0,0 @@
-from PyPDF2 import PdfReader
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-from gtts import gTTS
-import os
-# Download/load the pretrained summarization model and its tokenizer once, at import time; summarize_pdf_abstract reads both as module globals.
-model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-def summarize_and_speak_pdf_abstract(pdf_path, language="en", output_path="summary.mp3"):
-    """
-    Summarize a PDF with summarize_pdf_abstract and save the summary as speech.
-    Args:
-        pdf_path: Path to the PDF file.
-        language: Language code passed to gTTS as ``lang``.
-        output_path: File that gTTS saves the spoken summary to.
-    Returns:
-        The path of the audio file that was written.
-    Raises:
-        ValueError: If the summary is empty, so there is nothing to speak.
-    """
-    summary = summarize_pdf_abstract(pdf_path)
-    if not summary.strip():
-        raise ValueError(f"Empty summary for {pdf_path!r}; nothing to speak")
-    # gTTS builds the text-to-speech request; save() writes the audio file.
-    gTTS(text=summary, lang=language).save(output_path)
-    print(f"Audio file created: {output_path}")
-    return output_path
-# Define the function to summarize the abstract
-def summarize_pdf_abstract(pdf_path):
-    """
-    Summarize the first PDF page that mentions "Abstract" or "Introduction".
-    The full text of that page (not only the abstract) is sent to the model.
-    Args:
-        pdf_path: Path to the PDF file.
-    Returns:
-        The summary text decoded from the model's first generated sequence.
-    Raises:
-        ValueError: If no page contains either keyword.
-    """
-    abstract_text = ""
-    # The with-block guarantees the PDF file handle is closed afterwards.
-    with open(pdf_path, "rb") as pdf_file:
-        for page in PdfReader(pdf_file).pages:
-            # Extract once per page instead of once per keyword check.
-            page_text = page.extract_text()
-            if "Abstract" in page_text or "Introduction" in page_text:
-                abstract_text = page_text
-                break
-    if not abstract_text:
-        # A summary of an empty string could not describe the document.
-        raise ValueError(f"No 'Abstract' or 'Introduction' page in {pdf_path!r}")
-    # truncation=True caps the input at the tokenizer's maximum length.
-    inputs = tokenizer(abstract_text, return_tensors="pt", truncation=True)
-    outputs = model.generate(**inputs)
-    # Only the first generated sequence is decoded, without special tokens.
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)