import gradio as gr
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
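
# Two-stage summarization, "map-reduce" style:
#   1. Map: split the transcript into overlapping word chunks and summarize
#      each chunk with a meeting-summary checkpoint.
#   2. Reduce: condense the concatenated chunk summaries with BART into one
#      final summary of a user-chosen minimum length.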

# First-pass summarizer: BART fine-tuned on meeting/dialogue data.
summarizer_ft = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")

# Second-pass summarizer: general-purpose BART-large-CNN for the final condensation.
summarizer_bart = pipeline("summarization", model="facebook/bart-large-cnn")
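
# Call shape used throughout: each pipeline returns a list of dicts, so the
# text lives at [0]["summary_text"], e.g.
#   summarizer_bart(text, max_length=60, min_length=20, do_sample=False)[0]["summary_text"]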


def chunk_summaries(words, summarizer, chunk_len=750, overlap=50):
    """Summarize a long word list with an overlapping sliding window."""
    summaries = []
    pointer = 0
    while pointer < len(words):
        chunk = words[pointer:pointer + chunk_len]
        # Never request a summary longer than the chunk itself, and keep
        # min_length at or below max_length so short tail chunks don't error.
        max_len = min(130, len(chunk))
        min_len = min(40, max_len)
        summaries.append(
            summarizer(" ".join(chunk), max_length=max_len, min_length=min_len,
                       do_sample=False)[0]["summary_text"]
        )
        if pointer + chunk_len >= len(words):
            break
        # Step forward, keeping `overlap` words of shared context between chunks.
        pointer += chunk_len - overlap
    return summaries


def summarize(full_txt, min_summ_len=30):
    chunk_len = 750
    words = full_txt.split()

    # Map step: summarize overlapping transcript chunks with the meeting model.
    large_summ = " ".join(chunk_summaries(words, summarizer_ft, chunk_len))
    summ_words = large_summ.split()

    # If the joined chunk summaries still exceed one chunk, run a second
    # chunked pass with BART before the final condensation.
    if len(summ_words) >= chunk_len:
        large_summ = " ".join(chunk_summaries(summ_words, summarizer_bart, chunk_len))

    # Reduce step: condense everything into a single summary, keeping
    # max_length above the user-supplied minimum.
    min_len = int(min_summ_len)
    return summarizer_bart(large_summ, max_length=max(150, min_len + 20),
                           min_length=min_len, do_sample=False)[0]["summary_text"]
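

# A possible refinement, not used above: whitespace word counts only
# approximate the models' 1024-token context window, so a 750-word chunk can
# still overflow it. `token_chunks` is an illustrative sketch of token-aware
# chunking with BART's own tokenizer.
from transformers import AutoTokenizer

_bart_tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

def token_chunks(text, max_tokens=900, overlap=50):
    """Split `text` into overlapping chunks of at most `max_tokens` BART tokens."""
    ids = _bart_tokenizer(text, add_special_tokens=False)["input_ids"]
    step = max_tokens - overlap
    return [_bart_tokenizer.decode(ids[i:i + max_tokens])
            for i in range(0, len(ids), step)]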


def extract_text(youtube_video_url, min_summ_len):
    # Pull the video id out of a standard watch URL, tolerating extra query
    # parameters such as &t=42s.
    video_id = youtube_video_url.split("v=")[1].split("&")[0]
    # Fetch the transcript, trying languages in order (Hindi, then English).
    transcript_chunks = YouTubeTranscriptApi.get_transcript(video_id, languages=["hi", "en"])
    transcript = " ".join(chunk["text"] for chunk in transcript_chunks)
    return summarize(transcript, min_summ_len)
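

# A possible hardening, not wired in above: `split("v=")` fails for youtu.be
# short links and for playlists where `v` is not the only query parameter.
# `video_id_from_url` is an illustrative replacement using the standard library.
from urllib.parse import urlparse, parse_qs

def video_id_from_url(url):
    """Extract the video id from watch?v=... and youtu.be/... style URLs."""
    parsed = urlparse(url)
    if parsed.hostname and "youtu.be" in parsed.hostname:
        return parsed.path.lstrip("/")
    return parse_qs(parsed.query)["v"][0]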


demo = gr.Interface(
    fn=extract_text,
    inputs=[
        gr.Textbox(label="YouTube video URL"),
        gr.Number(label="Minimum summary length", value=30, precision=0),
    ],
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Video Text Summarization for Efficient Information Capture",
    description="Generate a concise summary of a YouTube video's transcript, tailored to your needs.",
)


# debug=True blocks the main thread and surfaces tracebacks in the console.
demo.launch(debug=True)