# NOTE: This file was recovered from a Hugging Face Spaces web page
# (Space status "Sleeping", reported file size 6,128 bytes); the git blame
# hashes and line-number gutter from the page have been stripped.
import torch
import whisper
from transformers import pipeline
import gradio as gr
import concurrent.futures
import os # For environment variables
print("Starting up...")

# *** Model Loading - CPU Optimized & Size Considerations ***
# Whisper is loaded with a graceful fallback chain:
#   "tiny" (fastest on CPU, lowest accuracy) -> "small" -> disabled (None).
# transcribe_audio() checks `whisper_model is None` before using it.
try:
    # Option 1: Try "tiny" model. Significantly faster on CPU, but lower accuracy.
    whisper_model = whisper.load_model("tiny")
    print("Using whisper 'tiny' model.")
except Exception as e:
    print(f"Error loading whisper 'tiny' model: {e}. Trying 'small'.")
    try:
        whisper_model = whisper.load_model("small")
        print("Using whisper 'small' model.")
    except Exception as e2:
        print(f"Error loading whisper 'small' model: {e2}. Whisper will not work.")
        whisper_model = None  # Disable whisper functionality

# Hugging Face pipelines for summarization (BART) and question generation
# (FLAN-T5). Both are pinned to CPU. If either fails to load, BOTH are set
# to None and the downstream functions return error strings instead of raising.
try:
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=-1)  # device=-1 forces CPU
    question_generator = pipeline("text2text-generation", model="google/flan-t5-large", device=-1)  # device=-1 forces CPU
    print("Summarizer and Question Generator loaded successfully.")
except Exception as e:
    print(f"Error loading Summarizer or Question Generator: {e}")
    summarizer = None
    question_generator = None
    print("Summarization and Question Generation will not work.")

print("Models loaded (or failed gracefully).")
# *** Transcription ***
def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` with Whisper.

    Returns the transcript text on success. Any failure — including an
    unloaded model — yields a human-readable error string instead of raising,
    matching the error-as-string convention used throughout this app.
    """
    print("Transcribing audio...")
    if whisper_model is None:
        return "Error: Whisper model failed to load."
    try:
        return whisper_model.transcribe(audio_path)["text"]
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return f"Error during transcription: {e}"
# *** Summarization ***
def summarize_text(text):
    """Summarize ``text`` with BART, chunking to fit the model's input limit.

    Parameters
    ----------
    text : str
        Text to summarize (typically the Whisper transcript).

    Returns
    -------
    str
        The concatenated per-chunk summaries, or an error string if the model
        is unavailable, the input is empty, or summarization fails.
    """
    if summarizer is None:
        return "Error: Summarizer model failed to load."
    # Guard: an empty/whitespace transcript would produce zero chunks and a
    # degenerate summarizer([]) call; report it explicitly instead.
    if not text or not text.strip():
        return "Error: No text to summarize."
    print("Summarizing text using BART...")
    # Chunk the text into smaller parts, even smaller than before for CPU.
    # NOTE: chunks are split on raw character offsets, so words may be cut
    # mid-token — acceptable for summarization quality, cheap on CPU.
    text_chunks = [text[i:i + 768] for i in range(0, len(text), 768)]  # More aggressive chunking
    try:
        summaries = summarizer(text_chunks, max_length=150, min_length=30, do_sample=False)  # Reduce length
        return " ".join([s['summary_text'] for s in summaries])
    except Exception as e:
        print(f"Error during summarization: {e}")
        return f"Error during summarization: {e}"
# *** Question Generation ***
def generate_questions(text):
    """Generate practice questions from ``text`` with FLAN-T5.

    The text is split into 512-character chunks and the chunks are prompted
    in parallel worker threads. A failure on one chunk is reported inline in
    the output rather than aborting the whole batch.
    """
    if question_generator is None:
        return "Error: Question Generation model failed to load."
    print("Generating questions using FLAN-T5...")

    # Even smaller chunks for question generation (CPU is struggling).
    chunk_size = 512
    text_chunks = [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

    def _ask(chunk):
        # One FLAN-T5 call per chunk; exceptions propagate into the future
        # and are handled when the result is collected below.
        return question_generator(
            f"You are an AI tutor. Your task is to generate **insightful, topic-specific** questions based on the following passage. Ensure that the questions are relevant to the **key concepts, definitions, and explanations** present in the text. Avoid generic questions.\n\nPassage:\n{chunk}",
            max_length=80, num_return_sequences=2, do_sample=True  # Reduce length and sequences
        )

    questions = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:  # Explicitly limit threads
        pending = [executor.submit(_ask, chunk) for chunk in text_chunks]
        # Collect in submission order so output order matches chunk order.
        for future in pending:
            try:
                generated = future.result()
                questions.extend(item['generated_text'] for item in generated)
            except Exception as e:
                print(f"Error generating questions for a chunk: {e}")
                questions.append(f"Error generating questions: {e}")  # Report the error
    return "\n".join(questions)
# *** Main Processing Function ***
def process_audio(audio_path):
    """Run the full pipeline on one audio file.

    Transcribes, summarizes, and generates questions, then writes the
    combined report to 'lecture_summary.txt' so the UI can offer a download.

    Returns a 4-tuple: (transcript, summary, questions, file_path).
    """
    transcript = transcribe_audio(audio_path)
    summary = summarize_text(transcript)
    questions = generate_questions(transcript)

    combined_text = (
        f"π Transcription:\n{transcript}"
        f"\n\nπ Summary:\n{summary}"
        f"\n\nπ€ Practice Questions:\n{questions}"
    )
    file_path = "lecture_summary.txt"
    with open(file_path, "w", encoding="utf-8") as report:
        report.write(combined_text)
    return transcript, summary, questions, file_path
def gradio_interface(audio):
    """Thin Gradio callback: forward the uploaded file path to process_audio()."""
    return process_audio(audio)
# *** Gradio Interface ***
# Layout: audio upload -> Submit -> three read-only panes (transcript /
# summary / questions) plus a combined-report file component for download.
with gr.Blocks(css="""
#submit-btn, #download-btn {
background-color: blue !important;
color: white !important;
border-radius: 8px !important;
padding: 10px !important;
font-size: 16px !important;
}
textarea {
border: 2px solid black !important;
border-radius: 5px !important;
}
""") as demo:
    gr.Markdown("# π LectureGenie: Transcribe, Summarize & Quiz")
    gr.Markdown("Upload a lecture audio file. The system will **transcribe**, **summarize**, and **generate questions** automatically.")
    audio_input = gr.Audio(type="filepath", label="π€ Upload Audio File", interactive=True)
    submit_btn = gr.Button("Submit", elem_id="submit-btn")
    with gr.Row():
        with gr.Column():
            transcript_box = gr.Textbox(label="π Transcription", lines=10, interactive=False, show_copy_button=True)
        with gr.Column():
            summary_box = gr.Textbox(label="π Summary", lines=10, interactive=False, show_copy_button=True)
        with gr.Column():
            questions_box = gr.Textbox(label="π€ Practice Questions", lines=10, interactive=False, show_copy_button=True)
    # NOTE(review): download_btn starts with visible=False and nothing in the
    # file ever flips it to visible, so the populated file component appears
    # to stay hidden — confirm whether it should become visible after submit.
    download_btn = gr.File(label="π₯ Download All", interactive=False, visible=False)
    download_button = gr.Button("π₯ Download", elem_id="download-btn")
    # Submit wires process_audio's four return values to the three text panes
    # plus the (hidden) file component.
    submit_btn.click(
        gradio_interface,
        inputs=[audio_input],
        outputs=[transcript_box, summary_box, questions_box, download_btn]
    )
    # NOTE(review): identity lambda echoes the file component back to itself —
    # effectively a no-op; presumably intended to reveal/trigger the download.
    download_button.click(lambda x: x, inputs=[download_btn], outputs=[download_btn])

demo.launch(share=True)