Spaces:

JaganathC
/

Video_To_Text

Runtime error

App Files Files Community

JaganathC commited on Mar 15, 2025

Commit

b28774a

verified ·

1 Parent(s): 6bf8290

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -7

app.py CHANGED Viewed

@@ -11,32 +11,45 @@ import time
 import langdetect
 import uuid
 HF_TOKEN = os.environ.get("HF_TOKEN")
 print("Starting the program...")
 model_path = "Qwen/Qwen2.5-7B-Instruct"
-print(f"Loading model {model_path}...")
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, trust_remote_code=True).cuda()
 model = model.eval()
 print("Model successfully loaded.")
 def generate_unique_filename(extension):
     return f"{uuid.uuid4()}{extension}"
 def cleanup_files(*files):
     for file in files:
         if file and os.path.exists(file):
             os.remove(file)
             print(f"Removed file: {file}")
 def extract_audio_ffmpeg(video_path):
     print("Extracting audio using ffmpeg...")
     audio_path = generate_unique_filename(".wav")
     command = ["ffmpeg", "-i", video_path, "-q:a", "0", "-map", "a", audio_path, "-y"]
-    subprocess.run(command, check=True)
     return audio_path
 def transcribe_audio(file_path):
     print(f"Starting transcription of file: {file_path}")
     temp_audio = None
@@ -48,11 +61,11 @@ def transcribe_audio(file_path):
     output_file = generate_unique_filename(".json")
     command = [
         "insanely-fast-whisper", "--file-name", file_path,
-        "--device-id", "0", "--model-name", "openai/whisper-large-v3",
         "--task", "transcribe", "--timestamp", "chunk",
         "--transcript-path", output_file
     ]
-    subprocess.run(command, check=True)
     with open(output_file, "r") as f:
         transcription = json.load(f)
@@ -64,15 +77,17 @@ def transcribe_audio(file_path):
     return result
 def generate_summary_stream(transcription):
     detected_language = langdetect.detect(transcription)
     prompt = f"""Summarize the following video transcription in 150-300 words.
     The summary should be in the same language as the transcription, which is detected as {detected_language}.
-    {transcription[:300000]}..."""
     response, history = model.chat(tokenizer, prompt, history=[])
     return response
 def process_uploaded_video(video_path):
     try:
         transcription = transcribe_audio(video_path)
@@ -80,6 +95,7 @@ def process_uploaded_video(video_path):
     except Exception as e:
         return f"Processing error: {str(e)}", None
 demo = gr.Blocks()
 with demo:
     gr.Markdown("""
@@ -99,4 +115,4 @@ with demo:
     video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
     summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
-demo.launch()

 import langdetect
 import uuid
+# Hugging Face Token
 HF_TOKEN = os.environ.get("HF_TOKEN")
 print("Starting the program...")
+# Load Qwen Model on CPU
 model_path = "Qwen/Qwen2.5-7B-Instruct"
+print(f"Loading model {model_path} on CPU...")
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    torch_dtype=torch.bfloat16,  # Uses less memory than float32
+    trust_remote_code=True,
+    low_cpu_mem_usage=True,
+    device_map="auto"  # Automatically optimizes model parts for CPU
+).to("cpu")
 model = model.eval()
 print("Model successfully loaded.")
+# Generate unique filenames
 def generate_unique_filename(extension):
     return f"{uuid.uuid4()}{extension}"
+# Cleanup temporary files
 def cleanup_files(*files):
     for file in files:
         if file and os.path.exists(file):
             os.remove(file)
             print(f"Removed file: {file}")
+# Extract audio using FFmpeg
 def extract_audio_ffmpeg(video_path):
     print("Extracting audio using ffmpeg...")
     audio_path = generate_unique_filename(".wav")
     command = ["ffmpeg", "-i", video_path, "-q:a", "0", "-map", "a", audio_path, "-y"]
+    subprocess.Popen(command).wait()  # Use Popen to reduce memory usage
     return audio_path
+# Transcribe audio
 def transcribe_audio(file_path):
     print(f"Starting transcription of file: {file_path}")
     temp_audio = None
     output_file = generate_unique_filename(".json")
     command = [
         "insanely-fast-whisper", "--file-name", file_path,
+        "--device-id", "cpu", "--model-name", "openai/whisper-large-v3",
         "--task", "transcribe", "--timestamp", "chunk",
         "--transcript-path", output_file
     ]
+    subprocess.Popen(command).wait()
     with open(output_file, "r") as f:
         transcription = json.load(f)
     return result
+# Generate summary using Qwen Model
 def generate_summary_stream(transcription):
     detected_language = langdetect.detect(transcription)
     prompt = f"""Summarize the following video transcription in 150-300 words.
     The summary should be in the same language as the transcription, which is detected as {detected_language}.
+    {transcription[:100000]}..."""  # Limiting input size to avoid memory overflow
     response, history = model.chat(tokenizer, prompt, history=[])
     return response
+# Process video upload
 def process_uploaded_video(video_path):
     try:
         transcription = transcribe_audio(video_path)
     except Exception as e:
         return f"Processing error: {str(e)}", None
+# Gradio UI
 demo = gr.Blocks()
 with demo:
     gr.Markdown("""
     video_button.click(process_uploaded_video, inputs=[video_input], outputs=[transcription_output, summary_output])
     summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
+demo.launch()