Spaces:

Asad235
/

editor

Paused

App Files Files Community

Create app.py

by Asad235 - opened May 31

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+408

-0

Files changed (1) hide show

app.py +408 -0

app.py ADDED Viewed

	@@ -0,0 +1,408 @@

#!/usr/bin/env python3
"""
ViralCutter - 100% local, no API keys required.

Automatically creates short clips from a video without any paid API.
"""
import json
import os
import re
import subprocess
import uuid
from datetime import datetime

# ========== CONFIGURATION ==========
# Everything is stored under /data when that directory exists, otherwise
# under the current working directory.
BASE_DIR = '/data' if os.path.exists('/data') else os.getcwd()
UPLOAD_FOLDER = os.path.join(BASE_DIR, 'uploads')
OUTPUT_FOLDER = os.path.join(BASE_DIR, 'outputs')
MODEL_CACHE = os.path.join(BASE_DIR, 'models')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
os.makedirs(MODEL_CACHE, exist_ok=True)

# The cache locations must be exported BEFORE the model libraries are
# imported: huggingface_hub resolves its cache paths from the environment at
# import time, so setting these after `import whisperx` leaves the default
# cache directory in use.
os.environ['XDG_CACHE_HOME'] = MODEL_CACHE
os.environ['TRANSFORMERS_CACHE'] = MODEL_CACHE
os.environ['HF_HOME'] = MODEL_CACHE

# Third-party imports are deliberately placed after the environment setup.
import yt_dlp  # noqa: E402
import whisperx  # noqa: E402
from flask import Flask, request, send_file, jsonify  # noqa: E402
from flask_cors import CORS  # noqa: E402

# ========== FLASK APP ==========
app = Flask(__name__)
CORS(app)
# ========== VIRAL CUTTER - NO GEMINI ==========
class ViralCutter:
    """Local video-to-shorts pipeline that needs no external AI API.

    The pipeline downloads a video with yt-dlp (or takes a local file),
    transcribes it with WhisperX, scores transcript segments with keyword
    heuristics, cuts the best segments into 9:16 clips with ffmpeg and burns
    word-by-word subtitles into them.
    """

    # Keyword tables used by detect_viral_moments(). Word entries are matched
    # as whole words/phrases; '?' is matched as a plain character.
    HOOK_KEYWORDS = {
        'high': [
            'wow', 'amazing', 'incredible', 'unbelievable', 'crazy', 'insane',
            'never', 'ever', 'finally', 'secret', 'revealed', 'breaking',
            'exclusive', 'shocking', 'wait', 'seriously', 'omg', 'literally',
        ],
        'medium': [
            'how to', 'why', 'what if', 'imagine', 'discover', 'learn',
            'master', 'ultimate', 'best', 'worst', 'top', 'secret',
        ],
        'questions': ['?', 'what', 'where', 'when', 'who', 'which', 'why', 'how'],
    }
    MAX_VIRAL_CLIPS = 5      # upper bound on candidates returned by detection
    MIN_CLIP_GAP = 2         # seconds that must separate two selected candidates
    MIN_CLIP_SECONDS = 15    # clips shorter than this are extended
    MAX_CLIP_SECONDS = 45    # clips longer than this are truncated

    def __init__(self):
        self.device = "cpu"
        self.compute_type = "int8"
        self.batch_size = 16
        print("🚀 ViralCutter API Initialized (No Gemini)")
        print(f"📁 Storage: {BASE_DIR}")

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _run_ffmpeg(args):
        """Run ``ffmpeg -y <args>`` and raise RuntimeError on a non-zero exit.

        The original code ignored ffmpeg's exit status, so failed conversions
        went unnoticed and later steps worked on files that did not exist.
        """
        completed = subprocess.run(['ffmpeg', '-y', *args], capture_output=True)
        if completed.returncode != 0:
            stderr_tail = completed.stderr.decode('utf-8', errors='replace')[-500:]
            raise RuntimeError(
                f"ffmpeg failed (exit {completed.returncode}): {stderr_tail}"
            )

    @staticmethod
    def _escape_filter_value(value):
        """Escape *value* for use as an unquoted ffmpeg filter option value.

        Two escaping levels are applied, as described in the ffmpeg
        "filtergraph escaping" notes: first for the filter's own option
        parser, then for the filtergraph parser. Plain paths without special
        characters are returned unchanged.
        """
        for char in ("\\", "'", ":"):
            value = value.replace(char, "\\" + char)
        for char in ("\\", "'", ",", ";", "[", "]"):
            value = value.replace(char, "\\" + char)
        return value

    @staticmethod
    def _word_pattern(keyword):
        """Compile a whole-word (or whole-phrase) matcher for *keyword*."""
        return re.compile(r'\b' + re.escape(keyword) + r'\b')

    @staticmethod
    def _timed_words(seg):
        """Return ``(start, end, text)`` for every word of segment *seg*.

        WhisperX leaves out 'start'/'end' for words it could not align. Such
        words borrow the end of the previous word as their start and the
        start of the next timed word (or the segment end) as their end, so
        they still appear in the subtitles instead of raising KeyError.
        """
        words = seg['words']
        timed = []
        prev_end = seg['start']
        for pos, word in enumerate(words):
            start = word.get('start', prev_end)
            end = word.get('end')
            if end is None:
                end = next(
                    (later['start'] for later in words[pos + 1:] if 'start' in later),
                    seg['end'],
                )
            end = max(end, start)
            timed.append((start, end, word['word']))
            prev_end = end
        return timed

    # ------------------------------------------------------------------
    # Pipeline steps
    # ------------------------------------------------------------------
    def download_video(self, url):
        """Download *url* with yt-dlp into UPLOAD_FOLDER.

        Returns:
            ``(path, title)`` on success, ``(None, None)`` when the download
            fails for any reason (the error is printed).
        """
        video_id = uuid.uuid4().hex[:8]
        output_path = os.path.join(UPLOAD_FOLDER, f"{video_id}_input.mp4")
        ydl_opts = {
            'format': 'best[height<=720]/best',
            'outtmpl': output_path,
            'quiet': True,
            'no_warnings': True,
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                title = info.get('title', 'video')
        except Exception as e:  # boundary: report any downloader failure
            print(f"❌ Download failed: {e}")
            return None, None
        print(f"✅ Downloaded: {title}")
        return output_path, title

    def transcribe_audio(self, video_path):
        """Transcribe *video_path* with WhisperX and align word timestamps.

        Returns:
            The aligned WhisperX result (a dict with a 'segments' list).

        Raises:
            RuntimeError: if ffmpeg cannot extract the audio track.
        """
        print("🎙️ Transcribing audio with WhisperX...")
        # Derive the temp name from the stem: str.replace('.mp4', '.wav')
        # returned the input path itself for non-.mp4 files, and the cleanup
        # below would then have deleted the source video.
        audio_path = f"{os.path.splitext(video_path)[0]}_audio.wav"
        try:
            self._run_ffmpeg(['-i', video_path, '-ac', '1', '-ar', '16000', audio_path])
            model = whisperx.load_model("base", self.device, compute_type=self.compute_type)
            result = model.transcribe(audio_path, batch_size=self.batch_size)
            # Align for word timestamps
            model_a, metadata = whisperx.load_align_model(
                language_code=result["language"], device=self.device
            )
            result = whisperx.align(
                result["segments"], model_a, metadata, audio_path, self.device
            )
        finally:
            # Remove the temp audio even when transcription fails.
            if os.path.exists(audio_path):
                os.remove(audio_path)
        print(f"✅ Transcription complete: {len(result['segments'])} segments")
        return result

    def detect_viral_moments(self, transcription):
        """Score transcript segments with keyword heuristics and pick the best.

        Scoring per segment: +3 if any 'high' keyword occurs, +2 for each
        'medium' keyword that occurs, +1 for a question mark or question
        word, +1 for '!', +1 for a 5-20 s duration (-1 below 3 s) and +1 for
        10-30 words. Keywords are matched on word boundaries so that e.g.
        'ever' no longer fires on "every" or 'top' on "stop".

        Returns:
            Up to MAX_VIRAL_CLIPS dicts (start, end, text, score, duration),
            best score first, each at least MIN_CLIP_GAP seconds away from
            the others.
        """
        keywords = self.HOOK_KEYWORDS
        high_patterns = [self._word_pattern(kw) for kw in keywords['high']]
        medium_patterns = [self._word_pattern(kw) for kw in keywords['medium']]
        question_patterns = [
            self._word_pattern(kw) for kw in keywords['questions'] if kw != '?'
        ]

        scored_segments = []
        for seg in transcription['segments']:
            text_lower = seg['text'].lower()
            duration = seg['end'] - seg['start']
            score = 0
            if any(p.search(text_lower) for p in high_patterns):
                score += 3
            # Medium keywords accumulate: each distinct hit adds 2.
            score += 2 * sum(1 for p in medium_patterns if p.search(text_lower))
            if '?' in text_lower or any(p.search(text_lower) for p in question_patterns):
                score += 1
            # Excitement indicator
            if '!' in seg['text']:
                score += 1
            # Duration bonus (5-20 seconds is ideal for shorts)
            if 5 <= duration <= 20:
                score += 1
            elif duration < 3:
                score -= 1
            # Word count bonus
            if 10 <= len(text_lower.split()) <= 30:
                score += 1
            scored_segments.append({
                'start': seg['start'],
                'end': seg['end'],
                'text': seg['text'],
                'score': score,
                'duration': duration,
            })

        # Highest score first; the sort is stable, so ties keep transcript order.
        scored_segments.sort(key=lambda item: item['score'], reverse=True)

        gap = self.MIN_CLIP_GAP
        viral_clips = []
        for clip in scored_segments:
            if len(viral_clips) >= self.MAX_VIRAL_CLIPS:
                break
            too_close = any(
                clip['end'] > chosen['start'] - gap and clip['start'] < chosen['end'] + gap
                for chosen in viral_clips
            )
            if not too_close:
                viral_clips.append(clip)

        print(f"🔥 Found {len(viral_clips)} viral moments")
        for i, clip in enumerate(viral_clips[:3]):
            print(f"   Hook {i+1}: Score {clip['score']} - \"{clip['text'][:50]}...\"")
        return viral_clips

    def create_vertical_clip(self, video_path, start, end, output_path):
        """Cut ``[start, end]`` out of *video_path* and letterbox it to 1080x1920.

        Returns:
            *output_path*.

        Raises:
            RuntimeError: if either ffmpeg pass fails.
        """
        duration = end - start
        root, ext = os.path.splitext(output_path)
        temp_path = f"{root}_temp{ext or '.mp4'}"
        try:
            # Pass 1: stream-copy the requested time range.
            self._run_ffmpeg([
                '-i', video_path, '-ss', str(start), '-t', str(duration),
                '-c', 'copy', '-avoid_negative_ts', 'make_zero', temp_path,
            ])
            # Pass 2: scale to fit inside 1080x1920 (force_original_aspect_ratio=1
            # is ffmpeg's "decrease" mode) and pad with black bars.
            self._run_ffmpeg([
                '-i', temp_path,
                '-vf', 'scale=1080:1920:force_original_aspect_ratio=1,pad=1080:1920:(ow-iw)/2:(oh-ih)/2:color=black',
                '-c:a', 'copy', '-preset', 'fast', output_path,
            ])
        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        return output_path

    def format_srt_time(self, seconds):
        """Convert *seconds* to an SRT timestamp ``HH:MM:SS,mmm``.

        The value is rounded to whole milliseconds first; truncating the
        fractional part turned e.g. 1.001 s into ``,000``. Negative input is
        clamped to zero.
        """
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def generate_word_by_word_srt(self, transcription, start_offset, end_offset, output_path):
        """Write one SRT cue per word for the window ``[start_offset, end_offset]``.

        Cue times are relative to *start_offset*. Segments that only partly
        overlap the window contribute the words inside it (the original
        dropped such segments completely), and cues are clamped to the
        window. Segments without word data fall back to one cue for the
        whole segment text.

        Returns:
            *output_path*.
        """
        cues = []
        for seg in transcription['segments']:
            if seg['end'] <= start_offset or seg['start'] >= end_offset:
                continue
            if seg.get('words'):
                cues.extend(self._timed_words(seg))
            else:
                cues.append((seg['start'], seg['end'], seg['text']))

        with open(output_path, 'w', encoding='utf-8') as f:
            idx = 1
            for cue_start, cue_end, text in cues:
                if cue_end <= start_offset or cue_start >= end_offset:
                    continue
                begin = self.format_srt_time(max(cue_start, start_offset) - start_offset)
                finish = self.format_srt_time(min(cue_end, end_offset) - start_offset)
                f.write(f"{idx}\n{begin} --> {finish}\n{text}\n\n")
                idx += 1
        return output_path

    def add_subtitles(self, video_path, srt_path, output_path):
        """Burn the subtitles in *srt_path* into *video_path*.

        Returns:
            *output_path*.

        Raises:
            RuntimeError: if ffmpeg fails.
        """
        if not os.path.exists(srt_path) or os.path.getsize(srt_path) == 0:
            # Nothing to burn in. NOTE(review): ffmpeg is expected to reject
            # an empty subtitle file, so the clip is copied as-is instead.
            import shutil
            shutil.copyfile(video_path, output_path)
            return output_path
        style = 'FontSize=28,FontName=Arial,OutlineColour=&H00000000,BorderStyle=1'
        subtitle_filter = (
            f"subtitles={self._escape_filter_value(srt_path)}:force_style='{style}'"
        )
        self._run_ffmpeg([
            '-i', video_path,
            '-vf', subtitle_filter,
            '-c:a', 'copy', '-preset', 'fast', output_path,
        ])
        return output_path

    def _render_clips(self, video_path, transcription, viral_clips):
        """Render every candidate in *viral_clips* and return their metadata."""
        clips_info = []
        video_id = uuid.uuid4().hex[:8]
        last_end = transcription['segments'][-1]['end']
        for clip_num, clip in enumerate(viral_clips, start=1):
            start = clip['start']
            end = clip['end']
            # Ensure minimum duration without running past the transcript
            if end - start < self.MIN_CLIP_SECONDS:
                end = min(start + self.MIN_CLIP_SECONDS, last_end)
            # Ensure maximum duration
            if end - start > self.MAX_CLIP_SECONDS:
                end = start + self.MAX_CLIP_SECONDS

            prefix = os.path.join(OUTPUT_FOLDER, f"{video_id}_clip_{clip_num}")
            vertical_path = f"{prefix}_vertical.mp4"
            srt_path = f"{prefix}.srt"
            final_path = f"{prefix}_final.mp4"
            try:
                self.create_vertical_clip(video_path, start, end, vertical_path)
                self.generate_word_by_word_srt(transcription, start, end, srt_path)
                self.add_subtitles(vertical_path, srt_path, final_path)
            finally:
                # The un-subtitled clip is only an intermediate; do not let it
                # pile up in OUTPUT_FOLDER.
                if os.path.exists(vertical_path):
                    os.remove(vertical_path)

            clips_info.append({
                'clip_num': clip_num,
                'path': final_path,
                'srt_path': srt_path,
                'duration': round(end - start, 1),
                'score': clip['score'],
                'hook_text': clip['text'][:100],
                'start_time': round(start, 1),
                'end_time': round(end, 1),
            })
            print(f"✅ Clip {clip_num}: {start:.1f}s - {end:.1f}s (Score: {clip['score']})")
        return clips_info

    def process_video(self, video_source, num_clips=3):
        """Run the whole pipeline on a URL or a local file path.

        Args:
            video_source: an ``http(s)://``/``www.`` URL (downloaded first) or
                the path of a local video file.
            num_clips: maximum number of clips to render.

        Returns:
            ``(clips_info, None)`` on success or ``(None, error_message)``.

        Raises:
            RuntimeError: if an ffmpeg step fails.
        """
        # Step 1: Get video (YouTube URL or local file)
        if video_source.startswith(('http://', 'https://', 'www.')):
            video_path, _title = self.download_video(video_source)
            if not video_path:
                return None, "Download failed"
            is_temp = True
        else:
            video_path = video_source
            is_temp = False
            if not os.path.isfile(video_path):
                return None, "Video file not found"

        try:
            # Step 2: Transcribe
            transcription = self.transcribe_audio(video_path)
            # Step 3: Detect viral moments
            viral_clips = self.detect_viral_moments(transcription)
            if not viral_clips:
                return None, "No viral moments detected"
            # Step 4: Generate clips (a negative count would slice from the end)
            selected = viral_clips[:max(0, num_clips)]
            return self._render_clips(video_path, transcription, selected), None
        finally:
            # Remove the downloaded source on every exit path, including
            # errors and the "no viral moments" early return.
            if is_temp and os.path.exists(video_path):
                os.remove(video_path)
# Single shared ViralCutter instance used by the request handlers below.
cutter = ViralCutter()
# ========== API ENDPOINTS ==========
@app.route('/health', methods=['GET'])
def health():
    """Return a static status document that also names the storage root."""
    payload = dict(
        status='healthy',
        storage=BASE_DIR,
        model='whisperx (local)',
        gemini='not required',
    )
    return jsonify(payload)
@app.route('/process', methods=['POST'])
def process_video():
    """
    Process a video and generate shorts.

    Form data:
        - url: YouTube URL (http://, https:// or www. prefix required)
        - video: video file
        - num_clips: number of clips (default: 3, must be >= 1)

    Responds with 400 for invalid input, 500 when processing fails and a
    JSON document with download links on success.
    """
    # Get parameters
    url = request.form.get('url', '').strip()
    try:
        num_clips = int(request.form.get('num_clips', 3))
    except (TypeError, ValueError):
        return jsonify({'error': 'num_clips must be an integer'}), 400
    if num_clips < 1:
        return jsonify({'error': 'num_clips must be at least 1'}), 400

    # Check for uploaded file
    uploaded_path = None
    if 'video' in request.files:
        upload = request.files['video']
        if not upload.filename:
            return jsonify({'error': 'Empty file'}), 400
        video_id = uuid.uuid4().hex[:8]
        uploaded_path = os.path.join(UPLOAD_FOLDER, f"{video_id}_uploaded.mp4")
        upload.save(uploaded_path)
        video_source = uploaded_path
    elif url:
        # The cutter treats anything that is not a URL as a local file path,
        # so a client-supplied value must never reach it unless it is a URL.
        if not url.startswith(('http://', 'https://', 'www.')):
            return jsonify({'error': 'url must be an http(s) URL'}), 400
        video_source = url
    else:
        return jsonify({'error': 'Provide either video file or YouTube URL'}), 400

    try:
        clips, error = cutter.process_video(video_source, num_clips)
        if error:
            return jsonify({'error': error}), 500
        # Return download links
        result = {
            'success': True,
            'num_clips': len(clips),
            'clips': [
                {
                    'clip_num': clip['clip_num'],
                    'download_url': f"/download/{os.path.basename(clip['path'])}",
                    'srt_url': f"/download/{os.path.basename(clip['srt_path'])}",
                    'duration': clip['duration'],
                    'score': clip['score'],
                    'hook_preview': clip['hook_text'],
                }
                for clip in clips
            ],
        }
        return jsonify(result)
    except Exception as e:  # request boundary: report any pipeline failure
        return jsonify({'error': str(e)}), 500
    finally:
        # The upload is only needed while processing; remove it afterwards so
        # UPLOAD_FOLDER does not grow with every request.
        if uploaded_path and os.path.exists(uploaded_path):
            os.remove(uploaded_path)
@app.route('/download/<filename>', methods=['GET'])
def download_file(filename):
    """Send a generated file from OUTPUT_FOLDER as an attachment.

    Only plain files located directly inside OUTPUT_FOLDER are served; any
    other name (a directory, '..', a name with path components) gets a 404
    instead of reaching send_file.
    """
    safe_name = os.path.basename(filename)
    filepath = os.path.join(OUTPUT_FOLDER, safe_name)
    if safe_name != filename or not os.path.isfile(filepath):
        return jsonify({'error': 'File not found'}), 404
    return send_file(filepath, as_attachment=True)
@app.route('/list', methods=['GET'])
def list_clips():
    """Return name, size (MB) and modification time of every finished clip."""

    def describe(name):
        # Build the JSON entry for one file in OUTPUT_FOLDER.
        info = os.stat(os.path.join(OUTPUT_FOLDER, name))
        return {
            'filename': name,
            'size_mb': round(info.st_size / (1024 * 1024), 2),
            'modified': datetime.fromtimestamp(info.st_mtime).isoformat(),
        }

    finished = [
        describe(name)
        for name in os.listdir(OUTPUT_FOLDER)
        if name.endswith('_final.mp4')
    ]
    return jsonify({'clips': finished})
@app.route('/cleanup', methods=['POST'])
def cleanup():
    """Delete every file in UPLOAD_FOLDER and OUTPUT_FOLDER (best effort).

    Entries that cannot be removed (sub-directories, files in use, missing
    permissions) are skipped. The response reports how many files were
    actually deleted.
    """
    deleted = 0
    for folder in (UPLOAD_FOLDER, OUTPUT_FOLDER):
        for name in os.listdir(folder):
            try:
                os.remove(os.path.join(folder, name))
            except OSError:
                # Best effort: keep going, but only swallow filesystem errors.
                continue
            deleted += 1
    return jsonify({'status': 'cleaned', 'deleted': deleted})
# ========== MAIN ==========
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    app.run(
        host='0.0.0.0',
        port=int(os.environ.get('PORT', 7860)),
        debug=False,
    )