File size: 4,508 Bytes
e48ee91
230bcec
383c593
 
 
1d234a6
383c593
 
1d234a6
 
 
 
 
 
7a9bf26
1d234a6
 
2d2cc6d
525e22b
1d234a6
 
525e22b
230bcec
1d234a6
525e22b
 
7a9bf26
1d234a6
 
 
 
 
7a9bf26
1d234a6
 
 
 
383c593
1d234a6
230bcec
 
1d234a6
 
 
 
 
 
230bcec
1d234a6
230bcec
 
1d234a6
230bcec
 
1d234a6
 
 
 
 
 
 
 
 
 
230bcec
1d234a6
 
230bcec
 
1d234a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230bcec
 
 
1d234a6
230bcec
 
 
 
1d234a6
 
230bcec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d234a6
525e22b
1d234a6
230bcec
 
 
 
 
1d234a6
230bcec
383c593
230bcec
 
 
 
 
383c593
230bcec
1d234a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import re
import subprocess
import uuid
from urllib.parse import urlsplit

import gradio as gr
import whisper
import yt_dlp
from transformers import pipeline

# Delete temporary files
def clean_temp_files():
    """Remove the intermediate video/audio files from the working directory.

    Files that are already absent are skipped silently, so the function is
    safe to call both before and after a processing run.
    """
    for path in ("temp_video.mp4", "temp_audio.mp3"):
        # EAFP: attempt the removal directly instead of checking first. A
        # separate exists() check can race with another caller deleting the
        # same fixed-name file between the check and the remove.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

# Download TikTok video
def download_video(video_url):
    """Fetch the video at ``video_url`` into ``temp_video.mp4`` using yt-dlp.

    Returns the output path ("temp_video.mp4") when the yt-dlp call finishes
    without raising. Any exception is printed and turned into ``None``.
    """
    output_path = "temp_video.mp4"
    options = dict(
        format='bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        outtmpl=output_path,
        quiet=True,
        no_warnings=True,
        extractor_args={'tiktok': {'skip_watermark': True}},
    )

    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            downloader.download([video_url])
    except Exception as e:
        print(f"Download error: {e}")
        return None
    return output_path

# Extract audio (temporary)
def extract_audio(video_path):
    """Extract the audio track of ``video_path`` into ``temp_audio.mp3``.

    ffmpeg is started with an argument list (no shell), so characters in
    ``video_path`` such as quotes, ``;`` or ``$`` reach ffmpeg verbatim
    instead of being interpreted as shell syntax.

    Returns:
        "temp_audio.mp3" when ffmpeg exits successfully and the file exists;
        ``None`` when ffmpeg cannot be started, exits with a non-zero status,
        or produces no output file.
    """
    output_path = "temp_audio.mp3"
    command = [
        "ffmpeg",
        "-y",                      # overwrite an existing output file
        "-i", video_path,
        "-vn",                     # audio only: drop the video stream
        "-acodec", "libmp3lame",
        "-q:a", "3",
        output_path,
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as e:
        # ffmpeg is not installed or could not be launched.
        print(f"Audio extraction error: {e}")
        return None

    # A failed run must not report a stale file left over from earlier.
    if completed.returncode != 0:
        return None
    return output_path if os.path.exists(output_path) else None

# Transcribe audio
# Whisper models already loaded in this process, keyed by model name.
_WHISPER_MODELS = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model ``name``, loading it on first use only."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` with the Whisper "base" model.

    The model is loaded on the first call and reused afterwards, so repeated
    requests do not pay the model-loading cost each time.

    Returns:
        The ``'text'`` field of the transcription result, or ``None`` (after
        printing the error) if loading the model or transcribing fails.
    """
    try:
        result = _get_whisper_model("base").transcribe(audio_path)
        return result['text']
    except Exception as e:
        print(f"Transcription error: {e}")
        return None

# Classify content
# Zero-shot classification pipelines already built in this process,
# keyed by model name.
_ZERO_SHOT_CLASSIFIERS = {}


def _get_zero_shot_classifier(model_name="facebook/bart-large-mnli"):
    """Return the zero-shot pipeline for ``model_name``, building it on first use only."""
    if model_name not in _ZERO_SHOT_CLASSIFIERS:
        _ZERO_SHOT_CLASSIFIERS[model_name] = pipeline(
            "zero-shot-classification", model=model_name
        )
    return _ZERO_SHOT_CLASSIFIERS[model_name]


def classify_content(text):
    """Assign ``text`` to one of a fixed set of content categories.

    Uses a zero-shot classification pipeline (facebook/bart-large-mnli) that
    is built on the first call and reused afterwards.

    Returns:
        ``(label, score)`` taken from the first entry of the pipeline's
        ``labels`` and ``scores``; ``(None, None)`` when ``text`` is empty or
        whitespace-only, or when classification fails (the error is printed).
    """
    try:
        if not text or not text.strip():
            return None, None

        labels = ["educational", "entertainment", "news", "political",
                  "religious", "technical", "advertisement", "social"]

        result = _get_zero_shot_classifier()(
            text,
            candidate_labels=labels,
            hypothesis_template="This text is about {}.",
        )
        return result['labels'][0], result['scores'][0]
    except Exception as e:
        print(f"Classification error: {e}")
        return None, None

# Main processing function
def _is_tiktok_url(url):
    """Return True when the host of ``url`` is tiktok.com or one of its subdomains."""
    try:
        parts = urlsplit(url)
        if not parts.netloc:
            # Scheme-less input such as "vm.tiktok.com/abc" has no network
            # location for urlsplit to find; re-parse it as "//host/path".
            parts = urlsplit(f"//{url}")
        host = parts.hostname
    except ValueError:
        # Malformed authority, e.g. an unbalanced IPv6 bracket.
        return False
    if not host:
        return False
    host = host.rstrip(".")
    return host == "tiktok.com" or host.endswith(".tiktok.com")


def process_video(video_url):
    """Download a TikTok video, transcribe its audio and classify the transcript.

    Args:
        video_url: Link entered by the user. Surrounding whitespace is
            ignored; the host must be tiktok.com or a subdomain of it.

    Returns:
        A ``(transcription, classification)`` pair of strings for the two
        output boxes. When a step fails, the first element carries the error
        message and the second is empty, except for a classification failure,
        where the transcription is still returned alongside the error.
    """
    if not video_url or not video_url.strip():
        return "Please enter a valid TikTok URL", ""
    video_url = video_url.strip()

    # Check the actual hostname: a substring test would also accept links
    # such as "https://evil.example/?x=tiktok.com".
    if not _is_tiktok_url(video_url):
        return "This app is for TikTok links only", ""

    # Start from a clean slate so leftovers of an earlier run are never reused.
    clean_temp_files()
    try:
        video_path = download_video(video_url)
        if not video_path:
            return "Failed to download video", ""

        audio_path = extract_audio(video_path)
        if not audio_path:
            return "Failed to extract audio", ""

        transcription = transcribe_audio(audio_path)
        if not transcription:
            return "Failed to transcribe audio", ""

        category, confidence = classify_content(transcription)
        if not category:
            return transcription, "Failed to classify content"

        return transcription, f"{category} (confidence: {confidence:.2f})"
    finally:
        # Runs on every exit path, including failures and unexpected errors.
        clean_temp_files()

# Gradio interface
with gr.Blocks(title="TikTok Content Analyzer") as demo:
    # Page header.
    gr.Markdown("""
    # 🎬 TikTok Content Analyzer
    Enter a TikTok video URL to get transcription and content classification
    """)

    # Input: the link to analyze.
    with gr.Row():
        url_input = gr.Textbox(label="TikTok URL", placeholder="Enter TikTok video URL here...")

    # Outputs: the transcript, then the predicted category.
    with gr.Row():
        transcription_output = gr.Textbox(label="Transcription", interactive=True, lines=10, max_lines=20)

    with gr.Row():
        category_output = gr.Textbox(label="Content Category", interactive=False)

    submit_btn = gr.Button("Analyze Video", variant="primary")

    # Sample links that fill the URL box when clicked.
    example_urls = [
        ["https://www.tiktok.com/@example/video/123456789"],
        ["https://vm.tiktok.com/ZMexample/"],
    ]
    gr.Examples(examples=example_urls, inputs=url_input)

    # process_video returns (transcription, classification), matching the
    # order of the two output components.
    submit_btn.click(
        process_video,
        inputs=url_input,
        outputs=[transcription_output, category_output],
    )

if __name__ == "__main__":
    # Serve on all network interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)