agnixcode committed on
Commit
6afe35e
·
verified ·
1 Parent(s): f39f658

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -0
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ================================
2
+ # IMPORTS
3
+ # ================================
4
+ from youtube_transcript_api import YouTubeTranscriptApi
5
+ from sentence_transformers import SentenceTransformer
6
+ import faiss
7
+ import numpy as np
8
+ import gradio as gr
9
+ from groq import Groq
10
+ import re
11
+ import os
12
+
13
+ # ================================
14
+ # CONFIG
15
+ # ================================
16
# Groq API key is read from the environment; os.getenv yields None when
# GROQ_API_KEY is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # 🔐 Use HF secrets
client = Groq(api_key=GROQ_API_KEY)

# Sentence-embedding model used for both transcript chunks and user queries.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Global store
# Module-level state: create_vector_store() rebinds both names, retrieve()
# and handle_chat() read them.
vector_store = None  # FAISS index; stays None until a video is processed
stored_chunks = []  # chunk texts, position i matches vector i in the index
24
+
25
+ # ================================
26
+ # UTIL: EXTRACT VIDEO ID
27
+ # ================================
28
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    The ID is the first token that directly follows ``v=`` or ``/`` and is
    exactly 11 characters long, drawn from letters, digits, ``_`` and ``-``.

    Args:
        url: URL text such as ``https://www.youtube.com/watch?v=<id>`` or
            ``https://youtu.be/<id>``. Surrounding whitespace is ignored.

    Returns:
        The video ID string, or ``None`` if ``url`` is not a string or no
        ID can be found in it.
    """
    if not isinstance(url, str):
        return None

    # The trailing negative lookahead requires the token to END after 11
    # characters. Without it, any longer path or host segment (for example
    # "youtube-nocookie" or "/subscriptions") is truncated to a bogus ID.
    match = re.search(
        r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])",
        url.strip(),
    )
    return match.group(1) if match else None
31
+
32
+ # ================================
33
+ # STEP 1: GET TRANSCRIPT
34
+ # ================================
35
def get_transcript(url):
    """Return the transcript of a YouTube video as one space-joined string.

    Failures are reported in-band rather than raised: when no video ID can
    be extracted from ``url``, or when fetching/joining the transcript
    raises, the returned string starts with "❌" and describes the problem.

    Args:
        url: YouTube URL passed to ``extract_video_id``.

    Returns:
        The transcript text, or an error message beginning with "❌".
    """
    vid = extract_video_id(url)
    if not vid:
        return "❌ Invalid YouTube URL"

    try:
        snippets = YouTubeTranscriptApi().fetch(vid)
        # Each fetched item exposes its caption text as `.text`.
        return " ".join(snippet.text for snippet in snippets)
    except Exception as e:
        return f"❌ Transcript Error: {str(e)}"
49
+
50
+ # ================================
51
+ # STEP 2: CHUNKING
52
+ # ================================
53
def chunk_text(text, chunk_size=300):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields, so runs of whitespace are
    collapsed and each chunk is re-joined with single spaces.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk.

    Returns:
        A list of chunk strings in original order; empty if ``text`` has
        no words.
    """
    tokens = text.split()
    starts = range(0, len(tokens), chunk_size)
    return [" ".join(tokens[start:start + chunk_size]) for start in starts]
62
+
63
+ # ================================
64
+ # STEP 3: VECTOR STORE
65
+ # ================================
66
def create_vector_store(chunks):
    """Embed ``chunks`` and rebuild the global FAISS index from them.

    Rebinds the module-level ``vector_store`` (a new ``faiss.IndexFlatL2``)
    and ``stored_chunks`` (the texts, in the same order as the vectors).

    Args:
        chunks: Non-empty list of text chunks to index.

    Raises:
        ValueError: If ``chunks`` is empty, since there would be no
            embedding matrix to take the vector dimension from.
    """
    global vector_store, stored_chunks

    if not chunks:
        raise ValueError("Cannot build a vector store from zero chunks")

    # FAISS works on contiguous float32 matrices; make that explicit rather
    # than relying on whatever dtype/layout the encoder happens to return.
    embeddings = np.ascontiguousarray(
        embed_model.encode(chunks), dtype=np.float32
    )

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    vector_store = index
    # Keep a private copy so later mutation of the caller's list cannot
    # desynchronise the texts from the indexed vectors.
    stored_chunks = list(chunks)
77
+
78
+ # ================================
79
+ # STEP 4: RETRIEVAL
80
+ # ================================
81
def retrieve(query, top_k=3):
    """Return the stored chunks closest to ``query``, joined by newlines.

    Args:
        query: Question text to embed and search with.
        top_k: Maximum number of chunks to return.

    Returns:
        The matching chunks, nearest first, separated by "\\n". Empty
        string when there are no stored chunks or ``top_k`` is not positive.
    """
    # Never ask for more neighbours than there are indexed chunks.
    k = min(top_k, len(stored_chunks))
    if k <= 0:
        return ""

    query_embedding = np.ascontiguousarray(
        embed_model.encode([query]), dtype=np.float32
    )
    _, indices = vector_store.search(query_embedding, k)

    # FAISS pads missing results with -1. Indexing a Python list with -1
    # would silently return the LAST chunk, so drop out-of-range labels.
    hits = [
        stored_chunks[i] for i in indices[0] if 0 <= i < len(stored_chunks)
    ]
    return "\n".join(hits)
87
+
88
+ # ================================
89
+ # STEP 5: LLM
90
+ # ================================
91
def generate_answer(query, context):
    """Ask the Groq chat model to answer ``query`` from ``context`` only.

    Args:
        query: The user's question.
        context: Retrieved transcript text inserted into the prompt.

    Returns:
        The message content of the first choice in the completion.
    """
    # NOTE(review): indentation was lost in the extracted source; the prompt
    # lines are reproduced flush-left as shown there — confirm against the
    # real file.
    prompt = f"""
You are a helpful assistant.

Use ONLY the context below to answer the question.

Context:
{context}

Question:
{query}

Answer:
"""

    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
        temperature=0.3,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content
113
+
114
+ # ================================
115
+ # HANDLERS
116
+ # ================================
117
def handle_process(url):
    """Fetch, chunk and index the transcript for the "Process" button.

    Args:
        url: YouTube URL entered by the user.

    Returns:
        A ``(status, preview, chat_history)`` tuple: a status message, the
        first 500 characters of the transcript (empty on failure), and an
        empty list that resets the chat.
    """
    transcript = get_transcript(url)

    # get_transcript reports failures in-band as a message starting with "❌".
    if transcript.startswith("❌"):
        return transcript, "", []

    chunks = chunk_text(transcript)
    if not chunks:
        # An empty or whitespace-only transcript produces no chunks, and
        # create_vector_store cannot build an index from nothing.
        return "❌ Transcript is empty", "", []

    create_vector_store(chunks)

    return "✅ Video processed successfully!", transcript[:500], []
129
+
130
def handle_chat(query, chat_history):
    """Answer one chat message for the "Send" button.

    Args:
        query: The user's question.
        chat_history: Existing list of ``(question, answer)`` pairs, or
            ``None``. It is never mutated.

    Returns:
        A ``("", history)`` tuple: an empty string to clear the input box
        and a new history list with this exchange appended. Blank queries
        return the history unchanged.
    """
    # Always work on a copy: both outcomes then behave the same way, and a
    # None history is tolerated.
    history = list(chat_history or [])

    # Nothing to ask: skip the embedding and LLM round-trip entirely.
    if not query or not query.strip():
        return "", history

    if vector_store is None:
        history.append((query, "❌ Process a video first"))
        return "", history

    try:
        answer = generate_answer(query, retrieve(query))
    except Exception as e:
        # UI boundary: report the failure in the chat, matching how
        # transcript errors are surfaced, instead of raising into Gradio.
        answer = f"❌ Error: {e}"

    history.append((query, answer))
    return "", history
139
+
140
+ # ================================
141
+ # UI
142
+ # ================================
143
# Build the Gradio interface: a URL row with a "Process" button, a status
# line, a read-only transcript preview, and a chat area with its own input.
with gr.Blocks(theme=gr.themes.Soft()) as app:

    gr.Markdown("# 🎥 YouTube Video Assistant")
    gr.Markdown("Paste a YouTube link → process → chat with the video")

    # URL entry and the button that triggers transcript processing.
    with gr.Row():
        url_input = gr.Textbox(label="🔗 YouTube URL", scale=4)
        process_btn = gr.Button("🚀 Process", scale=1)

    # Receives the status message returned by handle_process.
    status_output = gr.Markdown("")

    # Receives the transcript preview returned by handle_process.
    transcript_preview = gr.Textbox(
        label="📄 Transcript Preview",
        lines=5,
        interactive=False
    )

    gr.Markdown("---")

    chatbot = gr.Chatbot(label="💬 Chat with Video")

    # Question entry and send button.
    with gr.Row():
        query_input = gr.Textbox(
            placeholder="Ask something about the video...",
            scale=4
        )
        send_btn = gr.Button("Send", scale=1)

    # handle_process returns (status, preview, chat history), mapped in
    # that order onto the three outputs below.
    process_btn.click(
        handle_process,
        inputs=url_input,
        outputs=[status_output, transcript_preview, chatbot]
    )

    # handle_chat returns ("", history): the first value goes to the query
    # box and the second to the chatbot.
    send_btn.click(
        handle_chat,
        inputs=[query_input, chatbot],
        outputs=[query_input, chatbot]
    )

# Start the app when this module is executed.
app.launch()