Polarisailabs committed on
Commit 3aff892 · verified · 1 Parent(s): 046e27f

Upload app.py

Files changed (1)
  1. app.py +45 -269
app.py CHANGED
@@ -1,269 +1,45 @@
- # -*- coding: utf-8 -*-
- # Install required libraries if running outside Colab
- # !pip install gradio yt-dlp moviepy pillow speechrecognition llama-index lancedb google-generativeai
-
- import gradio as gr
- from moviepy import VideoFileClip
- from pathlib import Path
- import speech_recognition as sr
- from PIL import Image
- import os
- import shutil
- import json
- import matplotlib.pyplot as plt
- import yt_dlp
- import requests
- import base64
- from io import BytesIO
-
- # Add your existing methods here (download_video, video_to_images, video_to_audio, audio_to_text, prepare_video...)
-
- def plot_images(image_paths):
-     images_shown = 0
-     plt.figure(figsize=(16, 9))
-     img_files = []
-     for img_path in image_paths:
-         if os.path.isfile(img_path):
-             img_files.append(img_path)
-             images_shown += 1
-             if images_shown >= 7:
-                 break
-     return img_files
-
- def download_video(video_url, output_video_path="./video_data/"):
-     ydl_opts = {
-         "format": "bestvideo+bestaudio/best",
-         "merge_output_format": "mp4",
-         "outtmpl": f"{output_video_path}/input_vid.mp4",
-         "noplaylist": True,
-         "quiet": False,
-         # Uncomment and set your cookie file path if required
-         # "cookiefile": "cookies.txt",
-     }
-     Path(output_video_path).mkdir(parents=True, exist_ok=True)
-     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-         info = ydl.extract_info(video_url, download=True)
-         info = ydl.sanitize_info(info)
-         return {
-             "title": info.get("title"),
-             "uploader": info.get("uploader"),
-             "views": info.get("view_count"),
-         }
-
- def video_to_images(video_path, output_folder):
-     Path(output_folder).mkdir(parents=True, exist_ok=True)
-     clip = VideoFileClip(video_path)
-     clip.write_images_sequence(
-         os.path.join(output_folder, "frame%04d.png"), fps=0.2
-     )
-
- def video_to_audio(video_path, output_audio_path):
-     clip = VideoFileClip(video_path)
-     audio = clip.audio
-     audio.write_audiofile(output_audio_path)
-
- def audio_to_text(audio_path):
-     recognizer = sr.Recognizer()
-     try:
-         with sr.AudioFile(audio_path) as source:
-             audio_data = recognizer.record(source)
-             text = recognizer.recognize_google(audio_data)
-             return text
-     except sr.UnknownValueError:
-         print("Google Speech Recognition could not understand the audio.")
-     except sr.RequestError as e:
-         print(f"Could not request results: {e}")
-     return None
-
- def prepare_all_videos(
-     video_folder="./video_data/",
-     output_folder="./mixed_data/"
- ):
-     """
-     Processes all video files in video_folder, extracting images and text for each,
-     and stores them in unique subfolders under output_folder.
-     Returns a list of metadata dicts for all videos.
-     """
-     Path(output_folder).mkdir(parents=True, exist_ok=True)
-     video_files = [f for f in os.listdir(video_folder) if f.lower().endswith(('.mp4', '.mov', '.avi', '.mkv'))]
-     all_metadata = []
-     for video_file in video_files:
-         video_path = os.path.join(video_folder, video_file)
-         video_name = Path(video_file).stem
-         video_output_folder = os.path.join(output_folder, video_name)
-         Path(video_output_folder).mkdir(parents=True, exist_ok=True)
-         audio_path = os.path.join(video_output_folder, "output_audio.wav")
-         # Extract images and audio
-         video_to_images(video_path, video_output_folder)
-         video_to_audio(video_path, audio_path)
-         # Transcribe audio
-         text_data = audio_to_text(audio_path)
-         text_path = os.path.join(video_output_folder, "output_text.txt")
-         with open(text_path, "w") as file:
-             file.write(text_data if text_data else "")
-         os.remove(audio_path)
-         # Dummy metadata, you can enhance this as needed
-         meta = {
-             "title": video_name,
-             "uploader": "unknown",
-             "views": "unknown",
-             "file": video_file
-         }
-         all_metadata.append({"meta": meta, "text": text_data, "folder": video_output_folder})
-     return all_metadata
-
- from llama_index.core.indices import MultiModalVectorStoreIndex
- from llama_index.core import SimpleDirectoryReader, StorageContext
- from llama_index.vector_stores.lancedb import LanceDBVectorStore
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
- from llama_index.core import Settings
-
- def create_vector_db_for_all(image_txt_root_folder: str):
-     """
-     Loads all subfolders in image_txt_root_folder as documents for the vector DB.
-     """
-     text_store = LanceDBVectorStore(uri="lancedb", table_name="text_collection")
-     image_store = LanceDBVectorStore(uri="lancedb", table_name="image_collection")
-     storage_context = StorageContext.from_defaults(
-         vector_store=text_store, image_store=image_store
-     )
-     Settings.embed_model = HuggingFaceEmbedding(
-         model_name="sentence-transformers/all-MiniLM-L6-v2"
-     )
-     # Load all subfolders as documents
-     documents = []
-     for subfolder in Path(image_txt_root_folder).iterdir():
-         if subfolder.is_dir():
-             documents.extend(SimpleDirectoryReader(str(subfolder)).load_data())
-     index = MultiModalVectorStoreIndex.from_documents(
-         documents,
-         storage_context=storage_context,
-     )
-     retriever_engine = index.as_retriever(
-         similarity_top_k=2, image_similarity_top_k=3
-     )
-     return retriever_engine
-
- from llama_index.core.schema import ImageNode
-
- def retrieve(retriever_engine, query_str):
-     retrieval_results = retriever_engine.retrieve(query_str)
-     retrieved_image = []
-     retrieved_text = []
-     for res_node in retrieval_results:
-         if isinstance(res_node.node, ImageNode):
-             retrieved_image.append(res_node.node.metadata["file_path"])
-         else:
-             retrieved_text.append(res_node.text)
-     return retrieved_image, retrieved_text
-
- qa_tmpl_str = (
-     "Given the provided information, including relevant images and retrieved context from the video, "
-     "accurately and precisely answer the query without any additional prior knowledge.\n"
-     "Please ensure honesty and responsibility, refraining from any racist or sexist remarks.\n"
-     "---------------------\n"
-     "Context: {context_str}\n"
-     "Metadata for video: {metadata_str} \n"
-     "---------------------\n"
-     "Query: {query_str}\n"
-     "Answer: "
- )
-
- # Define model values and their corresponding labels
- available_models = [
-     {"value": "meta-llama/llama-4-maverick:free", "label": "Llama"},
-     {"value": "qwen/qwen2.5-vl-72b-instruct:free", "label": "Qwen"},
-     {"value": "google/gemma-3-27b-it:free", "label": "Gemma"},
-     {"value": "moonshotai/kimi-vl-a3b-thinking:free", "label": "Kimi"},
-     {"value": "google/gemini-2.0-flash-exp:free", "label": "Gemini"},
-     # Add more models here if needed
- ]
-
- # Helper to get value from label or vice versa
- model_value_to_label = {item["value"]: item["label"] for item in available_models}
- model_label_to_value = {item["label"]: item["value"] for item in available_models}
-
- # Gradio interface function
- def gradio_chat(query, model_label):
-     output_video_path = "./video_data/"
-     output_folder = "./mixed_data/"
-
-     try:
-         # Process all videos
-         all_metadata = prepare_all_videos(output_video_path, output_folder)
-         # Combine metadata for all videos
-         metadata_str = json.dumps([item["meta"] for item in all_metadata])
-         retriever_engine = create_vector_db_for_all(output_folder)
-
-         img, txt = retrieve(retriever_engine=retriever_engine, query_str=query)
-         context_str = "".join(txt)
-         prompt = qa_tmpl_str.format(
-             context_str=context_str, query_str=query, metadata_str=metadata_str
-         )
-
-         OPENROUTER_API_KEY = os.environ['OPENROUTER_API_KEY']
-         headers = {
-             "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-             "Content-Type": "application/json",
-             "HTTP-Referer": "<YOUR_SITE_URL>",
-             "X-Title": "<YOUR_SITE_NAME>",
-         }
-
-         model_name = model_label_to_value.get(model_label, available_models[0]["value"])
-
-         messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
-         image_paths = []
-         for img_path in img:
-             try:
-                 image = Image.open(img_path)
-                 buffered = BytesIO()
-                 image.save(buffered, format="JPEG")
-                 img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-                 messages[0]["content"].append({
-                     "type": "image_url",
-                     "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}
-                 })
-                 image_paths.append(img_path)
-             except Exception as e:
-                 print(f"Error loading image {img_path}: {e}")
-
-         data = {
-             "model": model_name,
-             "messages": messages,
-         }
-
-         response = requests.post(
-             url="https://openrouter.ai/api/v1/chat/completions",
-             headers=headers,
-             data=json.dumps(data)
-         )
-         response.raise_for_status()
-         result_text = response.json()['choices'][0]['message']['content']
-
-         return result_text, image_paths
-     except Exception as e:
-         return f"Error: {str(e)}", []
-
- # Gradio UI
-
- gradio_ui = gr.Interface(
-     fn=gradio_chat,
-     inputs=[
-         gr.Textbox(label="", placeholder="Try: Best island in Maldives"),
-         gr.Dropdown(
-             choices=[item["label"] for item in available_models],
-             value=available_models[0]["label"],
-             label="Select Model:"
-         )
-     ],
-     outputs=[
-         gr.Textbox(label="Vega Response:"),
-         gr.Gallery(label="Relevant Images", allow_preview=True),
-     ],
-     title="",
-     description="",
-     theme=gr.themes.Default(primary_hue="sky")
- )
-
- if __name__ == "__main__":
-     gradio_ui.launch(share=True)
 
+ import os
+ import gradio as gr
+ from openai import OpenAI
+
+ API_KEY = os.environ['API_KEY']
+ client = OpenAI(base_url='https://openrouter.ai/api/v1', api_key=API_KEY)
+
+ def classify_text(text, classification_type='Sentiment', custom_labels=''):
+     """Classify text using OpenRouter's GPT-OSS-20B model."""
+     if not text.strip():
+         return 'Please enter some text to classify.'
+     if classification_type == 'Sentiment':
+         prompt = f"Classify the sentiment of the following text as Positive, Negative, or Neutral. Only respond with one word: Positive, Negative, or Neutral.\n\nText: {text}"
+     elif classification_type == 'Spam':
+         prompt = f"Classify whether the following text is Spam or Not Spam. Only respond with: Spam or Not Spam.\n\nText: {text}"
+     else:
+         # Only Sentiment and Spam prompts are defined; guard against any other value
+         return f'Unsupported classification type: {classification_type}'
+     try:
+         response = client.chat.completions.create(
+             model='openai/gpt-oss-20b',
+             messages=[
+                 {'role': 'system', 'content': 'You are a text classification assistant. Provide concise, accurate classifications.'},
+                 {'role': 'user', 'content': prompt},
+             ],
+             max_tokens=50,
+             temperature=0.1,
+             extra_headers={'Authorization': f"Bearer {API_KEY}", 'HTTP-Referer': 'https://your-app-url.com', 'X-Title': ''},
+         )
+         result = response.choices[0].message.content.strip()
+         return f'Classification Result: {result}'
+     except Exception as e:
+         return f'Error: {str(e)}'
+
+ def batch_classify(file, classification_type='Sentiment', custom_labels=''):
+     """Classify up to the first 10 lines of an uploaded file."""
+     if file is None:
+         return 'Please upload a text file.'
+     try:
+         with open(file.name, 'r', encoding='utf-8') as f:
+             lines = f.readlines()
+         results = []
+         for i, line in enumerate(lines[:10], 1):
+             line = line.strip()
+             if line:
+                 result = classify_text(line, classification_type, custom_labels)
+                 results.append(f'{i}. **Text:** {line}\n **Result:** {result}\n')
+         return '\n'.join(results) if results else 'No text found in file.'
+     except Exception as e:
+         return f'Error processing file: {str(e)}'
+
+ with gr.Blocks(title='', theme=gr.themes.Default(primary_hue='sky')) as demo:
+     with gr.Tabs():
+         with gr.Tab('Single Text'):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     text_input = gr.Textbox(label='', placeholder='Enter text to classify...', lines=4)
+                     classification_type = gr.Radio(choices=['Sentiment', 'Spam'], value='Sentiment', label='Classification Type:')
+                     custom_labels = gr.Textbox(label='Custom Labels (for custom classification)', placeholder='e.g., business, technology, sports, entertainment', visible=False)
+                     classify_btn = gr.Button('Classify Text', variant='primary')
+                 with gr.Column(scale=2):
+                     single_output = gr.Markdown(value='')
+
+             def toggle_custom_labels(choice):
+                 # The custom-labels box only appears for a 'custom' type, which the Radio does not currently offer
+                 return gr.update(visible=choice == 'custom')
+
+             classification_type.change(toggle_custom_labels, inputs=[classification_type], outputs=[custom_labels])
+             classify_btn.click(classify_text, inputs=[text_input, classification_type, custom_labels], outputs=[single_output])
+         with gr.Tab('Batch Classification'):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     gr.Markdown('Upload a text or CSV file:')
+                     file_input = gr.File(label='Upload File', file_types=['.txt', '.csv'])
+                     batch_classification_type = gr.Radio(choices=['Sentiment', 'Spam'], value='Sentiment', label='Classification Type:')
+                     batch_custom_labels = gr.Textbox(label='Custom Labels (for custom classification)', placeholder='e.g., business, technology, sports, entertainment', visible=False)
+                     batch_classify_btn = gr.Button('🔍 Classify Batch', variant='primary')
+                 with gr.Column(scale=2):
+                     batch_output = gr.Markdown(value='')
+
+             def toggle_batch_custom_labels(choice):
+                 return gr.update(visible=choice == 'custom')
+
+             batch_classification_type.change(toggle_batch_custom_labels, inputs=[batch_classification_type], outputs=[batch_custom_labels])
+             batch_classify_btn.click(batch_classify, inputs=[file_input, batch_classification_type, batch_custom_labels], outputs=[batch_output])
+
+ if __name__ == '__main__':
+     demo.launch(server_name='0.0.0.0', server_port=7860, share=True, show_error=True)