Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import pixeltable as pxt | |
| from pixeltable.iterators import DocumentSplitter, FrameIterator, StringSplitter | |
| from pixeltable.functions.huggingface import sentence_transformer, clip_image, clip_text | |
| from pixeltable.functions.video import extract_audio | |
| from pixeltable.functions.audio import get_metadata | |
| from pixeltable.functions import openai | |
| import numpy as np | |
| import PIL.Image | |
| import os | |
| import getpass | |
| import requests | |
| import tempfile | |
| from datetime import datetime | |
# Configuration
# Directory handed to Gradio's `allowed_paths` when the app is launched.
PIXELTABLE_MEDIA_DIR: str = os.path.expanduser("~/.pixeltable/media")

# Generation defaults (not referenced in the visible part of this file).
MAX_TOKENS_DEFAULT: int = 300
TEMPERATURE_DEFAULT: float = 0.7

# Initial value of the "Chunk Size" slider in the Document Chat tab.
CHUNK_SIZE_DEFAULT: int = 300
# Initialize API keys
def init_api_keys():
    """Make sure OPENAI_API_KEY is present in the process environment.

    An existing value is left untouched. When the variable is absent, the key
    is requested interactively through `getpass` and stored in `os.environ`.
    """
    key_name = 'OPENAI_API_KEY'
    if key_name in os.environ:
        return
    os.environ[key_name] = getpass.getpass('OpenAI API key:')
# Common Utilities
def initialize_pixeltable(dir_name='unified_app'):
    """Reset the Pixeltable directory used by the app.

    The directory `dir_name` is dropped with `force=True` and then created
    again, so whatever it contained before the call is gone afterwards.

    Args:
        dir_name: Name of the Pixeltable directory to reset.
    """
    # Drop first, then recreate: the order matters for ending up with a fresh
    # directory of the same name.
    pxt.drop_dir(dir_name, force=True)
    pxt.create_dir(dir_name)
def create_prompt(top_k_list: list[dict], question: str) -> str:
    """Build the passages-plus-question prompt text.

    The 'text' value of every entry in `top_k_list` is emitted in reverse
    order, separated by blank lines, under a PASSAGES heading; the question
    follows under a QUESTION heading.

    Args:
        top_k_list: Entries that each carry a 'text' key.
        question: The question to append after the passages.

    Returns:
        The assembled prompt string.
    """
    passages = [entry['text'] for entry in reversed(top_k_list)]
    passage_block = '\n\n'.join(passages)
    return f'\nPASSAGES:\n{passage_block}\nQUESTION:\n{question}'
# Document Processing
class DocumentProcessor:
    """Document ingestion and question answering for the Document Chat tab.

    Both methods are static: they keep no instance state and are invoked
    through the class by the UI event handlers.
    """

    @staticmethod
    def process_documents(pdf_files, chunk_limit, chunk_separator):
        """Process uploaded documents for chatbot functionality.

        Args:
            pdf_files: Uploaded files. Each item may be a path string or an
                object exposing its path as `.name`; items that are not PDFs
                are ignored.
            chunk_limit: Chunk size; only forwarded to the splitter when
                `chunk_separator` is "token_limit" or "char_limit".
            chunk_separator: Separator mode forwarded to `DocumentSplitter`.

        Returns:
            A status message suitable for display in the UI. Failures are
            reported as a message rather than raised.
        """
        # Resolve upload items to paths up front so that an empty or invalid
        # upload is rejected *before* existing data is wiped below.
        candidate_paths = (getattr(item, 'name', item) for item in (pdf_files or []))
        pdf_paths = [
            path
            for path in candidate_paths
            if isinstance(path, str) and path.lower().endswith('.pdf')
        ]
        if not pdf_paths:
            return "No PDF files found. Please upload at least one .pdf document."

        try:
            # NOTE: this drops and recreates the whole 'unified_app' directory,
            # not only the document tables.
            initialize_pixeltable()

            docs = pxt.create_table(
                'unified_app.documents',
                {'document': pxt.Document}
            )
            docs.insert([{'document': path} for path in pdf_paths])

            uses_limit = chunk_separator in ("token_limit", "char_limit")
            chunks = pxt.create_view(
                'unified_app.chunks',
                docs,
                iterator=DocumentSplitter.create(
                    document=docs.document,
                    separators=chunk_separator,
                    # The UI slider may deliver a float; the limit is a count.
                    limit=int(chunk_limit) if uses_limit else None
                )
            )

            # Index the chunk text so get_document_answer() can rank chunks by
            # similarity to a question.
            chunks.add_embedding_index(
                'text',
                string_embed=sentence_transformer.using(model_id='intfloat/e5-large-v2')
            )
            return "Documents processed successfully. You can start asking questions."
        except Exception as e:
            return f"Error processing documents: {str(e)}"

    @staticmethod
    def get_document_answer(question):
        """Get answer from processed documents.

        Retrieves the five chunks most similar to `question`, sends them with
        the question to the OpenAI chat model through a temporary table, and
        returns the model's reply.

        Args:
            question: The user's question.

        Returns:
            The answer text, a prompt to enter a question when `question` is
            blank, or an "Error: ..." message when anything fails.
        """
        if not question or not question.strip():
            return "Please enter a question."

        temp_path = 'unified_app.temp_response'
        try:
            chunks = pxt.get_table('unified_app.chunks')
            sim = chunks.text.similarity(question)
            relevant_chunks = (
                chunks.order_by(sim, asc=False).limit(5).select(chunks.text).collect()
            )
            context = "\n\n".join(chunk['text'] for chunk in relevant_chunks)

            temp_table = pxt.create_table(
                temp_path,
                {
                    'question': pxt.String,
                    'context': pxt.String
                }
            )
            try:
                temp_table.insert([{'question': question, 'context': context}])
                temp_table.add_computed_column(response=openai.chat_completions(
                    messages=[
                        {
                            'role': 'system',
                            'content': 'Answer the question based only on the provided context. If the context doesn\'t contain enough information, say so.'
                        },
                        {
                            'role': 'user',
                            'content': f"Context:\n{context}\n\nQuestion: {question}"
                        }
                    ],
                    model='gpt-4o-mini-2024-07-18'
                ))
                answer = temp_table.select(
                    answer=temp_table.response.choices[0].message.content
                ).tail(1)['answer'][0]
                return answer
            finally:
                # Always remove the scratch table. If it were left behind after
                # a failed request, the next create_table() on the same path
                # would fail and every later question would error out.
                pxt.drop_table(temp_path, force=True)
        except Exception as e:
            return f"Error: {str(e)}"
# Call Analysis
class CallAnalyzer:
    """Transcription and insight extraction for the Call Analysis tab."""

    @staticmethod
    def process_call(video_file):
        """Process and analyze call recordings.

        Builds a table whose computed columns extract the audio track,
        transcribe it with Whisper and ask the chat model for insights, then
        inserts the recording and reads the results back.

        Args:
            video_file: Path of the uploaded recording.

        Returns:
            A `(transcript, audio, insights)` tuple. When no recording was
            supplied or processing fails, the first element is a message and
            the other two are None.
        """
        # Reject a missing upload before touching storage; the reset below
        # would otherwise wipe existing data for nothing.
        if not video_file:
            return "Please upload a call recording first.", None, None

        try:
            # NOTE: this drops and recreates the whole 'unified_app' directory.
            initialize_pixeltable()

            calls = pxt.create_table(
                'unified_app.calls',
                {"video": pxt.Video}
            )
            calls.add_computed_column(audio=extract_audio(calls.video, format='mp3'))
            calls.add_computed_column(
                transcription=openai.transcriptions(audio=calls.audio, model='whisper-1')
            )
            calls.add_computed_column(text=calls.transcription.text)

            sentences = pxt.create_view(
                'unified_app.sentences',
                calls,
                iterator=StringSplitter.create(text=calls.text, separators='sentence')
            )
            sentences.add_embedding_index(
                'text',
                string_embed=sentence_transformer.using(model_id='intfloat/e5-large-v2')
            )

            def generate_insights(text: str) -> list[dict]:
                # Called below with the `calls.text` column reference, so the
                # returned list is a message template holding that reference.
                return [
                    {'role': 'system', 'content': 'Analyze this call transcript and provide key insights:'},
                    {'role': 'user', 'content': text}
                ]

            calls.add_computed_column(insights_prompt=generate_insights(calls.text))
            calls.add_computed_column(insights=openai.chat_completions(
                messages=calls.insights_prompt,
                model='gpt-4o-mini-2024-07-18'
            ).choices[0].message.content)

            # All columns are declared before the recording is inserted.
            calls.insert([{"video": video_file}])

            result = calls.select(calls.text, calls.audio, calls.insights).tail(1)
            return result['text'][0], result['audio'][0], result['insights'][0]
        except Exception as e:
            return f"Error processing call: {str(e)}", None, None
# Video Search
class VideoSearcher:
    """Frame indexing and similarity search for the Video Search tab."""

    @staticmethod
    def process_video(video_file):
        """Process video for searching.

        Extracts frames at 1 fps into a view and adds a CLIP embedding index
        on them so they can be queried by text or by image.

        Args:
            video_file: The uploaded video, either a path string or an object
                exposing its path as `.name`.

        Returns:
            A status message suitable for display in the UI.
        """
        # Reject a missing upload before touching storage; the reset below
        # would otherwise wipe existing data for nothing.
        if video_file is None:
            return "Please upload a video first."
        video_path = getattr(video_file, 'name', video_file)

        try:
            # NOTE: this drops and recreates the whole 'unified_app' directory.
            initialize_pixeltable()

            videos = pxt.create_table('unified_app.videos', {'video': pxt.Video})
            frames = pxt.create_view(
                'unified_app.frames',
                videos,
                iterator=FrameIterator.create(video=videos.video, fps=1)
            )

            # Embedding Functions: the same CLIP checkpoint embeds both text
            # and image queries against the frame index.
            frames.add_embedding_index(
                'frame',
                string_embed=clip_text.using(model_id='openai/clip-vit-base-patch32'),
                image_embed=clip_image.using(model_id='openai/clip-vit-base-patch32')
            )

            videos.insert([{'video': video_path}])
            return "Video processed and indexed for search."
        except Exception as e:
            return f"Error processing video: {str(e)}"

    @staticmethod
    def search_video(search_type, text_query=None, image_query=None):
        """Search processed video frames.

        Args:
            search_type: "Text" or "Image"; selects which query is used.
            text_query: Query string, used when `search_type` is "Text".
            image_query: Query image, used when `search_type` is "Image".

        Returns:
            Up to five frames ordered by descending similarity, or an empty
            list when no usable query was given or the search fails.
        """
        # Pick the query first so an unusable request returns without a
        # table lookup.
        if search_type == "Text" and text_query and text_query.strip():
            query = text_query
        elif search_type == "Image" and image_query is not None:
            query = image_query
        else:
            return []

        try:
            frames = pxt.get_table('unified_app.frames')
            sim = frames.frame.similarity(query)
            results = frames.order_by(sim, asc=False).limit(5).select(frames.frame).collect()
            return [row['frame'] for row in results]
        except Exception as e:
            # Best effort: the gallery just shows no results; the reason goes
            # to the log.
            print(f"Search error: {str(e)}")
            return []
# Gradio Interface
def create_interface():
    """Build the Gradio Blocks UI for the app and wire up its event handlers.

    The page consists of a header, two collapsed documentation accordions,
    three feature tabs (Document Chat, Call Analysis, Video Search), a list of
    related Pixeltable Spaces and a footer.

    Returns:
        The configured `gr.Blocks` instance; launching it is left to the
        caller.
    """
    # NOTE(review): icons in the labels below are UTF-8 emoji. Some could not
    # be recovered exactly from mis-encoded text and were chosen to fit the
    # label - confirm against the intended design.
    with gr.Blocks(theme=gr.themes.Base()) as demo:
        # Header
        gr.HTML(
            """
            <div style="text-align: left; margin-bottom: 1rem;">
                <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
            </div>
            """
        )
        gr.Markdown(
            """
            # Multimodal Powerhouse
            """
        )
        gr.HTML(
            """
            <p>
                <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a>
                is a declarative interface for working with text, images, embeddings, and video, enabling you to store, transform, index, and iterate on data.
            </p>
            <div style="background-color: #E5DDD4; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin: 15px 0;">
                <strong>⚠️ Note:</strong> This app runs best with GPU. For optimal performance, consider
                <a href="https://huggingface.co/spaces/Pixeltable/Multimodal-Processing-Suite?duplicate=true" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">duplicating this space</a>
                to run locally or with better computing resources.
            </div>
            """
        )

        # Documentation Sections
        with gr.Row():
            with gr.Column():
                with gr.Accordion("🎯 What This App Does", open=False):
                    gr.Markdown("""
                    1. 📚 **Document Processing**
                        * Chat with your documents using RAG
                        * Process multiple document formats
                        * Extract key insights
                    2. 🎥 **Video Analysis**
                        * Text and image-based video search
                        * Frame extraction and indexing
                        * Visual content discovery
                    3. 🎙️ **Call Analysis**
                        * Automatic transcription
                        * Key insight extraction
                        * Audio processing
                    """)
            with gr.Column():
                with gr.Accordion("⚙️ How It Works", open=False):
                    gr.Markdown("""
                    1. 🔄 **Data Processing**
                        * Chunking and indexing documents
                        * Embedding generation for search
                        * Multi-modal data handling
                    2. 🤖 **AI Integration**
                        * LLM-powered analysis
                        * Speech-to-text conversion
                        * Semantic search capabilities
                    3. 📊 **Storage & Retrieval**
                        * Efficient data organization
                        * Quick content retrieval
                        * Structured data management
                    """)

        with gr.Tabs():
            # Document Chat Tab
            with gr.TabItem("📚 Document Chat"):
                with gr.Row():
                    with gr.Column():
                        doc_files = gr.File(label="Upload Documents", file_count="multiple")
                        chunk_size = gr.Slider(
                            minimum=100,
                            maximum=500,
                            value=CHUNK_SIZE_DEFAULT,
                            label="Chunk Size"
                        )
                        chunk_type = gr.Dropdown(
                            choices=["token_limit", "char_limit", "sentence", "paragraph"],
                            value="token_limit",
                            label="Chunking Method"
                        )
                        process_docs_btn = gr.Button("Process Documents")
                        process_status = gr.Textbox(label="Status")
                    with gr.Column():
                        chatbot = gr.Chatbot(label="Document Chat")
                        msg = gr.Textbox(label="Ask a question")
                        send_btn = gr.Button("Send")

            # Call Analysis Tab
            with gr.TabItem("🎙️ Call Analysis"):
                with gr.Row():
                    with gr.Column():
                        call_upload = gr.Video(label="Upload Call Recording")
                        analyze_btn = gr.Button("Analyze Call")
                    with gr.Column():
                        with gr.Tabs():
                            with gr.TabItem("📝 Transcript"):
                                transcript = gr.Textbox(label="Transcript", lines=10)
                            with gr.TabItem("💡 Insights"):
                                insights = gr.Textbox(label="Key Insights", lines=10)
                            with gr.TabItem("🔊 Audio"):
                                audio_output = gr.Audio(label="Extracted Audio")

            # Video Search Tab
            with gr.TabItem("🎥 Video Search"):
                with gr.Row():
                    with gr.Column():
                        video_upload = gr.File(label="Upload Video")
                        process_video_btn = gr.Button("Process Video")
                        video_status = gr.Textbox(label="Processing Status")
                        search_type = gr.Radio(
                            choices=["Text", "Image"],
                            label="Search Type",
                            value="Text"
                        )
                        text_input = gr.Textbox(label="Text Query")
                        image_input = gr.Image(label="Image Query", type="pil", visible=False)
                        search_btn = gr.Button("Search")
                    with gr.Column():
                        results_gallery = gr.Gallery(label="Search Results")

        # Event Handlers
        def document_chat(message, chat_history):
            """Answer `message` and append the exchange to the chat history."""
            # The chatbot value may arrive as None before the first exchange.
            chat_history = chat_history or []
            # Ignore blank submissions instead of sending them to the model.
            if not message or not message.strip():
                return "", chat_history
            bot_message = DocumentProcessor.get_document_answer(message)
            chat_history.append((message, bot_message))
            return "", chat_history

        def update_search_type(choice):
            """Show only the query input that matches the selected search type."""
            return {
                text_input: gr.update(visible=choice == "Text"),
                image_input: gr.update(visible=choice == "Image")
            }

        # Connect Events
        process_docs_btn.click(
            DocumentProcessor.process_documents,
            inputs=[doc_files, chunk_size, chunk_type],
            outputs=[process_status]
        )
        send_btn.click(
            document_chat,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot]
        )
        analyze_btn.click(
            CallAnalyzer.process_call,
            inputs=[call_upload],
            # Order matches process_call's (text, audio, insights) return.
            outputs=[transcript, audio_output, insights]
        )
        process_video_btn.click(
            VideoSearcher.process_video,
            inputs=[video_upload],
            outputs=[video_status]
        )
        search_type.change(
            update_search_type,
            search_type,
            [text_input, image_input]
        )
        search_btn.click(
            VideoSearcher.search_video,
            inputs=[search_type, text_input, image_input],
            outputs=[results_gallery]
        )

        # Related Pixeltable Spaces
        gr.Markdown("## 🌟 Explore More Pixeltable Apps")
        with gr.Row():
            with gr.Column():
                gr.HTML(
                    """
                    <div style="border: 1px solid #ddd; padding: 15px; border-radius: 8px; margin-bottom: 10px;">
                        <h3>📚 Document & Text Processing</h3>
                        <ul style="list-style-type: none; padding-left: 0;">
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/Multi-LLM-RAG-with-Groundtruth-Comparison" target="_blank" style="color: #F25022; text-decoration: none;">
                                    🤖 Multi-LLM RAG Comparison
                                </a>
                            </li>
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/Document-to-Audio-Synthesis" target="_blank" style="color: #F25022; text-decoration: none;">
                                    🔊 Document to Audio Synthesis
                                </a>
                            </li>
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/Prompt-Engineering-and-LLM-Studio" target="_blank" style="color: #F25022; text-decoration: none;">
                                    💡 Prompt Engineering Studio
                                </a>
                            </li>
                        </ul>
                    </div>
                    """
                )
            with gr.Column():
                gr.HTML(
                    """
                    <div style="border: 1px solid #ddd; padding: 15px; border-radius: 8px; margin-bottom: 10px;">
                        <h3>🎥 Video & Audio Processing</h3>
                        <ul style="list-style-type: none; padding-left: 0;">
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/AI-Video-Analyzer-GTP4-Vision-TTS-Narration" target="_blank" style="color: #F25022; text-decoration: none;">
                                    🎥 Video GPT Vision & TTS Narration
                                </a>
                            </li>
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/Call-Analysis-AI-Tool" target="_blank" style="color: #F25022; text-decoration: none;">
                                    🎙️ Call Analysis Tool
                                </a>
                            </li>
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/object-detection-in-videos-with-yolox" target="_blank" style="color: #F25022; text-decoration: none;">
                                    🔍 Video Object Detection
                                </a>
                            </li>
                        </ul>
                    </div>
                    """
                )
            with gr.Column():
                gr.HTML(
                    """
                    <div style="border: 1px solid #ddd; padding: 15px; border-radius: 8px; margin-bottom: 10px;">
                        <h3>🎮 Interactive Applications</h3>
                        <ul style="list-style-type: none; padding-left: 0;">
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/AI-RPG-Adventure" target="_blank" style="color: #F25022; text-decoration: none;">
                                    🎲 AI RPG Adventure
                                </a>
                            </li>
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/AI-Financial-Analysis-Platform" target="_blank" style="color: #F25022; text-decoration: none;">
                                    📊 Financial Analysis Platform
                                </a>
                            </li>
                            <li style="margin-bottom: 10px;">
                                <a href="https://huggingface.co/spaces/Pixeltable/video-to-social-media-post-generator" target="_blank" style="color: #F25022; text-decoration: none;">
                                    📱 Social Media Post Generator
                                </a>
                            </li>
                        </ul>
                    </div>
                    """
                )

        # Footer
        gr.HTML(
            """
            <div style="margin-top: 2rem; padding-top: 1rem; border-top: 1px solid #e5e7eb;">
                <div style="display: flex; justify-content: space-between; align-items: center; flex-wrap: wrap; gap: 1rem;">
                    <div style="flex: 1;">
                        <h4 style="margin: 0; color: #374151;">🚀 Built with Pixeltable</h4>
                        <p style="margin: 0.5rem 0; color: #6b7280;">
                            Open Source AI Data infrastructure.
                        </p>
                    </div>
                    <div style="flex: 1;">
                        <h4 style="margin: 0; color: #374151;">🔗 Resources</h4>
                        <div style="display: flex; gap: 1.5rem; margin-top: 0.5rem;">
                            <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #4F46E5; text-decoration: none;">
                                💻 GitHub
                            </a>
                            <a href="https://docs.pixeltable.com" target="_blank" style="color: #4F46E5; text-decoration: none;">
                                📚 Documentation
                            </a>
                            <a href="https://huggingface.co/Pixeltable" target="_blank" style="color: #4F46E5; text-decoration: none;">
                                🤗 Hugging Face
                            </a>
                        </div>
                    </div>
                </div>
                <p style="margin: 1rem 0 0; text-align: center; color: #9CA3AF; font-size: 0.875rem;">
                    © 2024 Pixeltable | Apache License 2.0
                </p>
            </div>
            """
        )
    return demo
if __name__ == "__main__":
    # Script entry point: make sure the OpenAI key is set, build the UI, then
    # serve it.
    init_api_keys()
    demo = create_interface()
    # The Pixeltable media directory is listed in `allowed_paths` for the
    # served app; the API page is turned off via `show_api=False`.
    demo.launch(show_api=False, allowed_paths=[PIXELTABLE_MEDIA_DIR])