Spaces:

dzianisBY
/

YouTube_Creator_MetaData

Paused

App Files Files Community

@woai committed on Jun 2, 2025

Commit

e775565

1 Parent(s): 81339cb

🧹 Major code cleanup and internationalization - Remove Russian comments/strings, translate UI to English, clean linter errors, remove hardcoded tokens, delete test files. Ready for production deployment

Browse files

Files changed (13) hide show

README.md +16 -8
api_server.py +36 -36
cloudflare-config.yml +20 -0
deploy_changes.py +0 -53
gemini_helper.py +155 -93
gradio_app.py +42 -42
mcp_handlers.py +239 -93
run_telegram_bot.py +1 -1
telegram_bot.py +724 -139
update_server.bat +0 -43
update_server.ps1 +0 -54
update_server.sh +0 -42
update_tunnel.py +0 -56

README.md CHANGED Viewed

@@ -148,14 +148,22 @@ POST /api/gemini_timecodes
 ## 📁 Project Structure
 ```
-├── app.py               # Main Gradio application (HF Spaces entry point)
-├── api_server.py        # FastAPI backend server
-├── gemini_helper.py     # Gemini AI integration
-├── utils.py             # Utility functions
-├── models.py            # Data models
-├── mcp_handlers.py      # Model Context Protocol handlers
-├── requirements.txt     # Python dependencies
-└── README.md           # This file
 ```
 ## 🔬 Technology Stack

 ## 📁 Project Structure
 ```
+├── main.py                     # Unified launcher (API/UI/both modes)
+├── run_telegram_bot.py         # Telegram bot launcher
+├── api_server.py              # FastAPI backend server
+├── telegram_bot.py            # Telegram bot implementation
+├── mcp_handlers.py            # Model Context Protocol handlers
+├── gemini_helper.py           # Gemini AI integration
+├── utils.py                   # Utility functions
+├── models.py                  # Data models
+├── app.py                     # Gradio app (HF Spaces entry point)
+├── gradio_app.py              # Extended Gradio interface
+├── requirements.txt           # Python dependencies
+├── telegram_requirements.txt  # Telegram bot dependencies
+├── cloudflare-config.yml      # Cloudflare tunnel configuration
+├── TUNNEL_SOLUTIONS.md        # Tunnel troubleshooting guide
+├── youtube-content-metagen-agent.ipynb  # Kaggle reference notebook
+└── README.md                  # This file
 ```
 ## 🔬 Technology Stack

api_server.py CHANGED Viewed

@@ -14,10 +14,10 @@ from utils import format_timestamp, extract_video_id
 from models import MCPResponse
 import re
-# Загрузка переменных окружения
 load_dotenv()
-# Получение API ключа YouTube из переменных окружения
 YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
 app = FastAPI(
@@ -26,7 +26,7 @@ app = FastAPI(
     version="0.1.0",
 )
-# Настройка CORS
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -35,7 +35,7 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Инициализация YouTube API клиента
 def get_youtube_client():
     if not YOUTUBE_API_KEY:
         raise HTTPException(status_code=500, detail="YouTube API key is not configured")
@@ -45,7 +45,7 @@ def get_youtube_client():
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"YouTube API initialization error: {str(e)}")
-# Базовые модели данных для стандартных API запросов
 class SearchRequest(BaseModel):
     query: str
     max_results: Optional[int] = 10
@@ -63,28 +63,28 @@ class MCPRequestData(BaseModel):
     action: str
     parameters: Dict[str, Any]
-# Добавим новый маршрут для получения доступных языков транскрипта
 class TranscriptLanguagesRequest(BaseModel):
     video_id: str
-# Модель для запроса тайм-кодов
 class TimecodeRequest(BaseModel):
     video_id: str
     language_code: Optional[str] = None
-    segment_length: Optional[int] = 60  # Длина сегмента в секундах
     format: Optional[str] = "youtube"  # youtube, markdown
-# Загрузим модуль gemini_helper только после определения базовых моделей
 from gemini_helper import generate_timecodes_with_gemini, DEFAULT_MODEL
-# Модель для запроса тайм-кодов с помощью Gemini
 class GeminiTimecodeRequest(BaseModel):
     video_id: str
     language_code: Optional[str] = None
     format: Optional[str] = "youtube"  # youtube, markdown
-    model: Optional[str] = DEFAULT_MODEL  # модель Gemini (если None, используется модель по умолчанию)
-# Теперь можно загрузить mcp_handlers
 from mcp_handlers import (
     MCPQueryRequest,
     MCPVideoRequest,
@@ -113,7 +113,7 @@ def normalize_language_code(language_code: str) -> str:
     return language_code
-# Стандартные API маршруты
 @app.post("/api/search")
 async def search_videos(request: SearchRequest):
     try:
@@ -150,7 +150,7 @@ async def search_videos(request: SearchRequest):
 @app.post("/api/video_info")
 async def get_video_info(request: VideoInfoRequest):
     try:
-        # Извлекаем ID видео из ссылки, если это ссылка
         video_id = extract_video_id(request.video_id)
         youtube = get_youtube_client()
@@ -257,7 +257,7 @@ async def get_transcript(request: TranscriptRequest):
 @app.post("/api/transcript_languages")
 async def get_transcript_languages(request: TranscriptLanguagesRequest):
     try:
-        # Извлекаем ID видео из ссылки, если это ссылка
         video_id = extract_video_id(request.video_id)
         try:
@@ -278,7 +278,7 @@ async def get_transcript_languages(request: TranscriptLanguagesRequest):
     except Exception as e:
         return {"error": f"Error getting language list: {str(e)}"}
-# MCP эндпоинты
 @app.post("/api/mcp")
 async def mcp_endpoint(request: MCPRequestData):
     try:
@@ -309,12 +309,12 @@ async def mcp_endpoint(request: MCPRequestData):
     except Exception as e:
         return create_error_response(f"Error processing request: {str(e)}")
-# Маршрут для проверки здоровья сервера
 @app.get("/health")
 async def health_check():
     return {"status": "ok"}
-# Информационный маршрут, описывающий возможности API
 @app.get("/")
 async def root():
     return {
@@ -341,11 +341,11 @@ async def root():
 @app.post("/api/timecodes")
 async def generate_timecodes(request: TimecodeRequest):
     try:
-        # Извлекаем ID видео из ссылки, если это ссылка
         video_id = extract_video_id(request.video_id)
         print(f"Generating timecodes for ID: {video_id}")
-        # Пытаемся получить список доступных языков
         available_languages = []
         try:
             transcript_list_obj = YouTubeTranscriptApi.list_transcripts(video_id)
@@ -359,11 +359,11 @@ async def generate_timecodes(request: TimecodeRequest):
         except Exception as e:
             print(f"Failed to get language list: {str(e)}")
-        # Получаем транскрипт
         transcript_list = None
         used_language = None
-        # Если указан язык, пробуем его использовать
         if request.language_code:
             try:
                 print(f"Trying to get transcript in language: {request.language_code}")
@@ -373,7 +373,7 @@ async def generate_timecodes(request: TimecodeRequest):
             except Exception as e:
                 print(f"Failed to get transcript in language {request.language_code}: {str(e)}")
-        # Если транскрипт не получен и есть доступные языки, используем первый доступный
         if not transcript_list and available_languages:
             try:
                 first_language = available_languages[0]["language_code"]
@@ -384,7 +384,7 @@ async def generate_timecodes(request: TimecodeRequest):
             except Exception as e:
                 print(f"Failed to get transcript in language {first_language}: {str(e)}")
-        # Если все еще нет транскрипта, пробуем получить на любом языке
         if not transcript_list:
             try:
                 print("Trying to get transcript in any available language")
@@ -396,7 +396,7 @@ async def generate_timecodes(request: TimecodeRequest):
         if not transcript_list:
             return {"error": "Transcript for this video is unavailable"}
-        # Группируем транскрипт по сегментам
         segments = []
         current_segment = {
             "start": transcript_list[0]["start"],
@@ -409,12 +409,12 @@ async def generate_timecodes(request: TimecodeRequest):
         for entry in transcript_list:
             start_time = entry["start"]
-            # Если текущий сегмент пустой или запись находится в пределах длины сегмента
             if not current_segment["text"] or (start_time - current_segment["start"]) <= segment_length:
                 current_segment["text"].append(entry["text"])
                 current_segment["end"] = start_time + entry["duration"]
             else:
-                # Закрываем текущий сегмент и начинаем новый
                 segments.append(dict(current_segment))
                 current_segment = {
                     "start": start_time,
@@ -422,31 +422,31 @@ async def generate_timecodes(request: TimecodeRequest):
                     "text": [entry["text"]]
                 }
-        # Добавляем последний сегмент
         if current_segment["text"]:
             segments.append(current_segment)
-        # Форматируем тайм-коды в соответствии с выбранным форматом
         format_type = request.format.lower()
         timecodes = []
         for segment in segments:
             start_formatted = format_timestamp(segment["start"])
-            # Суммарный текст сегмента (первые 100 символов)
             text_summary = " ".join(segment["text"])
             if len(text_summary) > 100:
                 text_summary = text_summary[:97] + "..."
             if format_type == "youtube":
-                # Формат для YouTube (для вставки в описание)
                 timecodes.append(f"{start_formatted} {text_summary}")
             elif format_type == "markdown":
-                # Формат для Markdown
                 youtube_link = f"https://www.youtube.com/watch?v={video_id}&t={int(segment['start'])}"
                 timecodes.append(f"- [{start_formatted}]({youtube_link}) {text_summary}")
-        # Возвращаем тайм-коды и дополнительную информацию
         response = {
             "content": {
                 "video_id": video_id,
@@ -519,7 +519,7 @@ async def generate_gemini_timecodes(request: GeminiTimecodeRequest):
         if not transcript_list:
             return {"error": "Transcript for this video is unavailable"}
-        # Получаем информацию о видео для заголовка
         youtube = get_youtube_client()
         video_title = "YouTube Video"
@@ -534,7 +534,7 @@ async def generate_gemini_timecodes(request: GeminiTimecodeRequest):
         except Exception as e:
             print(f"Failed to get video information: {str(e)}")
-        # Отправляем запрос в Gemini с указанием языка
         result = await generate_timecodes_with_gemini(
             transcript_entries=transcript_list,
             video_title=video_title,
@@ -546,7 +546,7 @@ async def generate_gemini_timecodes(request: GeminiTimecodeRequest):
         if "error" in result:
             return {"error": result["error"]}
-        # Добавляем информацию о языке транскрипта
         if used_language:
             result["used_language"] = used_language

 from models import MCPResponse
 import re
+# Load environment variables
 load_dotenv()
+# Get YouTube API key from environment variables
 YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
 app = FastAPI(
     version="0.1.0",
 )
+# Configure CORS
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# Initialize YouTube API client
 def get_youtube_client():
     if not YOUTUBE_API_KEY:
         raise HTTPException(status_code=500, detail="YouTube API key is not configured")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"YouTube API initialization error: {str(e)}")
+# Base data models for standard API requests
 class SearchRequest(BaseModel):
     query: str
     max_results: Optional[int] = 10
     action: str
     parameters: Dict[str, Any]
+# Request model for the endpoint that lists available transcript languages
 class TranscriptLanguagesRequest(BaseModel):
     video_id: str
+# Model for timecode requests
 class TimecodeRequest(BaseModel):
     video_id: str
     language_code: Optional[str] = None
+    segment_length: Optional[int] = 60  # Segment length in seconds
     format: Optional[str] = "youtube"  # youtube, markdown
+# Load gemini_helper module only after defining base models
 from gemini_helper import generate_timecodes_with_gemini, DEFAULT_MODEL
+# Model for Gemini timecode requests
 class GeminiTimecodeRequest(BaseModel):
     video_id: str
     language_code: Optional[str] = None
     format: Optional[str] = "youtube"  # youtube, markdown
+    model: Optional[str] = DEFAULT_MODEL  # Gemini model (if None, uses default model)
+# Now we can load mcp_handlers
 from mcp_handlers import (
     MCPQueryRequest,
     MCPVideoRequest,
     return language_code
+# Standard API routes
 @app.post("/api/search")
 async def search_videos(request: SearchRequest):
     try:
 @app.post("/api/video_info")
 async def get_video_info(request: VideoInfoRequest):
     try:
+        # Extract video ID from URL if it's a URL
         video_id = extract_video_id(request.video_id)
         youtube = get_youtube_client()
 @app.post("/api/transcript_languages")
 async def get_transcript_languages(request: TranscriptLanguagesRequest):
     try:
+        # Extract video ID from URL if it's a URL
         video_id = extract_video_id(request.video_id)
         try:
     except Exception as e:
         return {"error": f"Error getting language list: {str(e)}"}
+# MCP endpoints
 @app.post("/api/mcp")
 async def mcp_endpoint(request: MCPRequestData):
     try:
     except Exception as e:
         return create_error_response(f"Error processing request: {str(e)}")
+# Route for health check
 @app.get("/health")
 async def health_check():
     return {"status": "ok"}
+# Information route, describing API capabilities
 @app.get("/")
 async def root():
     return {
 @app.post("/api/timecodes")
 async def generate_timecodes(request: TimecodeRequest):
     try:
+        # Extract video ID from URL if it's a URL
         video_id = extract_video_id(request.video_id)
         print(f"Generating timecodes for ID: {video_id}")
+        # Try to get list of available languages
         available_languages = []
         try:
             transcript_list_obj = YouTubeTranscriptApi.list_transcripts(video_id)
         except Exception as e:
             print(f"Failed to get language list: {str(e)}")
+        # Get transcript
         transcript_list = None
         used_language = None
+        # If language is specified, try to use it
         if request.language_code:
             try:
                 print(f"Trying to get transcript in language: {request.language_code}")
             except Exception as e:
                 print(f"Failed to get transcript in language {request.language_code}: {str(e)}")
+        # If no transcript was obtained and languages are available, use the first available one
         if not transcript_list and available_languages:
             try:
                 first_language = available_languages[0]["language_code"]
             except Exception as e:
                 print(f"Failed to get transcript in language {first_language}: {str(e)}")
+        # If there is still no transcript, try to get one in any available language
         if not transcript_list:
             try:
                 print("Trying to get transcript in any available language")
         if not transcript_list:
             return {"error": "Transcript for this video is unavailable"}
+        # Group transcript by segments
         segments = []
         current_segment = {
             "start": transcript_list[0]["start"],
         for entry in transcript_list:
             start_time = entry["start"]
+            # If current segment is empty or entry is within segment length
             if not current_segment["text"] or (start_time - current_segment["start"]) <= segment_length:
                 current_segment["text"].append(entry["text"])
                 current_segment["end"] = start_time + entry["duration"]
             else:
+                # Close the current segment and start a new one
                 segments.append(dict(current_segment))
                 current_segment = {
                     "start": start_time,
                     "text": [entry["text"]]
                 }
+        # Add last segment
         if current_segment["text"]:
             segments.append(current_segment)
+        # Format timecodes according to selected format
         format_type = request.format.lower()
         timecodes = []
         for segment in segments:
             start_formatted = format_timestamp(segment["start"])
+            # Summary text of segment (first 100 characters)
             text_summary = " ".join(segment["text"])
             if len(text_summary) > 100:
                 text_summary = text_summary[:97] + "..."
             if format_type == "youtube":
+                # Format for YouTube (for embedding in description)
                 timecodes.append(f"{start_formatted} {text_summary}")
             elif format_type == "markdown":
+                # Format for Markdown
                 youtube_link = f"https://www.youtube.com/watch?v={video_id}&t={int(segment['start'])}"
                 timecodes.append(f"- [{start_formatted}]({youtube_link}) {text_summary}")
+        # Return timecodes and additional information
         response = {
             "content": {
                 "video_id": video_id,
         if not transcript_list:
             return {"error": "Transcript for this video is unavailable"}
+        # Get video information for video title
         youtube = get_youtube_client()
         video_title = "YouTube Video"
         except Exception as e:
             print(f"Failed to get video information: {str(e)}")
+        # Send request to Gemini with language specified
         result = await generate_timecodes_with_gemini(
             transcript_entries=transcript_list,
             video_title=video_title,
         if "error" in result:
             return {"error": result["error"]}
+        # Add transcript language information
         if used_language:
             result["used_language"] = used_language

cloudflare-config.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+tunnel: 3b196473-a231-4909-b8dd-0a1e34e2a9c5
+credentials-file: C:\Users\belov\.cloudflared\3b196473-a231-4909-b8dd-0a1e34e2a9c5.json
+# Protocol and connection settings
+protocol: http2
+no-autoupdate: true
+grace-period: 30s
+retries: 5
+loglevel: info
+ingress:
+  - hostname: youtube-bot.tuttech.net
+    service: http://127.0.0.1:8080
+    originRequest:
+      noTLSVerify: true
+      connectTimeout: 30s
+      tlsTimeout: 10s
+      keepAliveTimeout: 90s
+      keepAliveConnections: 10
+  - service: http_status:404

deploy_changes.py DELETED Viewed

@@ -1,53 +0,0 @@
-#!/usr/bin/env python3
-"""
-Скрипт для быстрого деплоя изменений
-Usage: python deploy_changes.py "commit message"
-"""
-import sys
-import subprocess
-import os
-def run_command(command, description):
-    """Выполнить команду с описанием"""
-    print(f"🔄 {description}...")
-    try:
-        result = subprocess.run(command, shell=True, check=True, capture_output=True, text=True)
-        print(f"✅ {description} - успешно")
-        return True
-    except subprocess.CalledProcessError as e:
-        print(f"❌ {description} - ошибка: {e.stderr}")
-        return False
-def main():
-    if len(sys.argv) != 2:
-        print("Usage: python deploy_changes.py \"commit message\"")
-        print("Example: python deploy_changes.py \"Fix telegram bot MCP parameters\"")
-        sys.exit(1)
-    commit_message = sys.argv[1]
-    print("🚀 Начинаем деплой изменений...")
-    # 1. Git add
-    if not run_command("git add .", "Добавление файлов в git"):
-        return
-    # 2. Git commit
-    if not run_command(f'git commit -m "{commit_message}"', "Создание коммита"):
-        return
-    # 3. Git push
-    if not run_command("git push", "Отправка в удаленный репозиторий"):
-        return
-    print("\n✅ Изменения успешно отправлены!")
-    print("\n📋 Следующие шаги на удаленном сервере:")
-    print("1. git pull")
-    print("2. Перезапустить MCP сервер: python main.py --mode api --host 0.0.0.0 --port 8080")
-    print("3. Перезапустить Telegram бота: python run_telegram_bot.py")
-    print("\n🔗 Или используйте команду:")
-    print("ssh your-server 'cd /path/to/project && git pull && pkill -f main.py && python main.py --mode api --host 0.0.0.0 --port 8080 &'")
-if __name__ == "__main__":
-    main()

gemini_helper.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 from google import genai
 from google.genai import types
 from dotenv import load_dotenv
 from typing import List, Dict, Any, Optional
 import traceback
@@ -18,6 +19,24 @@ if GEMINI_API_KEY:
     try:
         client = genai.Client(api_key=GEMINI_API_KEY)
         print("Gemini client successfully initialized")
     except Exception as e:
         print(f"Error initializing Gemini client: {str(e)}")
         traceback.print_exc()
@@ -32,27 +51,52 @@ ALTERNATIVE_MODELS = ["gemini-1.5-flash-001"]
 def format_transcript_for_prompt(transcript_entries: List[Dict[str, Any]], video_duration_seconds: int = None) -> str:
     """Formats transcript for passing to prompt."""
     formatted_transcript = ""
     # Determine maximum time in transcript if video duration is not provided
     if video_duration_seconds is None:
         if transcript_entries:
             last_entry = transcript_entries[-1]
-            max_time = last_entry.get("start", 0) + last_entry.get("duration", 0)
             video_duration_seconds = int(max_time) + 10  # Add small buffer
-    for entry in transcript_entries:
-        start_time = entry.get("start", 0)
-        text = entry.get("text", "")
         # Check that time doesn't exceed total video duration
         if video_duration_seconds and start_time > video_duration_seconds:
             continue
         # Format time in hours:minutes:seconds format
         time_str = format_time_hms(start_time)
         formatted_transcript += f"[{time_str}] {text}\n"
     return formatted_transcript
 def format_time_hms(seconds: float) -> str:
@@ -63,13 +107,13 @@ def format_time_hms(seconds: float) -> str:
     hours = int(seconds // 3600)
     minutes = int((seconds % 3600) // 60)
     secs = int(seconds % 60)
     if hours > 0:
         return f"{hours:02d}:{minutes:02d}:{secs:02d}"
     else:
         return f"{minutes:02d}:{secs:02d}"
-def get_timecode_prompt(video_title: str, transcript: str, format_type: str = "youtube", language: str = None, video_duration_minutes: int = None) -> str:
     """Creates prompt for generating timecodes based on transcript."""
     # Determine prompt language based on video language
@@ -78,64 +122,33 @@ def get_timecode_prompt(video_title: str, transcript: str, format_type: str = "y
         example_description = "Discussion of main principles"
     elif language and language.lower().startswith('ru'):
         target_language = "Russian"
-        example_description = "Обсуждение основных принципов"
     else:
         target_language = "the same language as the video transcript"
         example_description = "Discussion of main principles"
-    # Determine number of timecodes based on video duration
-    if video_duration_minutes:
-        if video_duration_minutes <= 30:
-            timecode_count = "10-15"
-        elif video_duration_minutes <= 60:
-            timecode_count = "15-20"
-        else:
-            timecode_count = "20-30"
-    else:
-        timecode_count = "15-25"
-    if format_type == "youtube":
-        format_instructions = (
-            f"Format should be: MM:SS Topic description for videos under 1 hour, or HH:MM:SS Topic description for longer videos\n"
-            f"Example: 05:30 {example_description} or 1:05:30 {example_description}\n"
-            f"This format is suitable for YouTube video descriptions."
-        )
-    elif format_type == "markdown":
-        format_instructions = (
-            f"Format should be Markdown: - [MM:SS](link) Topic description for videos under 1 hour, or - [HH:MM:SS](link) Topic description for longer videos\n"
-            f"Example: - [05:30](https://youtu.be/VIDEOID?t=330) {example_description} or - [1:05:30](https://youtu.be/VIDEOID?t=3930) {example_description}\n"
-            f"This format creates clickable links in Markdown."
-        )
-    else:  # txt
-        format_instructions = (
-            f"Format should be: MM:SS - Topic description for videos under 1 hour, or HH:MM:SS - Topic description for longer videos\n"
-            f"Example: 05:30 - {example_description} or 1:05:30 - {example_description}\n"
-            f"This format is suitable for plain text representation."
-        )
     prompt = f"""
-    You are an expert at creating timestamps for YouTube videos. You have been provided with a transcript of the video "{video_title}".
-    Your task is to create timestamps for the main themes and segments of the video based on the provided transcript.
-    Create timestamp descriptions in {target_language}.
-    {format_instructions}
-    Rules for creating timestamps:
-    1. Select {timecode_count} key video segments
-    2. Use the time markers provided in the transcript to determine the start of each segment
-    3. Create brief (3-7 words) descriptions for each segment that reflect its main theme, using appropriate terminology and style
-    4. Distribute timestamps approximately evenly throughout the video length
-    5. Use MM:SS format for videos under 1 hour (example: 05:30, 45:20), and HH:MM:SS format for videos 1 hour or longer (example: 1:05:30, 1:45:20)
-    6. DO NOT include standard markers like "Video start" or "Video end"
-    7. Ensure a clear structure so viewers can easily navigate through the video
-    8. The first timestamp does NOT have to be 00:00, start with the first meaningful topic
-    Here is the video transcript:
-    {transcript}
-    Create a list of timestamps in the specified format. Reply with ONLY the list of timestamps, without introduction or conclusion.
     """
     return prompt
@@ -149,14 +162,14 @@ async def generate_timecodes_with_gemini(
 ) -> Dict[str, Any]:
     """
     Generates timecodes using Gemini based on transcript.
     Args:
         transcript_entries: List of transcript entries
         video_title: Video title
         format_type: Timecode format (youtube, markdown)
         model_name: Gemini model name (defaults to DEFAULT_MODEL)
         language: Transcript language (if known)
     Returns:
         Dictionary with generation results
     """
@@ -164,16 +177,24 @@ async def generate_timecodes_with_gemini(
         return {
             "error": "Gemini API key is not configured. Please add GEMINI_API_KEY to .env file"
         }
     try:
         print(f"Starting timecode generation with model: {model_name or DEFAULT_MODEL}")
         # Determine transcript language if not provided
         detected_language = language
         if not detected_language:
             # Simple heuristic for language detection from first 10 segments
-            text_sample = " ".join([entry.get("text", "") for entry in transcript_entries[:10]])
             # Set of Ukrainian letters that differ from Russian alphabet
             ukrainian_specific = set("ґєії")
             # If there's at least one specific Ukrainian letter
@@ -187,33 +208,76 @@ async def generate_timecodes_with_gemini(
             else:
                 detected_language = "en"
                 print("Detected transcript language: English (or other)")
         # Determine video duration (in seconds and minutes)
         video_duration_seconds = 0
         if transcript_entries:
             last_entry = transcript_entries[-1]
-            video_duration_seconds = last_entry.get("start", 0) + last_entry.get("duration", 0)
             video_duration_minutes = int(video_duration_seconds / 60)
             print(f"Determined video duration: {video_duration_minutes} minutes ({video_duration_seconds} seconds)")
         else:
             video_duration_minutes = None
         # Format transcript for prompt
         formatted_transcript = format_transcript_for_prompt(transcript_entries, video_duration_seconds)
         # Create prompt considering language and duration
         prompt = get_timecode_prompt(
-            video_title,
-            formatted_transcript,
-            format_type,
-            detected_language,
-            video_duration_minutes
         )
         print(f"Prompt prepared, length: {len(prompt)} characters")
         # List of models to try
         models_to_try = [model_name or DEFAULT_MODEL] + [m for m in ALTERNATIVE_MODELS if m != (model_name or DEFAULT_MODEL)]
         last_error = None
         for current_model in models_to_try:
             try:
@@ -228,14 +292,14 @@ async def generate_timecodes_with_gemini(
                     )
                 )
                 print(f"Response received: {type(response)}")
                 # Get response text
                 timecodes_text = response.text
                 print(f"Response text length: {len(timecodes_text)}")
                 # Split into lines and clean
                 timecodes = [line.strip() for line in timecodes_text.split('\n') if line.strip()]
                 # Filter timecodes to remove "video start" and "video end"
                 filtered_timecodes = []
                 for tc in timecodes:
@@ -246,15 +310,13 @@ async def generate_timecodes_with_gemini(
                         # Skip timecodes with "video start" or "video end"
                         lowercase_desc = description.lower()
                         if any(phrase in lowercase_desc for phrase in [
-                            "начало видео", "конец видео", "початок відео", "кінець відео",
-                            "start of video", "end of video", "video start", "video end",
                             "beginning", "conclusion", "intro", "outro"
                         ]):
                             continue
                     filtered_timecodes.append(tc)
                 # If too many timecodes, select evenly distributed ones
-                max_timecodes = 25  # Maximum recommended number of timecodes
                 if len(filtered_timecodes) > max_timecodes:
                     print(f"Too many timecodes ({len(filtered_timecodes)}), reducing to {max_timecodes}")
                     # Calculate step for selecting timecodes evenly
@@ -268,9 +330,9 @@ async def generate_timecodes_with_gemini(
                     final_timecodes = [filtered_timecodes[i] for i in indices]
                 else:
                     final_timecodes = filtered_timecodes
                 print(f"Final timecodes count after processing: {len(final_timecodes)}")
                 return {
                     "timecodes": final_timecodes,
                     "format": format_type,
@@ -284,7 +346,7 @@ async def generate_timecodes_with_gemini(
                 traceback.print_exc()
                 last_error = api_error
                 continue
         # If all models failed
         return {
             "error": f"Failed to execute request with any model. Last error: {str(last_error)}"
@@ -294,4 +356,4 @@ async def generate_timecodes_with_gemini(
         traceback.print_exc()
         return {
             "error": f"Error generating timecodes with Gemini: {str(e)}"
-        }

 import os
 from google import genai
 from google.genai import types
+from google.api_core import retry
 from dotenv import load_dotenv
 from typing import List, Dict, Any, Optional
 import traceback
     try:
         client = genai.Client(api_key=GEMINI_API_KEY)
         print("Gemini client successfully initialized")
         # Retry only on rate limiting (429) and temporary unavailability (503).
         def is_retriable(e):
             return (isinstance(e, Exception) and
                     getattr(e, 'code', None) in {429, 503})
         # generate_content on client.aio is a coroutine function. The synchronous
         # retry.Retry wrapper would only call it and hand back the un-awaited
         # coroutine, so errors raised while awaiting would never be retried.
         # AsyncRetry awaits the call inside the retry loop instead.
         if hasattr(client.aio.models, 'generate_content'):
             from google.api_core import retry_async
             original_method = client.aio.models.generate_content
             client.aio.models.generate_content = retry_async.AsyncRetry(
                 predicate=is_retriable,
                 initial=1.0,  # Initial delay in seconds
                 maximum=60.0,  # Maximum delay in seconds
                 multiplier=2.0,  # Backoff multiplier
                 deadline=300.0  # Total time budget for all attempts, in seconds
             )(original_method)
             print("Retry logic configured for Gemini API")
     except Exception as e:
         # Best-effort initialisation: report the failure and keep the module importable.
         print(f"Error initializing Gemini client: {str(e)}")
         traceback.print_exc()
 def format_transcript_for_prompt(transcript_entries: List[Dict[str, Any]], video_duration_seconds: Optional[int] = None) -> str:
     """Render transcript entries as "[time] text" lines for the prompt.
     Args:
         transcript_entries: Transcript entries, either dicts with "start",
             "duration" and "text" keys or objects exposing the same names as
             attributes (e.g. FetchedTranscriptSnippet). Entries of any other
             type are skipped.
         video_duration_seconds: Video length in seconds. When None it is
             estimated from the last entry (start + duration) plus a
             10-second buffer.
     Returns:
         One "[time] text" line per kept entry, each terminated by a newline;
         an empty string when no entry is kept.
     """
     # Estimate the duration from the last entry when the caller did not supply one
     if video_duration_seconds is None and transcript_entries:
         last_entry = transcript_entries[-1]
         if hasattr(last_entry, 'start'):  # FetchedTranscriptSnippet object
             max_time = last_entry.start + last_entry.duration
         elif isinstance(last_entry, dict):  # Dict format
             max_time = last_entry.get("start", 0) + last_entry.get("duration", 0)
         else:
             max_time = 0
         video_duration_seconds = int(max_time) + 10  # Add small buffer
     # Thin out long transcripts so the prompt still spans the whole video:
     # every 3rd entry above 60 minutes, every 2nd entry above 30 minutes.
     if video_duration_seconds and video_duration_seconds > 3600:
         sample_step = 3
     elif video_duration_seconds and video_duration_seconds > 1800:
         sample_step = 2
     else:
         sample_step = 1
     if sample_step > 1:
         sampled_entries = transcript_entries[::sample_step]
         print(f"Sampled transcript: {len(sampled_entries)} entries from {len(transcript_entries)} total")
     else:
         sampled_entries = transcript_entries
     # Collect lines and join once instead of repeated string concatenation
     lines = []
     for entry in sampled_entries:
         if hasattr(entry, 'start'):  # FetchedTranscriptSnippet object
             start_time = entry.start
             text = entry.text
         elif isinstance(entry, dict):  # Dict format
             start_time = entry.get("start", 0)
             text = entry.get("text", "")
         else:
             continue  # Skip invalid entries
         # Drop entries that start after the end of the video
         if video_duration_seconds and start_time > video_duration_seconds:
             continue
         lines.append(f"[{format_time_hms(start_time)}] {text}\n")
     return "".join(lines)
 def format_time_hms(seconds: float) -> str:
     """Format a duration in seconds as MM:SS, or HH:MM:SS from one hour up."""
     whole_hours = int(seconds // 3600)
     # Minutes and seconds are always shown; fractions of a second are truncated
     parts = [int((seconds % 3600) // 60), int(seconds % 60)]
     if whole_hours > 0:
         parts.insert(0, whole_hours)
     return ":".join(f"{part:02d}" for part in parts)
+def get_timecode_prompt(video_title: str, transcript: str, format_type: str = "youtube", language: str = None, video_duration_minutes: int = None, timecode_count: str = None, interval_text: str = None) -> str:
     """Creates prompt for generating timecodes based on transcript."""
     # Determine prompt language based on video language
         example_description = "Discussion of main principles"
     elif language and language.lower().startswith('ru'):
         target_language = "Russian"
+        example_description = "Discussion of main principles"
     else:
         target_language = "the same language as the video transcript"
         example_description = "Discussion of main principles"
     prompt = f"""
+You are a YouTube assistant. Analyze the FULL TRANSCRIPT below and identify all major topic shifts or sections.
+Your task:
+- Generate timestamps that cover the ENTIRE {video_duration_minutes}-minute video
+- Each timestamp must be paired with a precise time from the transcript
+- Timestamps must reflect the actual content flow throughout the video
+Format requirements:
+- Plain text output ONLY
+- Each line format: MM:SS Topic description (or HH:MM:SS for longer videos)
+- Use {target_language} for descriptions (3-6 words each)
+- Start with early timestamp (first few minutes)
+- End with late timestamp (last 10-15 minutes of video)
+- NO explanations, NO numbering, NO extra text
+CRITICAL: The transcript below spans {video_duration_minutes} minutes. You MUST create timestamps that span from beginning to end, not just the first portion.
+Full transcript to analyze:
+{transcript}
+Generate {timecode_count} timestamps covering the complete {video_duration_minutes}-minute duration:
     """
     return prompt
 ) -> Dict[str, Any]:
     """
     Generates timecodes using Gemini based on transcript.
     Args:
         transcript_entries: List of transcript entries
         video_title: Video title
         format_type: Timecode format (youtube, markdown)
         model_name: Gemini model name (defaults to DEFAULT_MODEL)
         language: Transcript language (if known)
     Returns:
         Dictionary with generation results
     """
         return {
             "error": "Gemini API key is not configured. Please add GEMINI_API_KEY to .env file"
         }
     try:
         print(f"Starting timecode generation with model: {model_name or DEFAULT_MODEL}")
         # Determine transcript language if not provided
         detected_language = language
         if not detected_language:
             # Simple heuristic for language detection from first 10 segments
+            # Handle both dict format and FetchedTranscriptSnippet objects
+            text_sample_parts = []
+            for entry in transcript_entries[:10]:
+                if hasattr(entry, 'text'):  # FetchedTranscriptSnippet object
+                    text_sample_parts.append(entry.text)
+                elif isinstance(entry, dict):  # Dict format
+                    text_sample_parts.append(entry.get("text", ""))
+            text_sample = " ".join(text_sample_parts)
             # Set of Ukrainian letters that differ from Russian alphabet
             ukrainian_specific = set("ґєії")
             # If there's at least one specific Ukrainian letter
             else:
                 detected_language = "en"
                 print("Detected transcript language: English (or other)")
         # Determine video duration (in seconds and minutes)
         video_duration_seconds = 0
+        max_timecodes = 30  # Default value
         if transcript_entries:
             last_entry = transcript_entries[-1]
+            # Handle both dict format and FetchedTranscriptSnippet objects
+            if hasattr(last_entry, 'start'):  # FetchedTranscriptSnippet object
+                video_duration_seconds = last_entry.start + last_entry.duration
+            elif isinstance(last_entry, dict):  # Dict format
+                video_duration_seconds = last_entry.get("start", 0) + last_entry.get("duration", 0)
             video_duration_minutes = int(video_duration_seconds / 60)
             print(f"Determined video duration: {video_duration_minutes} minutes ({video_duration_seconds} seconds)")
+            # Set max_timecodes based on video duration
+            if video_duration_minutes <= 30:
+                max_timecodes = 20
+            elif video_duration_minutes <= 60:
+                max_timecodes = 35
+            elif video_duration_minutes <= 120:
+                max_timecodes = 50
+            else:
+                max_timecodes = 60
         else:
             video_duration_minutes = None
+        # Determine number of timecodes based on video duration
+        if video_duration_minutes:
+            if video_duration_minutes <= 30:
+                timecode_count = "8-12"
+                max_timecodes = 15
+            elif video_duration_minutes <= 60:
+                timecode_count = "12-18"
+                max_timecodes = 20
+            elif video_duration_minutes <= 120:
+                timecode_count = "18-25"
+                max_timecodes = 30
+            else:
+                timecode_count = "25-35"
+                max_timecodes = 40
+        else:
+            timecode_count = "10-15"
+            max_timecodes = 20
         # Format transcript for prompt
         formatted_transcript = format_transcript_for_prompt(transcript_entries, video_duration_seconds)
         # Create prompt considering language and duration
+        # Calculate recommended interval for timestamps
+        if video_duration_minutes and timecode_count:
+            target_count = int(timecode_count.split('-')[0]) if timecode_count.split('-')[0].isdigit() else 20
+            interval_minutes = video_duration_minutes // target_count
+            interval_text = f"approximately every {interval_minutes}-{interval_minutes + 2} minutes"
+        else:
+            interval_text = "evenly throughout the video"
         prompt = get_timecode_prompt(
+            video_title,
+            formatted_transcript,
+            format_type,
+            detected_language,
+            video_duration_minutes,
+            timecode_count,
+            interval_text
         )
         print(f"Prompt prepared, length: {len(prompt)} characters")
         # List of models to try
         models_to_try = [model_name or DEFAULT_MODEL] + [m for m in ALTERNATIVE_MODELS if m != (model_name or DEFAULT_MODEL)]
         last_error = None
         for current_model in models_to_try:
             try:
                     )
                 )
                 print(f"Response received: {type(response)}")
                 # Get response text
                 timecodes_text = response.text
                 print(f"Response text length: {len(timecodes_text)}")
                 # Split into lines and clean
                 timecodes = [line.strip() for line in timecodes_text.split('\n') if line.strip()]
                 # Filter timecodes to remove "video start" and "video end"
                 filtered_timecodes = []
                 for tc in timecodes:
                         # Skip timecodes with "video start" or "video end"
                         lowercase_desc = description.lower()
                         if any(phrase in lowercase_desc for phrase in [
+                            "video start", "video end", "start of video", "end of video",
                             "beginning", "conclusion", "intro", "outro"
                         ]):
                             continue
                     filtered_timecodes.append(tc)
                 # If too many timecodes, select evenly distributed ones
                 if len(filtered_timecodes) > max_timecodes:
                     print(f"Too many timecodes ({len(filtered_timecodes)}), reducing to {max_timecodes}")
                     # Calculate step for selecting timecodes evenly
                     final_timecodes = [filtered_timecodes[i] for i in indices]
                 else:
                     final_timecodes = filtered_timecodes
                 print(f"Final timecodes count after processing: {len(final_timecodes)}")
                 return {
                     "timecodes": final_timecodes,
                     "format": format_type,
                 traceback.print_exc()
                 last_error = api_error
                 continue
         # If all models failed
         return {
             "error": f"Failed to execute request with any model. Last error: {str(last_error)}"
         traceback.print_exc()
         return {
             "error": f"Error generating timecodes with Gemini: {str(e)}"
+        }

gradio_app.py CHANGED Viewed

@@ -233,27 +233,27 @@ with gr.Blocks(title="YouTube MCP") as demo:
     gr.Markdown("# YouTube Model Context Protocol (MCP)")
     gr.Markdown("This interface allows interaction with YouTube API through MCP protocol")
-    with gr.Tab("Поиск видео"):
         with gr.Row():
             with gr.Column():
-                search_query = gr.Textbox(label="Поисковый запрос", placeholder="Введите запрос...")
                 with gr.Row():
-                    max_results = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Количество результатов")
                     order = gr.Dropdown(
                         choices=["relevance", "date", "viewCount", "rating", "title"],
                         value="relevance",
-                        label="Сортировка"
                     )
                     video_duration = gr.Dropdown(
                         choices=["any", "short", "medium", "long"],
                         value="any",
-                        label="Длительность"
                     )
-                search_button = gr.Button("Поиск")
             with gr.Column():
-                search_results = gr.Markdown(label="Результаты")
-                search_json = gr.JSON(label="JSON данные")
         search_button.click(
             search_youtube,
@@ -261,18 +261,18 @@ with gr.Blocks(title="YouTube MCP") as demo:
             outputs=[search_results, search_json]
         )
-    with gr.Tab("Информация о видео"):
         with gr.Row():
             with gr.Column():
                 video_id_input = gr.Textbox(
-                    label="ID видео или ссылка на видео",
-                    placeholder="Введите ID видео или полную ссылку (youtube.com, youtu.be, shorts, embed)..."
                 )
-                get_info_button = gr.Button("Получить информацию")
             with gr.Column():
-                video_info_output = gr.Markdown(label="Информация о видео")
-                video_info_json = gr.JSON(label="JSON данные")
         get_info_button.click(
             get_video_info,
@@ -280,21 +280,21 @@ with gr.Blocks(title="YouTube MCP") as demo:
             outputs=[video_info_output, video_info_json]
         )
-    with gr.Tab("Транскрипт видео"):
         with gr.Row():
             with gr.Column():
                 transcript_video_id = gr.Textbox(
-                    label="ID видео или ссылка на видео",
-                    placeholder="Введите ID видео или полную ссылку (youtube.com, youtu.be, shorts, embed)..."
                 )
-                language_code = gr.Textbox(label="Код языка (опционально)", placeholder="ru, en, etc...")
                 with gr.Row():
-                    get_transcript_button = gr.Button("Получить транскрипт")
-                    get_languages_button = gr.Button("Получить доступные языки")
             with gr.Column():
-                transcript_output = gr.Markdown(label="Транскрипт")
-                transcript_json = gr.JSON(label="JSON данные")
         get_transcript_button.click(
             get_transcript,
@@ -308,25 +308,25 @@ with gr.Blocks(title="YouTube MCP") as demo:
             outputs=[transcript_output, transcript_json]
         )
-    with gr.Tab("Тайм-коды"):
         with gr.Row():
             with gr.Column():
                 timecode_video_id = gr.Textbox(
-                    label="ID видео или ссылка на видео",
-                    placeholder="Введите ID видео или полную ссылку (youtube.com, youtu.be, shorts, embed)..."
                 )
-                timecode_language = gr.Textbox(label="Код языка (опционально)", placeholder="ru, en, etc...")
-                segment_length = gr.Slider(minimum=30, maximum=300, value=60, step=30, label="Длина сегмента (секунды)")
                 format_type = gr.Dropdown(
                     choices=["youtube", "markdown"],
                     value="youtube",
-                    label="Формат тайм-кодов"
                 )
-                generate_timecodes_button = gr.Button("Сгенерировать тайм-коды")
             with gr.Column():
-                timecodes_output = gr.Markdown(label="Тайм-коды")
-                timecodes_json = gr.JSON(label="JSON данные")
         generate_timecodes_button.click(
             generate_timecodes,
@@ -334,30 +334,30 @@ with gr.Blocks(title="YouTube MCP") as demo:
             outputs=[timecodes_output, timecodes_json]
         )
-    with gr.Tab("Gemini Тайм-коды"):
         with gr.Row():
             with gr.Column():
                 gemini_video_id = gr.Textbox(
-                    label="ID видео или ссылка на видео",
-                    placeholder="Введите ID видео или полную ссылку (youtube.com, youtu.be, shorts, embed)..."
                 )
-                gemini_language = gr.Textbox(label="Код языка (опционально)", placeholder="ru, en, etc...")
                 gemini_format = gr.Dropdown(
                     choices=["youtube", "markdown"],
                     value="youtube",
-                    label="Формат тайм-кодов"
                 )
                 gemini_model = gr.Dropdown(
                     choices=["gemini-2.0-flash-001", "gemini-2.0-pro-001", "gemini-2.0-pro-vision-001"],
                     value="gemini-2.0-flash-001",
-                    label="Модель Gemini"
                 )
-                generate_gemini_button = gr.Button("Сгенерировать тайм-коды с Gemini")
             with gr.Column():
-                gemini_output = gr.Markdown(label="Информация о генерации")
-                gemini_timecodes = gr.Textbox(label="Тайм-коды", lines=10, max_lines=20, show_copy_button=True)
-                gemini_json = gr.JSON(label="JSON данные")
         async def process_gemini_result(video_id, language_code, format_type, model):
             result = await generate_gemini_timecodes(video_id, language_code, format_type, model)
@@ -378,6 +378,6 @@ with gr.Blocks(title="YouTube MCP") as demo:
             outputs=[gemini_output, gemini_timecodes, gemini_json]
         )
-# Запуск приложения
 if __name__ == "__main__":
     demo.launch()

     gr.Markdown("# YouTube Model Context Protocol (MCP)")
     gr.Markdown("This interface allows interaction with YouTube API through MCP protocol")
+    with gr.Tab("Video Search"):
         with gr.Row():
             with gr.Column():
+                search_query = gr.Textbox(label="Search Query", placeholder="Enter search query...")
                 with gr.Row():
+                    max_results = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Number of Results")
                     order = gr.Dropdown(
                         choices=["relevance", "date", "viewCount", "rating", "title"],
                         value="relevance",
+                        label="Sort By"
                     )
                     video_duration = gr.Dropdown(
                         choices=["any", "short", "medium", "long"],
                         value="any",
+                        label="Duration"
                     )
+                search_button = gr.Button("Search")
             with gr.Column():
+                search_results = gr.Markdown(label="Results")
+                search_json = gr.JSON(label="JSON Data")
         search_button.click(
             search_youtube,
             outputs=[search_results, search_json]
         )
+    with gr.Tab("Video Information"):
         with gr.Row():
             with gr.Column():
                 video_id_input = gr.Textbox(
+                    label="Video ID or URL",
+                    placeholder="Enter video ID or full URL (youtube.com, youtu.be, shorts, embed)..."
                 )
+                get_info_button = gr.Button("Get Information")
             with gr.Column():
+                video_info_output = gr.Markdown(label="Video Information")
+                video_info_json = gr.JSON(label="JSON Data")
         get_info_button.click(
             get_video_info,
             outputs=[video_info_output, video_info_json]
         )
+    with gr.Tab("Video Transcript"):
         with gr.Row():
             with gr.Column():
                 transcript_video_id = gr.Textbox(
+                    label="Video ID or URL",
+                    placeholder="Enter video ID or full URL (youtube.com, youtu.be, shorts, embed)..."
                 )
+                language_code = gr.Textbox(label="Language Code (optional)", placeholder="ru, en, etc...")
                 with gr.Row():
+                    get_transcript_button = gr.Button("Get Transcript")
+                    get_languages_button = gr.Button("Get Available Languages")
             with gr.Column():
+                transcript_output = gr.Markdown(label="Transcript")
+                transcript_json = gr.JSON(label="JSON Data")
         get_transcript_button.click(
             get_transcript,
             outputs=[transcript_output, transcript_json]
         )
+    with gr.Tab("Timecodes"):
         with gr.Row():
             with gr.Column():
                 timecode_video_id = gr.Textbox(
+                    label="Video ID or URL",
+                    placeholder="Enter video ID or full URL (youtube.com, youtu.be, shorts, embed)..."
                 )
+                timecode_language = gr.Textbox(label="Language Code (optional)", placeholder="ru, en, etc...")
+                segment_length = gr.Slider(minimum=30, maximum=300, value=60, step=30, label="Segment Length (seconds)")
                 format_type = gr.Dropdown(
                     choices=["youtube", "markdown"],
                     value="youtube",
+                    label="Timecode Format"
                 )
+                generate_timecodes_button = gr.Button("Generate Timecodes")
             with gr.Column():
+                timecodes_output = gr.Markdown(label="Timecodes")
+                timecodes_json = gr.JSON(label="JSON Data")
         generate_timecodes_button.click(
             generate_timecodes,
             outputs=[timecodes_output, timecodes_json]
         )
+    with gr.Tab("Gemini Timecodes"):
         with gr.Row():
             with gr.Column():
                 gemini_video_id = gr.Textbox(
+                    label="Video ID or URL",
+                    placeholder="Enter video ID or full URL (youtube.com, youtu.be, shorts, embed)..."
                 )
+                gemini_language = gr.Textbox(label="Language Code (optional)", placeholder="ru, en, etc...")
                 gemini_format = gr.Dropdown(
                     choices=["youtube", "markdown"],
                     value="youtube",
+                    label="Timecode Format"
                 )
                 gemini_model = gr.Dropdown(
                     choices=["gemini-2.0-flash-001", "gemini-2.0-pro-001", "gemini-2.0-pro-vision-001"],
                     value="gemini-2.0-flash-001",
+                    label="Gemini Model"
                 )
+                generate_gemini_button = gr.Button("Generate Timecodes with Gemini")
             with gr.Column():
+                gemini_output = gr.Markdown(label="Generation Information")
+                gemini_timecodes = gr.Textbox(label="Timecodes", lines=10, max_lines=20, show_copy_button=True)
+                gemini_json = gr.JSON(label="JSON Data")
         async def process_gemini_result(video_id, language_code, format_type, model):
             result = await generate_gemini_timecodes(video_id, language_code, format_type, model)
             outputs=[gemini_output, gemini_timecodes, gemini_json]
         )
+# Launch the application
 if __name__ == "__main__":
     demo.launch()

mcp_handlers.py CHANGED Viewed

@@ -41,6 +41,7 @@ class MCPGeminiRequest(BaseModel):
 async def process_mcp_search(youtube_client, request: MCPQueryRequest) -> List[MCPResponse]:
     """Process MCP request for video search."""
     try:
         search_response = youtube_client.search().list(
             q=request.query,
             part="snippet",
@@ -48,41 +49,70 @@ async def process_mcp_search(youtube_client, request: MCPQueryRequest) -> List[M
             type="video"
         ).execute()
         results = []
-        for item in search_response.get("items", []):
-            video_id = item["id"]["videoId"]
-            snippet = item["snippet"]
-            # Create MCP format response
-            video_data = {
-                "video_id": video_id,
-                "title": snippet["title"],
-                "description": snippet["description"],
-                "thumbnail": snippet["thumbnails"]["high"]["url"],
-                "channel_title": snippet["channelTitle"],
-                "published_at": snippet["publishedAt"]
-            }
-            # Format markdown for video display
-            markdown_text = (
-                f"## {snippet['title']}\n"
-                f"**Channel:** {snippet['channelTitle']}\n"
-                f"**Published:** {snippet['publishedAt']}\n\n"
-                f"[![Thumbnail]({snippet['thumbnails']['high']['url']})](https://www.youtube.com/watch?v={video_id})\n\n"
-                f"{snippet['description'][:300]}...\n\n"
-                f"[Watch on YouTube](https://www.youtube.com/watch?v={video_id})"
-            )
-            results.append(MCPResponse(
-                type="youtube_video",
-                markdown=markdown_text,
-                data=video_data
-            ))
         return results
     except HttpError as e:
         raise HTTPException(status_code=500, detail=f"YouTube API error: {str(e)}")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}")
 async def process_mcp_video_info(youtube_client, request: MCPVideoRequest) -> MCPResponse:
@@ -120,8 +150,20 @@ async def process_mcp_video_info(youtube_client, request: MCPVideoRequest) -> MC
         }
         return MCPResponse(
-            type="text",
-            content=f"Video information:\n{json.dumps(video_data, indent=2, ensure_ascii=False)}"
         )
     except HttpError as e:
         return MCPResponse(
@@ -139,53 +181,99 @@ async def process_mcp_transcript(request: MCPTranscriptRequest) -> MCPResponse:
     try:
         # Extract video ID from URL if it's a URL
         video_id = extract_video_id(request.video_id)
         try:
-            languages = [request.language_code] if request.language_code else None
-            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
-        except Exception as transcript_error:
             if request.language_code:
                 try:
-                    print(f"Failed to get transcript in language {request.language_code}, trying to get available transcripts")
-                    transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-                except Exception as fallback_error:
-                    return MCPResponse(
-                        type="error",
-                        error=f"Transcript not found. Details: {str(fallback_error)}"
-                    )
-            else:
-                return MCPResponse(
-                    type="error",
-                    error=f"Failed to get transcript. Details: {str(transcript_error)}"
-                )
-        if not transcript_list:
             return MCPResponse(
                 type="error",
-                error="Transcript for this video is unavailable"
             )
         formatted_transcript = []
         for entry in transcript_list:
-            formatted_transcript.append({
-                "text": entry.get("text", ""),
-                "start": entry.get("start", 0),
-                "duration": entry.get("duration", 0)
-            })
         # Format markdown for transcript display
-        markdown_text = "# Transcript\n\n"
         for entry in formatted_transcript:
-            start_time = entry.get("start")
-            duration = entry.get("duration")
             end_time = start_time + duration
-            text = entry.get("text")
             # Convert time to hours:minutes:seconds format
             start_formatted = format_timestamp(start_time)
             end_formatted = format_timestamp(end_time)
-            markdown_text += f"[{start_formatted} - {end_formatted}] {text}\n\n"
         return MCPResponse(
             type="youtube_transcript",
@@ -196,6 +284,7 @@ async def process_mcp_transcript(request: MCPTranscriptRequest) -> MCPResponse:
             }
         )
     except Exception as e:
         return MCPResponse(
             type="error",
             error=f"Error getting transcript: {str(e)}"
@@ -236,29 +325,57 @@ async def process_mcp_timecodes(youtube_client, request: MCPTimecodeRequest) ->
         video_id = extract_video_id(request.video_id)
         # Get transcript
         try:
-            languages = [request.language_code] if request.language_code else None
-            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
-        except Exception as transcript_error:
             if request.language_code:
                 try:
-                    print(f"Failed to get transcript in language {request.language_code}, trying to get available transcripts")
-                    transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
-                except Exception as fallback_error:
-                    return MCPResponse(
-                        type="error",
-                        error=f"Transcript not found. Details: {str(fallback_error)}"
-                    )
-            else:
-                return MCPResponse(
-                    type="error",
-                    error=f"Failed to get transcript. Details: {str(transcript_error)}"
-                )
-        if not transcript_list:
             return MCPResponse(
                 type="error",
-                error="Transcript for this video is unavailable"
             )
         # Group transcript into segments
@@ -356,36 +473,65 @@ async def process_mcp_gemini_timecodes(youtube_client, request: MCPGeminiRequest
     """Process MCP request for Gemini timecode generation."""
     try:
         # Get transcript
         try:
-            languages = [request.language_code] if request.language_code else None
-            transcript_list = YouTubeTranscriptApi.get_transcript(request.video_id, languages=languages)
-        except Exception as transcript_error:
             if request.language_code:
                 try:
-                    print(f"Failed to get transcript in language {request.language_code}, trying to get available transcripts")
-                    transcript_list = YouTubeTranscriptApi.get_transcript(request.video_id)
-                except Exception as fallback_error:
-                    return MCPResponse(
-                        type="error",
-                        error=f"Transcript not found. Details: {str(fallback_error)}"
-                    )
-            else:
-                return MCPResponse(
-                    type="error",
-                    error=f"Failed to get transcript. Details: {str(transcript_error)}"
-                )
-        if not transcript_list:
             return MCPResponse(
                 type="error",
-                error="Transcript for this video is unavailable"
             )
         # Get video information for title
         try:
             video_response = youtube_client.videos().list(
                 part="snippet",
-                id=request.video_id
             ).execute()
             if video_response.get("items"):

 async def process_mcp_search(youtube_client, request: MCPQueryRequest) -> List[MCPResponse]:
     """Search YouTube for ``request.query`` and build one MCP response per video.
     Items that cannot be processed (no ``id`` key, unexpected id structure,
     missing snippet fields) are reported and skipped, so one malformed item
     never fails the whole search.
     Returns:
         A list of ``MCPResponse`` objects of type ``youtube_video``; empty when
         the API returned no usable items.
     Raises:
         HTTPException: status 500 when the YouTube API call fails with
             ``HttpError`` or any other error occurs outside per-item handling.
     """
     try:
         print(f"Starting search for query: {request.query}")
         search_response = youtube_client.search().list(
             q=request.query,
             part="snippet",
             type="video"
         ).execute()
         items = search_response.get("items", [])
         print(f"Search response received. Items count: {len(items)}")
         results = []
         for i, item in enumerate(items):
             try:
                 print(f"Processing item {i}: {type(item)}")
                 print(f"Item keys: {list(item.keys()) if isinstance(item, dict) else 'Not a dict'}")
                 # Check if 'id' exists and has the right structure
                 if 'id' not in item:
                     print(f"Warning: 'id' key not found in item {i}")
                     continue
                 item_id = item["id"]
                 print(f"Item id type: {type(item_id)}, value: {item_id}")
                 # The id is either {"videoId": ...} or a bare string.
                 if isinstance(item_id, dict) and "videoId" in item_id:
                     video_id = item_id["videoId"]
                 elif isinstance(item_id, str):
                     video_id = item_id
                 else:
                     print(f"Warning: Unexpected id structure in item {i}: {item_id}")
                     continue
                 snippet = item["snippet"]
                 # Prefer the "high" thumbnail but fall back to smaller sizes
                 # instead of dropping the whole result when it is missing.
                 thumbnails = snippet.get("thumbnails") or {}
                 thumbnail_url = next(
                     (
                         thumbnails[size]["url"]
                         for size in ("high", "medium", "default")
                         if (thumbnails.get(size) or {}).get("url")
                     ),
                     "",
                 )
                 # Only mark the description as truncated when it really is.
                 description = snippet["description"]
                 preview = description[:300] + "..." if len(description) > 300 else description
                 watch_url = f"https://www.youtube.com/watch?v={video_id}"
                 # Create MCP format response
                 video_data = {
                     "video_id": video_id,
                     "title": snippet["title"],
                     "description": description,
                     "thumbnail": thumbnail_url,
                     "channel_title": snippet["channelTitle"],
                     "published_at": snippet["publishedAt"]
                 }
                 # Format markdown for video display
                 markdown_parts = [
                     f"## {snippet['title']}\n",
                     f"**Channel:** {snippet['channelTitle']}\n",
                     f"**Published:** {snippet['publishedAt']}\n\n",
                 ]
                 if thumbnail_url:
                     markdown_parts.append(f"[![Thumbnail]({thumbnail_url})]({watch_url})\n\n")
                 markdown_parts.append(f"{preview}\n\n")
                 markdown_parts.append(f"[Watch on YouTube]({watch_url})")
                 results.append(MCPResponse(
                     type="youtube_video",
                     markdown="".join(markdown_parts),
                     data=video_data
                 ))
                 print(f"Successfully processed item {i}")
             except Exception as item_error:
                 # Best effort: report the bad item and move on to the next one.
                 print(f"Error processing item {i}: {str(item_error)}")
                 print(f"Item data: {item}")
         print(f"Search completed. Total results: {len(results)}")
         return results
     except HttpError as e:
         print(f"YouTube API HttpError: {str(e)}")
         raise HTTPException(status_code=500, detail=f"YouTube API error: {str(e)}") from e
     except Exception as e:
         print(f"Unexpected error in search: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Unexpected error: {str(e)}") from e
 async def process_mcp_video_info(youtube_client, request: MCPVideoRequest) -> MCPResponse:
         }
         return MCPResponse(
+            type="youtube_video_info",
+            data=video_data,
+            markdown=f"""# 📹 Video Information
+**🎬 Title:** {video_data['title']}
+**👤 Channel:** {video_data['channel_title']}
+**📅 Published:** {video_data['published_at']}
+**👁️ Views:** {video_data.get('view_count', 'N/A')}
+**👍 Likes:** {video_data.get('like_count', 'N/A')}
+**💬 Comments:** {video_data.get('comment_count', 'N/A')}
+**⏱️ Duration:** {video_data.get('duration', 'N/A')}
+[🔗 Watch on YouTube](https://www.youtube.com/watch?v={video_id})
+"""
         )
     except HttpError as e:
         return MCPResponse(
     try:
         # Extract video ID from URL if it's a URL
         video_id = extract_video_id(request.video_id)
+        print(f"Getting transcript for video: {video_id}")
+        transcript_list = None
         try:
+            # First, try to get list of available transcripts
+            available_transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
+            # Try to get transcript with specified language first
             if request.language_code:
+                print(f"Trying to get transcript in requested language: {request.language_code}")
                 try:
+                    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[request.language_code])
+                except Exception as lang_error:
+                    print(f"Requested language {request.language_code} failed: {str(lang_error)}")
+                    # Fall through to try other languages
+            # If no transcript yet, try to find any available transcript
+            if not transcript_list:
+                print("Trying to get any available transcript")
+                for transcript in available_transcripts:
+                    try:
+                        print(f"Trying language: {transcript.language_code} ({transcript.language})")
+                        # Try direct fetch first
+                        transcript_list = transcript.fetch()
+                        print(f"Successfully got transcript in {transcript.language_code}")
+                        break
+                    except Exception as lang_error:
+                        print(f"Direct fetch failed for {transcript.language_code}: {str(lang_error)}")
+                        # Try alternative method for problematic transcripts (usually Russian/Ukrainian)
+                        try:
+                            print(f"Trying alternative method for {transcript.language_code}")
+                            # Use direct API call with language code
+                            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[transcript.language_code])
+                            print(f"Alternative method succeeded for {transcript.language_code}")
+                            break
+                        except Exception as alt_error:
+                            print(f"Alternative method also failed for {transcript.language_code}: {str(alt_error)}")
+                            continue
+        except Exception as transcript_error:
+            print(f"Failed to get transcript list: {str(transcript_error)}")
+            return MCPResponse(
+                type="error",
+                error="Transcript not available for this video. The video may not have subtitles or may be restricted."
+            )
+        # Check if we got a valid transcript
+        if not transcript_list or len(transcript_list) == 0:
+            print("No transcript data received")
             return MCPResponse(
                 type="error",
+                error="No transcript available for this video. The video may not have subtitles."
             )
+        print(f"Successfully got transcript with {len(transcript_list)} entries")
+        # Format transcript entries
         formatted_transcript = []
         for entry in transcript_list:
+            # Handle both dict format (from direct API) and FetchedTranscriptSnippet objects
+            if hasattr(entry, 'text'):  # FetchedTranscriptSnippet object
+                formatted_transcript.append({
+                    "text": entry.text,
+                    "start": entry.start,
+                    "duration": entry.duration
+                })
+            elif isinstance(entry, dict):  # Dict format
+                formatted_transcript.append({
+                    "text": entry.get("text", ""),
+                    "start": entry.get("start", 0),
+                    "duration": entry.get("duration", 0)
+                })
+        if not formatted_transcript:
+            return MCPResponse(
+                type="error",
+                error="Transcript data is invalid or empty."
+            )
         # Format markdown for transcript display
+        markdown_text = "# 📝 Transcript\n\n"
         for entry in formatted_transcript:
+            start_time = entry.get("start", 0)
+            duration = entry.get("duration", 0)
             end_time = start_time + duration
+            text = entry.get("text", "")
             # Convert time to hours:minutes:seconds format
             start_formatted = format_timestamp(start_time)
             end_formatted = format_timestamp(end_time)
+            markdown_text += f"**[{start_formatted} - {end_formatted}]** {text}\n\n"
         return MCPResponse(
             type="youtube_transcript",
             }
         )
     except Exception as e:
+        print(f"Unexpected error in transcript processing: {str(e)}")
         return MCPResponse(
             type="error",
             error=f"Error getting transcript: {str(e)}"
         video_id = extract_video_id(request.video_id)
         # Get transcript
+        transcript_list = None
         try:
+            # First, try to get list of available transcripts
+            available_transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
+            # Try to get transcript with specified language first
             if request.language_code:
+                print(f"Trying to get transcript for timecodes in requested language: {request.language_code}")
                 try:
+                    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[request.language_code])
+                except Exception as lang_error:
+                    print(f"Requested language {request.language_code} failed: {str(lang_error)}")
+                    # Fall through to try other languages
+            # If no transcript yet, try to find any available transcript
+            if not transcript_list:
+                print("Trying to get any available transcript for timecodes")
+                for transcript in available_transcripts:
+                    try:
+                        print(f"Trying language: {transcript.language_code} ({transcript.language})")
+                        # Try direct fetch first
+                        transcript_list = transcript.fetch()
+                        print(f"Successfully got transcript in {transcript.language_code}")
+                        break
+                    except Exception as lang_error:
+                        print(f"Direct fetch failed for {transcript.language_code}: {str(lang_error)}")
+                        # Try alternative method for problematic transcripts (usually Russian/Ukrainian)
+                        try:
+                            print(f"Trying alternative method for {transcript.language_code}")
+                            # Use direct API call with language code
+                            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[transcript.language_code])
+                            print(f"Alternative method succeeded for {transcript.language_code}")
+                            break
+                        except Exception as alt_error:
+                            print(f"Alternative method also failed for {transcript.language_code}: {str(alt_error)}")
+                            continue
+        except Exception as transcript_error:
+            print(f"Failed to get transcript list for timecodes: {str(transcript_error)}")
+            return MCPResponse(
+                type="error",
+                error="Transcript not available for timecode generation. The video may not have subtitles or may be restricted."
+            )
+        # Check if we got a valid transcript
+        if not transcript_list or len(transcript_list) == 0:
+            print("No transcript data received for timecodes")
             return MCPResponse(
                 type="error",
+                error="No transcript available for timecode generation. The video may not have subtitles."
             )
         # Group transcript into segments
     """Process MCP request for Gemini timecode generation."""
     try:
         # Get transcript
+        transcript_list = None
+        video_id = extract_video_id(request.video_id)
         try:
+            # First, try to get list of available transcripts
+            available_transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
+            # Try to get transcript with specified language first
             if request.language_code:
+                print(f"Trying to get transcript for Gemini timecodes in requested language: {request.language_code}")
                 try:
+                    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[request.language_code])
+                except Exception as lang_error:
+                    print(f"Requested language {request.language_code} failed: {str(lang_error)}")
+                    # Fall through to try other languages
+            # If no transcript yet, try to find any available transcript
+            if not transcript_list:
+                print("Trying to get any available transcript for Gemini timecodes")
+                for transcript in available_transcripts:
+                    try:
+                        print(f"Trying language: {transcript.language_code} ({transcript.language})")
+                        # Try direct fetch first
+                        transcript_list = transcript.fetch()
+                        print(f"Successfully got transcript in {transcript.language_code}")
+                        break
+                    except Exception as lang_error:
+                        print(f"Direct fetch failed for {transcript.language_code}: {str(lang_error)}")
+                        # Try alternative method for problematic transcripts (usually Russian/Ukrainian)
+                        try:
+                            print(f"Trying alternative method for {transcript.language_code}")
+                            # Use direct API call with language code
+                            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=[transcript.language_code])
+                            print(f"Alternative method succeeded for {transcript.language_code}")
+                            break
+                        except Exception as alt_error:
+                            print(f"Alternative method also failed for {transcript.language_code}: {str(alt_error)}")
+                            continue
+        except Exception as transcript_error:
+            print(f"Failed to get transcript list for Gemini timecodes: {str(transcript_error)}")
+            return MCPResponse(
+                type="error",
+                error="Transcript not available for AI timecode generation. The video may not have subtitles or may be restricted."
+            )
+        # Check if we got a valid transcript
+        if not transcript_list or len(transcript_list) == 0:
+            print("No transcript data received for Gemini timecodes")
             return MCPResponse(
                 type="error",
+                error="No transcript available for AI timecode generation. The video may not have subtitles."
             )
         # Get video information for title
         try:
             video_response = youtube_client.videos().list(
                 part="snippet",
+                id=video_id
             ).execute()
             if video_response.get("items"):

run_telegram_bot.py CHANGED Viewed

@@ -22,7 +22,7 @@ def setup_logging():
 if __name__ == "__main__":
     print("🤖 Starting TubeMeta Telegram Bot...")
-    print("📋 Make sure your MCP server is running at: https://ag-source-knowledge-internal.trycloudflare.com")
     print("📱 Bot username: @tubemeta_bot")
     print("🔗 Bot link: https://t.me/tubemeta_bot")
     print("⏹️  Press Ctrl+C to stop\n")

 if __name__ == "__main__":
     print("🤖 Starting TubeMeta Telegram Bot...")
+    print("📋 Make sure your MCP server is running at: https://youtube-bot.tuttech.net")
     print("📱 Bot username: @tubemeta_bot")
     print("🔗 Bot link: https://t.me/tubemeta_bot")
     print("⏹️  Press Ctrl+C to stop\n")

telegram_bot.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import asyncio
-import json
 import logging
 from typing import Optional
 import aiohttp
@@ -8,13 +7,45 @@ from telegram.ext import Application, CommandHandler, MessageHandler, CallbackQu
 from telegram.constants import ParseMode
 import os
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 # Configuration
-TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "8168377961:AAHG-9KyFczCTkBo92ZS9OYKIY62s-NmuVo")
-MCP_BASE_URL = os.getenv("MCP_BASE_URL", "https://ag-source-knowledge-internal.trycloudflare.com/api/mcp")
 # Set up logging
 logging.basicConfig(
@@ -23,19 +54,27 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 class TubeMetaBot:
     def __init__(self):
         self.app = Application.builder().token(TELEGRAM_TOKEN).build()
         self.setup_handlers()
     def setup_handlers(self):
         """Set up command and message handlers"""
         self.app.add_handler(CommandHandler("start", self.start_command))
         self.app.add_handler(CommandHandler("help", self.help_command))
         self.app.add_handler(CommandHandler("search", self.search_command))
         self.app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, self.handle_message))
-        self.app.add_handler(CallbackQueryHandler(self.handle_callback))
     async def start_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         """Handle /start command"""
         welcome_text = """
@@ -43,7 +82,7 @@ class TubeMetaBot:
 I can help you with YouTube videos:
 • 🔍 Search for videos
-• 📊 Get video metadata
 • 📝 Extract transcripts
 • ⏰ Generate AI timecodes with Gemini 2.0
@@ -55,7 +94,7 @@ I can help you with YouTube videos:
 Type `/help` for more information!
         """
         await update.message.reply_text(welcome_text, parse_mode=ParseMode.MARKDOWN)
     async def help_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         """Handle /help command"""
         help_text = """
@@ -65,17 +104,18 @@ Type `/help` for more information!
 • `/start` - Welcome message
 • `/help` - Show this help
 • `/search <query>` - Search YouTube videos
 **Features:**
 • 🔍 **Video Search** - Find YouTube videos by keywords
-• 📊 **Video Info** - Get detailed metadata (title, duration, views, etc.)
 • 📝 **Transcripts** - Extract video transcripts/subtitles
 • ⏰ **AI Timecodes** - Generate smart timecodes with Gemini 2.0
 **Usage Examples:**
-• Send YouTube URL: `https://youtu.be/dQw4w9WgXcQ`
 • Search: `/search machine learning tutorial`
-• Or just send: `python programming`
 **Supported Languages:**
 🇺🇦 Ukrainian | 🇷🇺 Russian | 🇬🇧 English
@@ -83,109 +123,196 @@ Type `/help` for more information!
 Powered by Gemini 2.0 AI 🧠
         """
         await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
     async def search_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         """Handle /search command"""
         if not context.args:
             await update.message.reply_text("Please provide a search query. Example: `/search python tutorial`")
             return
         query = " ".join(context.args)
         await self.handle_search(update, query)
     async def handle_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         """Handle regular text messages"""
         text = update.message.text.strip()
         # Check if it's a YouTube URL
         if self.is_youtube_url(text):
-            await self.handle_youtube_url(update, text)
         else:
             # Treat as search query
             await self.handle_search(update, text)
     def is_youtube_url(self, text: str) -> bool:
         """Check if text contains a YouTube URL"""
         youtube_domains = [
-            'youtube.com', 'youtu.be', 'www.youtube.com',
             'm.youtube.com', 'music.youtube.com'
         ]
         return any(domain in text.lower() for domain in youtube_domains)
     async def handle_youtube_url(self, update: Update, url: str):
         """Handle YouTube URL - provide full analysis options"""
         # Send initial message
         processing_msg = await update.message.reply_text("🔍 Analyzing YouTube video...")
         try:
             # Get basic video info first
-            video_info = await self.call_mcp_action("video_info", {"video_id": url})
-            if not video_info or "error" in video_info:
                 await processing_msg.edit_text("❌ Could not analyze this YouTube video. Please check the URL.")
                 return
-            # Format video info
-            info_text = self.format_video_info(video_info)
             # Create action buttons
             keyboard = [
                 [
-                    InlineKeyboardButton("📝 Get Transcript", callback_data=f"transcript:{url}"),
-                    InlineKeyboardButton("⏰ AI Timecodes", callback_data=f"timecodes:{url}")
                 ],
                 [
-                    InlineKeyboardButton("🔍 Search Similar", callback_data=f"search:{video_info.get('title', 'related videos')}")
                 ]
             ]
             reply_markup = InlineKeyboardMarkup(keyboard)
-            await processing_msg.edit_text(info_text, reply_markup=reply_markup, parse_mode=ParseMode.MARKDOWN)
         except Exception as e:
             logger.error(f"Error handling YouTube URL: {e}")
             await processing_msg.edit_text("❌ An error occurred while analyzing the video.")
     async def handle_search(self, update: Update, query: str):
         """Handle search query"""
-        processing_msg = await update.message.reply_text(f"🔍 Searching for: *{query}*", parse_mode=ParseMode.MARKDOWN)
         try:
             results = await self.call_mcp_action("search", {"query": query, "max_results": 5})
-            if not results or "error" in results:
                 await processing_msg.edit_text("❌ No results found for your search.")
                 return
             # Format search results
-            search_text = f"🔍 **Search Results for:** {query}\n\n"
-            for i, video in enumerate(results.get("videos", []), 1):
-                search_text += f"**{i}. {video.get('title', 'Unknown Title')}**\n"
-                search_text += f"👤 {video.get('channel', 'Unknown Channel')}\n"
-                search_text += f"⏱️ {video.get('duration', 'Unknown')}\n"
-                search_text += f"👁️ {video.get('view_count', 'Unknown')} views\n"
-                search_text += f"🔗 {video.get('url', '')}\n\n"
             # Add search refinement buttons
             keyboard = [
                 [InlineKeyboardButton("🔍 New Search", callback_data="new_search")]
             ]
             reply_markup = InlineKeyboardMarkup(keyboard)
-            await processing_msg.edit_text(search_text, reply_markup=reply_markup, parse_mode=ParseMode.MARKDOWN)
         except Exception as e:
             logger.error(f"Error handling search: {e}")
             await processing_msg.edit_text("❌ An error occurred during search.")
-    async def handle_callback(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
-        """Handle inline keyboard callbacks"""
         query = update.callback_query
         await query.answer()
         data = query.data
         if data.startswith("transcript:"):
             url = data.replace("transcript:", "")
             await self.get_transcript(query, url)
@@ -195,143 +322,601 @@ Powered by Gemini 2.0 AI 🧠
         elif data.startswith("search:"):
             search_query = data.replace("search:", "")
             await self.handle_search_callback(query, search_query)
         elif data == "new_search":
-            await query.edit_message_text("🔍 Send me a search query or YouTube URL!")
     async def get_transcript(self, query, url: str):
         """Get video transcript"""
         await query.edit_message_text("📝 Extracting transcript...")
         try:
-            transcript = await self.call_mcp_action("transcript", {"video_id": url})
-            if not transcript or "error" in transcript:
-                await query.edit_message_text("❌ Could not extract transcript. Video may not have subtitles or may be restricted.")
                 return
-            # Format transcript
-            transcript_text = f"📝 **Transcript**\n\n{transcript.get('text', 'No transcript available')}"
-            # Telegram message limit is 4096 characters
-            if len(transcript_text) > 4000:
-                transcript_text = transcript_text[:4000] + "...\n\n*Transcript truncated due to length*"
-            # Add back button
-            keyboard = [[InlineKeyboardButton("⬅️ Back", callback_data=f"back:{url}")]]
-            reply_markup = InlineKeyboardMarkup(keyboard)
-            await query.edit_message_text(transcript_text, reply_markup=reply_markup, parse_mode=ParseMode.MARKDOWN)
         except Exception as e:
             logger.error(f"Error getting transcript: {e}")
             await query.edit_message_text("❌ An error occurred while extracting transcript.")
     async def get_timecodes(self, query, url: str):
         """Generate AI timecodes"""
         await query.edit_message_text("⏰ Generating AI timecodes with Gemini 2.0...")
         try:
-            timecodes = await self.call_mcp_action("gemini_timecodes", {
                 "video_id": url,
-                "language_code": "en",
                 "format": "youtube"
             })
-            if not timecodes or "error" in timecodes:
-                await query.edit_message_text("❌ Could not generate timecodes. Video may not have transcript or may be restricted.")
                 return
-            # Format timecodes
-            timecodes_text = f"⏰ **AI Generated Timecodes**\n\n{timecodes.get('timecodes', 'No timecodes generated')}"
-            # Telegram message limit
-            if len(timecodes_text) > 4000:
-                timecodes_text = timecodes_text[:4000] + "...\n\n*Timecodes truncated due to length*"
-            # Add back button
-            keyboard = [[InlineKeyboardButton("⬅️ Back", callback_data=f"back:{url}")]]
-            reply_markup = InlineKeyboardMarkup(keyboard)
-            await query.edit_message_text(timecodes_text, reply_markup=reply_markup, parse_mode=ParseMode.MARKDOWN)
         except Exception as e:
             logger.error(f"Error generating timecodes: {e}")
             await query.edit_message_text("❌ An error occurred while generating timecodes.")
     async def handle_search_callback(self, query, search_query: str):
-        """Handle search from callback"""
-        await query.edit_message_text(f"🔍 Searching for: *{search_query}*", parse_mode=ParseMode.MARKDOWN)
-        # Simulate update object for reuse of search logic
-        class FakeUpdate:
-            def __init__(self, message):
-                self.message = message
-        fake_update = FakeUpdate(query.message)
-        await self.handle_search(fake_update, search_query)
     async def call_mcp_action(self, action: str, params: dict) -> Optional[dict]:
         """Call MCP server action"""
         try:
-            async with aiohttp.ClientSession() as session:
                 payload = {
                     "action": action,
                     "parameters": params
                 }
-                async with session.post(MCP_BASE_URL, json=payload, timeout=30) as response:
-                    if response.status == 200:
-                        return await response.json()
-                    else:
-                        logger.error(f"MCP server error: {response.status}")
-                        return None
         except Exception as e:
             logger.error(f"Error calling MCP server: {e}")
             return None
     def format_video_info(self, video_info: dict) -> str:
-        """Format video information for display"""
         title = video_info.get("title", "Unknown Title")
         channel = video_info.get("channel", "Unknown Channel")
         duration = video_info.get("duration", "Unknown")
         view_count = video_info.get("view_count", "Unknown")
         upload_date = video_info.get("upload_date", "Unknown")
         description = video_info.get("description", "")
         # Truncate description if too long
         if len(description) > 200:
             description = description[:200] + "..."
-        info_text = f"""🎬 **{title}**
-👤 **Channel:** {channel}
-⏱️ **Duration:** {duration}
-👁️ **Views:** {view_count}
-📅 **Uploaded:** {upload_date}
-📝 **Description:**
 {description}
 Choose an action below:"""
         return info_text
     async def run(self):
         """Start the bot"""
         logger.info("Starting TubeMeta Bot...")
-        await self.app.initialize()
-        await self.app.start()
-        await self.app.updater.start_polling()
         try:
             # Keep the bot running
             await asyncio.Event().wait()
         except KeyboardInterrupt:
-            logger.info("Shutting down bot...")
         finally:
-            await self.app.updater.stop()
-            await self.app.stop()
-            await self.app.shutdown()
 async def main():
     """Main function"""
@@ -339,4 +924,4 @@ async def main():
     await bot.run()
 if __name__ == "__main__":
-    asyncio.run(main())

 import asyncio
 import logging
 from typing import Optional
 import aiohttp
 from telegram.constants import ParseMode
 import os
 from dotenv import load_dotenv
+import warnings
 # Each `message` below is a regular expression that the start of the warning text
 # must match for the warning to be ignored.
+# Ignore standard warnings
+warnings.filterwarnings("ignore", message="SSL shutdown timed out")
+warnings.filterwarnings("ignore", message="Certificate verification failed")
+warnings.filterwarnings("ignore", message="SSL handshake failed")
+warnings.filterwarnings("ignore", message="Connection lost")
 # The setLevel calls raise the severity threshold of the named loggers; they do not
 # filter by message content, so records at or above the threshold still get through.
+# Disable SSL error logging from all possible sources
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("httpcore").setLevel(logging.WARNING)
+logging.getLogger("httpcore.connection").setLevel(logging.ERROR)
+logging.getLogger("httpcore.http11").setLevel(logging.ERROR)
+logging.getLogger("asyncio").setLevel(logging.WARNING)
+# Create custom filter to suppress SSL errors
+class SSLErrorFilter(logging.Filter):
+    def filter(self, record):
+        message = record.getMessage()
+        return not any(phrase in message.lower() for phrase in [
+            'ssl shutdown timed out',
+            'connection lost',
+            'ssl handshake failed',
+            'certificate verification failed'
+        ])
+# Apply filter to root logger
 # NOTE(review): a filter attached to a logger is consulted only for records logged on
 # that logger itself; records propagated from descendant loggers (e.g. "httpx") are not
 # passed through it. Confirm whether the filter should be attached to the root handlers.
+logging.getLogger().addFilter(SSLErrorFilter())
 # Load environment variables
 load_dotenv()
 # Configuration
 # The bot token is mandatory: importing this module fails fast when it is missing.
+TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")
+if not TELEGRAM_TOKEN:
+    raise ValueError("TELEGRAM_TOKEN environment variable is required")
+# Support both variable name variants
 # NOTE(review): both lookups read the same "MCP_BASE_URL" name, so the inner call only
 # supplies the hard-coded default URL; no second variable name is actually consulted.
+MCP_BASE_URL = os.getenv("MCP_BASE_URL", os.getenv("MCP_BASE_URL", "https://youtube-bot.tuttech.net/api/mcp"))
 # Set up logging
 logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
+def escape_markdown(text: str) -> str:
+    """Escape markdown special characters."""
+    escape_chars = ['_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!']
+    for char in escape_chars:
+        text = text.replace(char, '\\' + char)
+    return text
 class TubeMetaBot:
     def __init__(self):
         # Build the bot application from the module-level TELEGRAM_TOKEN, then
         # register all handlers on it.
         self.app = Application.builder().token(TELEGRAM_TOKEN).build()
         self.setup_handlers()
     def setup_handlers(self):
         """Register the command, text-message and callback-query handlers on the application."""
         # Kept in the original registration order: commands, free text, button callbacks.
         registrations = (
             CommandHandler("start", self.start_command),
             CommandHandler("help", self.help_command),
             CommandHandler("search", self.search_command),
             CommandHandler("analyze", self.analyze_command),
             MessageHandler(filters.TEXT & ~filters.COMMAND, self.handle_message),
             CallbackQueryHandler(self.handle_callback_query),
         )
         for handler in registrations:
             self.app.add_handler(handler)
     async def start_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         """Handle /start command"""
         # Static welcome text; it is sent with Markdown parsing so the backticks
         # around /help are treated as formatting rather than shown literally.
         welcome_text = """
 I can help you with YouTube videos:
 • 🔍 Search for videos
+• 📊 Get video metadata
 • 📝 Extract transcripts
 • ⏰ Generate AI timecodes with Gemini 2.0
 Type `/help` for more information!
         """
         await update.message.reply_text(welcome_text, parse_mode=ParseMode.MARKDOWN)
     async def help_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         """Handle /help command"""
         # Static help text, sent with Markdown parsing.
         # NOTE(review): the text describes "/analyze" followed by a separate URL message,
         # but handle_message answers URL messages with a hint to use /analyze instead of
         # analyzing them, so only "/analyze <url>" actually starts an analysis.
         help_text = """
 • `/start` - Welcome message
 • `/help` - Show this help
 • `/search <query>` - Search YouTube videos
+• `/analyze` - Analyze YouTube video (send after this command)
 **Features:**
 • 🔍 **Video Search** - Find YouTube videos by keywords
+• 📊 **Video Analysis** - Get detailed metadata (title, duration, views, etc.)
 • 📝 **Transcripts** - Extract video transcripts/subtitles
 • ⏰ **AI Timecodes** - Generate smart timecodes with Gemini 2.0
 **Usage Examples:**
 • Search: `/search machine learning tutorial`
+• Analysis: `/analyze` then send YouTube URL
+• Or just send: `python programming` for search
 **Supported Languages:**
 🇺🇦 Ukrainian | 🇷🇺 Russian | 🇬🇧 English
 Powered by Gemini 2.0 AI 🧠
         """
         await update.message.reply_text(help_text, parse_mode=ParseMode.MARKDOWN)
     async def search_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         """Handle /search command.

         Joins the command arguments into a single query and delegates to
         handle_search. Without arguments a usage hint is sent instead.
         """
         if not update.message:
             return  # nothing to reply to (e.g. an edited-message update)
         if not context.args:
             # The hint contains backticks, so it needs Markdown parsing to render as code.
             await update.message.reply_text(
                 "Please provide a search query. Example: `/search python tutorial`",
                 parse_mode=ParseMode.MARKDOWN,
             )
             return
         await self.handle_search(update, " ".join(context.args))
+    async def analyze_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
+        """Handle /analyze command"""
+        if not update.message:
+            return  # Skip if no message (shouldn't happen in command handlers)
+        if context.args:
+            # URL provided with command
+            url = " ".join(context.args)
+            if self.is_youtube_url(url):
+                await self.handle_youtube_url(update, url)
+            else:
+                await update.message.reply_text("❌ Please provide a valid YouTube URL. Example: `/analyze https://youtu.be/dQw4w9WgXcQ`")
+        else:
+            # Ask for URL
+            await update.message.reply_text("📺 Please send me a YouTube URL to analyze.\n\nExample: `https://youtu.be/dQw4w9WgXcQ`", parse_mode=ParseMode.MARKDOWN)
     async def handle_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         """Handle regular text messages.

         A message mentioning a YouTube host gets a hint to use /analyze; anything
         else is treated as a search query and passed to handle_search.
         """
         message = update.message
         if not message or not message.text:
             # Updates without a plain `message` (e.g. edited messages) would otherwise
             # raise AttributeError on `.text`.
             return
         text = message.text.strip()
         if not self.is_youtube_url(text):
             # Treat as search query
             await self.handle_search(update, text)
             return
         # NOTE(review): URLs are never analyzed from here, so the "type /analyze and then
         # send the URL" path advertised below loops back to this hint — confirm intent.
         await message.reply_text(
             "📺 I see you sent a YouTube URL! Use the `/analyze` command to analyze it.\n\n"
             "Example: `/analyze https://youtu.be/dQw4w9WgXcQ`\n"
             "Or just type `/analyze` and then send the URL.",
             parse_mode=ParseMode.MARKDOWN
         )
     def is_youtube_url(self, text: str) -> bool:
         """Report whether *text* mentions one of the known YouTube host names.

         This is a case-insensitive substring check, not a URL parse.
         """
         lowered = text.lower()
         known_hosts = (
             'youtube.com', 'youtu.be', 'www.youtube.com',
             'm.youtube.com', 'music.youtube.com',
         )
         for host in known_hosts:
             if host in lowered:
                 return True
         return False
     async def handle_youtube_url(self, update: Update, url: str):
         """Handle YouTube URL - provide full analysis options.

         Fetches metadata through the MCP "video_info" action, renders it with
         format_video_info_from_data and attaches transcript / timecodes / search
         buttons. Every failure is reported by editing the progress message.
         """
         # Send initial message
         processing_msg = await update.message.reply_text("🔍 Analyzing YouTube video...")
         try:
             video_info_response = await self.call_mcp_action("video_info", {"video_id": url})
             if not video_info_response:
                 await processing_msg.edit_text("❌ Could not analyze this YouTube video. Please check the URL.")
                 return
             if video_info_response.get("error"):
                 await processing_msg.edit_text(f"❌ Error: {video_info_response['error']}")
                 return
             video_data = video_info_response.get("data")
             if not video_data:
                 await processing_msg.edit_text("❌ Could not retrieve video information. Please check the URL.")
                 return
             info_text = self.format_video_info_from_data(video_data)
             # Telegram limits callback_data to 64 *bytes*. The id keeps its 30-character cap.
             safe_video_id = (video_data.get("video_id") or url)[:30]
             # `or` also covers a title that is present but None/empty.
             video_title = video_data.get("title") or "related videos"
             # 30 characters can exceed the byte budget for non-ASCII titles (Cyrillic, emoji),
             # so clip the UTF-8 encoding as well; "ignore" drops a character cut in half.
             safe_title = video_title[:30].encode("utf-8")[:64 - len("search:")].decode("utf-8", "ignore")
             keyboard = [
                 [
                     InlineKeyboardButton("📝 Get Transcript", callback_data=f"transcript:{safe_video_id}"),
                     InlineKeyboardButton("⏰ AI Timecodes", callback_data=f"timecodes:{safe_video_id}")
                 ],
                 [
                     InlineKeyboardButton("🔍 Search Similar", callback_data=f"search:{safe_title}")
                 ]
             ]
             reply_markup = InlineKeyboardMarkup(keyboard)
             await processing_msg.edit_text(info_text, reply_markup=reply_markup, parse_mode=ParseMode.HTML)
         except Exception as e:
             logger.error(f"Error handling YouTube URL: {e}")
             await processing_msg.edit_text("❌ An error occurred while analyzing the video.")
+    def format_video_info_from_data(self, video_data: dict) -> str:
+        """Format video information from MCP response data for display using HTML"""
+        title = video_data.get("title", "Unknown Title")
+        channel = video_data.get("channel_title", "Unknown Channel")
+        duration = video_data.get("duration", "Unknown")
+        view_count = video_data.get("view_count", "Unknown")
+        upload_date = video_data.get("published_at", "Unknown")
+        like_count = video_data.get("like_count", "Unknown")
+        comment_count = video_data.get("comment_count", "Unknown")
+        info_text = f"""🎬 <b>{title}</b>
+👤 <b>Channel:</b> {channel}
+⏱️ <b>Duration:</b> {duration}
+👁️ <b>Views:</b> {view_count}
+👍 <b>Likes:</b> {like_count}
+💬 <b>Comments:</b> {comment_count}
+📅 <b>Uploaded:</b> {upload_date}
+Choose an action below:"""
+        return info_text
     async def handle_search(self, update: Update, query: str):
         """Handle search query.

         Runs the MCP "search" action for *query* (five results requested) and edits
         the progress message into an HTML-formatted result list. The query and all
         metadata fields are HTML-escaped before they are placed in the message.
         """
         from html import escape  # function-scope import keeps this block self-contained
         def safe(value) -> str:
             # Telegram does not escape anything itself: a stray "<" or "&" in user or
             # server text makes it reject the HTML message.
             return escape(str(value), quote=False)
         processing_msg = await update.message.reply_text(f"🔍 Searching for: <b>{safe(query)}</b>", parse_mode=ParseMode.HTML)
         try:
             results = await self.call_mcp_action("search", {"query": query, "max_results": 5})
             if not results or (isinstance(results, dict) and "error" in results):
                 await processing_msg.edit_text("❌ No results found for your search.")
                 return
             # A bare list is the new response format; a dict holding "videos" is the old one.
             videos = results if isinstance(results, list) else results.get("videos", [])
             if not videos:
                 # Avoid showing a header with nothing under it.
                 await processing_msg.edit_text("❌ No results found for your search.")
                 return
             parts = [f"🔍 <b>Search Results for:</b> {safe(query)}\n\n"]
             for i, video_obj in enumerate(videos, 1):
                 # Entries are either wrapped as {"data": {...}} or are the video dict itself.
                 if isinstance(video_obj, dict) and "data" in video_obj:
                     video = video_obj["data"]
                 else:
                     video = video_obj
                 title = video.get('title', 'Unknown Title')
                 channel = video.get('channel_title', video.get('channel', 'Unknown Channel'))
                 duration = str(video.get('duration', 'Unknown'))
                 view_count = str(video.get('view_count', 'Unknown'))
                 video_id = video.get('video_id', '')
                 parts.append(f"<b>{i}. {safe(title)}</b>\n")
                 parts.append(f"👤 {safe(channel)}\n")
                 if duration != 'Unknown':
                     parts.append(f"⏱️ {safe(duration)}\n")
                 if view_count != 'Unknown':
                     parts.append(f"👁️ {safe(view_count)} views\n")
                 parts.append(f"🔗 https://www.youtube.com/watch?v={safe(video_id)}\n\n")
             # Add search refinement buttons
             keyboard = [
                 [InlineKeyboardButton("🔍 New Search", callback_data="new_search")]
             ]
             reply_markup = InlineKeyboardMarkup(keyboard)
             await processing_msg.edit_text("".join(parts), reply_markup=reply_markup, parse_mode=ParseMode.HTML)
         except Exception as e:
             logger.error(f"Error handling search: {e}")
             await processing_msg.edit_text("❌ An error occurred during search.")
+    async def handle_callback_query(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
+        """Handle inline keyboard button presses"""
         query = update.callback_query
         await query.answer()
         data = query.data
+        logger.info(f"Callback query: {data}")
         if data.startswith("transcript:"):
             url = data.replace("transcript:", "")
             await self.get_transcript(query, url)
         elif data.startswith("search:"):
             search_query = data.replace("search:", "")
             await self.handle_search_callback(query, search_query)
+        elif data.startswith("back:"):
+            url = data.replace("back:", "")
+            await self.handle_back_to_video(query, url)
+        elif data.startswith("full_transcript:"):
+            url = data.replace("full_transcript:", "")
+            await self.send_full_transcript(query, url)
+        elif data.startswith("full_timecodes:"):
+            url = data.replace("full_timecodes:", "")
+            await self.send_full_timecodes(query, url)
+        elif data.startswith("analyze:"):
+            video_id = data.replace("analyze:", "")
+            await self.analyze_video(query, f"https://www.youtube.com/watch?v={video_id}")
+        elif data.startswith("back_to_analysis:"):
+            url = data.replace("back_to_analysis:", "")
+            await self.handle_back_to_video(query, url)
         elif data == "new_search":
+            await query.edit_message_text(
+                "🔍 **Send me a new search query!**\n\nJust type your search terms and I'll find YouTube videos for you.",
+                parse_mode=ParseMode.MARKDOWN
+            )
+        else:
+            await query.edit_message_text("❌ Unknown action")
     async def get_transcript(self, query, url: str):
         """Get video transcript.

         Calls the MCP "transcript" action and shows the returned "markdown" text.
         Text longer than the local 4000-character budget is shown as a preview with
         a button for requesting the full text as a file. Transcript text is
         HTML-escaped because the message is sent with HTML parsing.
         """
         from html import escape  # function-scope import keeps this block self-contained
         await query.edit_message_text("📝 Extracting transcript...")
         try:
             transcript_response = await self.call_mcp_action("transcript", {"video_id": url})
             if not transcript_response:
                 await query.edit_message_text("❌ Could not extract transcript. Please try again later.")
                 return
             if transcript_response.get("error"):
                 await query.edit_message_text(f"❌ {transcript_response['error']}")
                 return
             if transcript_response.get("type") != "youtube_transcript":
                 await query.edit_message_text("❌ Invalid transcript response format.")
                 return
             transcript_text = transcript_response.get("markdown", "")
             if not transcript_text:
                 await query.edit_message_text("❌ Transcript is empty or unavailable.")
                 return
             max_length = 4000  # Leave room for buttons and formatting
             back_button = InlineKeyboardButton("⬅️ Back", callback_data=f"back:{url}")
             if len(transcript_text) > max_length:
                 preview_length = max_length - 200
                 # Cut first, escape second, so an HTML entity is never split in half.
                 # The header uses <b> because "**" is not markup under HTML parsing.
                 message_text = (
                     f"📝 <b>Transcript Preview</b> (showing first {preview_length} characters)\n\n"
                     f"{escape(transcript_text[:preview_length], quote=False)}...\n\n"
                     f"<i>📄 Full transcript: {len(transcript_text)} characters total</i>\n"
                     "<i>⚠️ Transcript is too long for Telegram. Showing preview only.</i>"
                 )
                 keyboard = [
                     [
                         InlineKeyboardButton("📄 Get Full Text", callback_data=f"full_transcript:{url}"),
                         back_button
                     ]
                 ]
             else:
                 # Short enough to display fully
                 message_text = escape(transcript_text, quote=False)
                 keyboard = [[back_button]]
             reply_markup = InlineKeyboardMarkup(keyboard)
             await query.edit_message_text(message_text, reply_markup=reply_markup, parse_mode=ParseMode.HTML)
         except Exception as e:
             logger.error(f"Error getting transcript: {e}")
             await query.edit_message_text("❌ An error occurred while extracting transcript.")
     async def get_timecodes(self, query, url: str):
         """Generate AI timecodes.

         Calls the MCP "gemini_timecodes" action. Output within the local
         4000-character budget is converted with convert_markdown_to_html and shown
         in full. Longer output is replaced by a summary plus as many timecode lines
         as fit, with a button for requesting the complete list as a file.
         """
         from html import escape  # function-scope import keeps this block self-contained
         await query.edit_message_text("⏰ Generating AI timecodes with Gemini 2.0...")
         try:
             timecodes_response = await self.call_mcp_action("gemini_timecodes", {
                 "video_id": url,
                 "format": "youtube"
             })
             if not timecodes_response:
                 await query.edit_message_text("❌ Could not generate timecodes. Please try again later.")
                 return
             if timecodes_response.get("error"):
                 await query.edit_message_text(f"❌ {timecodes_response['error']}")
                 return
             if timecodes_response.get("type") != "youtube_gemini_timecodes":
                 await query.edit_message_text("❌ Invalid timecodes response format.")
                 return
             timecodes_text = timecodes_response.get("markdown", "")
             if not timecodes_text:
                 await query.edit_message_text("❌ No timecodes were generated.")
                 return
             max_length = 4000  # Leave room for buttons and formatting
             back_button = InlineKeyboardButton("⬅️ Back", callback_data=f"back:{url}")
             if len(timecodes_text) <= max_length:
                 # Short enough to display fully; convert markdown to HTML for code blocks.
                 html_timecodes = self.convert_markdown_to_html(timecodes_text)
                 await query.edit_message_text(html_timecodes, reply_markup=InlineKeyboardMarkup([[back_button]]), parse_mode=ParseMode.HTML)
                 return
             data = timecodes_response.get("data") or {}
             timecodes_list = data.get("timecodes") or []
             # HTML tags throughout: the message is sent with HTML parsing, where "**" is literal.
             summary_text = (
                 "⏰ <b>AI Timecodes Generated</b>\n\n"
                 f"🤖 <b>Model:</b> {escape(str(data.get('model', 'Gemini AI')), quote=False)}\n"
                 f"🌐 <b>Language:</b> {escape(str(data.get('detected_language', 'unknown')), quote=False)}\n"
                 f"📊 <b>Total timecodes:</b> {len(timecodes_list)}\n\n"
             )
             # Reserve space for buttons and footer
             available_space = max_length - len(summary_text) - 300
             shown_lines = []
             used = 0
             for tc in timecodes_list:
                 tc_line = f"{tc}\n"
                 if used + len(tc_line) >= available_space:
                     break
                 shown_lines.append(escape(tc_line, quote=False))
                 used += len(tc_line)
             summary_text += "<b>Timecodes Preview:</b>\n<pre>" + "".join(shown_lines) + "</pre>\n\n"
             if len(shown_lines) < len(timecodes_list):
                 summary_text += f"<i>📄 Showing {len(shown_lines)} of {len(timecodes_list)} timecodes</i>\n"
                 summary_text += "<i>💾 Download full file for complete list</i>"
             # When every timecode fits, the summary is still used: the raw markdown is
             # known to exceed max_length on this path and must not be sent as-is.
             keyboard = [
                 [
                     InlineKeyboardButton("📄 Get Full List", callback_data=f"full_timecodes:{url}"),
                     back_button
                 ]
             ]
             reply_markup = InlineKeyboardMarkup(keyboard)
             await query.edit_message_text(summary_text, reply_markup=reply_markup, parse_mode=ParseMode.HTML)
         except Exception as e:
             logger.error(f"Error generating timecodes: {e}")
             await query.edit_message_text("❌ An error occurred while generating timecodes.")
+    async def send_full_transcript(self, query, url: str):
+        """Send full transcript as a text file"""
+        await query.edit_message_text("📄 Preparing full transcript file...")
+        try:
+            transcript_response = await self.call_mcp_action("transcript", {"video_id": url})
+            if not transcript_response or transcript_response.get("error"):
+                await query.edit_message_text("❌ Could not extract full transcript.")
+                return
+            # Get full transcript text
+            full_transcript = transcript_response.get("markdown", "")
+            if not full_transcript:
+                await query.edit_message_text("❌ Transcript is empty.")
+                return
+            # Create a simple text version (without markdown formatting)
+            simple_text = full_transcript.replace("# 📝 Transcript\n\n", "")
+            simple_text = simple_text.replace("**[", "[").replace("]**", "]")
+            # Send as document
+            from io import BytesIO
+            import re
+            # Extract video title from URL for filename
+            video_info = await self.call_mcp_action("video_info", {"video_id": url})
+            title = "transcript"
+            if video_info and video_info.get("data"):
+                title = video_info["data"].get("title", "transcript")
+                # Clean title for filename
+                title = re.sub(r'[<>:"/\\|?*]', '_', title)[:50]
+            # Create file
+            transcript_file = BytesIO(simple_text.encode('utf-8'))
+            transcript_file.name = f"{title}_transcript.txt"
+            # Send file and create a new message instead of editing
+            await query.message.reply_document(
+                document=transcript_file,
+                caption=f"📄 Full transcript for: {title}",
+                reply_markup=InlineKeyboardMarkup([[
+                    InlineKeyboardButton("📹 Back to Video", callback_data=f"back_to_analysis:{url}")
+                ]])
+            )
+            # Edit original message to show completion
+            await query.edit_message_text(
+                "✅ Transcript file sent!\n\n📄 Check the file above for the complete transcript.",
+                reply_markup=InlineKeyboardMarkup([[
+                    InlineKeyboardButton("📹 Back to Video", callback_data=f"back_to_analysis:{url}")
+                ]])
+            )
+        except Exception as e:
+            logger.error(f"Error sending full transcript: {e}")
+            await query.edit_message_text("❌ An error occurred while preparing transcript file.")
+    async def send_full_timecodes(self, query, url: str):
+        """Send full timecodes as a text file"""
+        await query.edit_message_text("📄 Preparing full timecodes file...")
+        try:
+            timecodes_response = await self.call_mcp_action("gemini_timecodes", {
+                "video_id": url,
+                "format": "youtube"
+            })
+            if not timecodes_response or timecodes_response.get("error"):
+                await query.edit_message_text("❌ Could not generate full timecodes.")
+                return
+            # Get full timecodes
+            data = timecodes_response.get("data", {})
+            timecodes_list = data.get("timecodes", [])
+            if not timecodes_list:
+                await query.edit_message_text("❌ No timecodes available.")
+                return
+            # Create text content
+            content = "AI Generated Timecodes\n"
+            content += f"Model: {data.get('model', 'Gemini AI')}\n"
+            content += f"Language: {data.get('detected_language', 'auto-detected')}\n"
+            content += f"Total: {len(timecodes_list)} timecodes\n\n"
+            content += "\n".join(timecodes_list)
+            # Send as document
+            from io import BytesIO
+            import re
+            # Extract video title for filename
+            video_info = await self.call_mcp_action("video_info", {"video_id": url})
+            title = "timecodes"
+            if video_info and video_info.get("data"):
+                title = video_info["data"].get("title", "timecodes")
+                # Clean title for filename
+                title = re.sub(r'[<>:"/\\|?*]', '_', title)[:50]
+            # Create file
+            timecodes_file = BytesIO(content.encode('utf-8'))
+            timecodes_file.name = f"{title}_timecodes.txt"
+            # Send file and create a new message instead of editing
+            await query.message.reply_document(
+                document=timecodes_file,
+                caption=f"⏰ AI Timecodes for: {title}",
+                reply_markup=InlineKeyboardMarkup([[
+                    InlineKeyboardButton("📹 Back to Video", callback_data=f"back_to_analysis:{url}")
+                ]])
+            )
+            # Edit original message to show completion
+            await query.edit_message_text(
+                "✅ Timecodes file sent!\n\n📄 Check the file above for all timecodes.",
+                reply_markup=InlineKeyboardMarkup([[
+                    InlineKeyboardButton("📹 Back to Video", callback_data=f"back_to_analysis:{url}")
+                ]])
+            )
+        except Exception as e:
+            logger.error(f"Error sending full timecodes: {e}")
+            await query.edit_message_text("❌ An error occurred while preparing timecodes file.")
+    async def handle_back_to_video(self, query, url: str):
+        """Return to video analysis view"""
+        try:
+            # Check if the current message has text that can be edited
+            current_message = query.message
+            if not current_message or not current_message.text:
+                # If no text to edit, send a new message instead of editing
+                await query.answer("🔄 Loading video information...")
+                # Get basic video info
+                video_info_response = await self.call_mcp_action("video_info", {"video_id": url})
+                if not video_info_response or video_info_response.get("error"):
+                    await current_message.reply_text("❌ Could not analyze this YouTube video. Please check the URL.")
+                    return
+                video_data = video_info_response.get("data")
+                if not video_data:
+                    await current_message.reply_text("❌ Could not retrieve video information. Please check the URL.")
+                    return
+                # Format video info for display
+                info_text = self.format_video_info_from_data(video_data)
+                # Create action buttons
+                video_id = video_data.get("video_id", url)
+                safe_video_id = video_id[:30] if video_id else url[:30]
+                video_title = video_data.get('title', 'related videos')
+                safe_title = video_title[:30] if len(video_title) > 30 else video_title
+                keyboard = [
+                    [
+                        InlineKeyboardButton("📝 Get Transcript", callback_data=f"transcript:{safe_video_id}"),
+                        InlineKeyboardButton("⏰ AI Timecodes", callback_data=f"timecodes:{safe_video_id}")
+                    ],
+                    [
+                        InlineKeyboardButton("🔍 Search Similar", callback_data=f"search:{safe_title}")
+                    ]
+                ]
+                reply_markup = InlineKeyboardMarkup(keyboard)
+                await current_message.reply_text(info_text, reply_markup=reply_markup, parse_mode=ParseMode.HTML)
+                return
+            # If message has text, proceed with normal editing
+            await query.edit_message_text("🔄 Loading video information...")
+            # Re-analyze the video by calling handle_youtube_url logic
+            # Get basic video info first
+            video_info_response = await self.call_mcp_action("video_info", {"video_id": url})
+            # Check if we got a valid response
+            if not video_info_response:
+                await query.edit_message_text("❌ Could not analyze this YouTube video. Please check the URL.")
+                return
+            # Check for error in response
+            if video_info_response.get("error"):
+                await query.edit_message_text(f"❌ Error: {video_info_response['error']}")
+                return
+            # Check if we have video data
+            video_data = video_info_response.get("data")
+            if not video_data:
+                await query.edit_message_text("❌ Could not retrieve video information. Please check the URL.")
+                return
+            # Format video info for display
+            info_text = self.format_video_info_from_data(video_data)
+            # Create action buttons
+            video_id = video_data.get("video_id", url)
+            # Limit callback data to avoid Button_data_invalid error (Telegram limit is 64 bytes)
+            safe_video_id = video_id[:30] if video_id else url[:30]  # Limit video ID
+            video_title = video_data.get('title', 'related videos')
+            # Truncate title for search callback to fit in 64 byte limit
+            safe_title = video_title[:30] if len(video_title) > 30 else video_title
+            keyboard = [
+                [
+                    InlineKeyboardButton("📝 Get Transcript", callback_data=f"transcript:{safe_video_id}"),
+                    InlineKeyboardButton("⏰ AI Timecodes", callback_data=f"timecodes:{safe_video_id}")
+                ],
+                [
+                    InlineKeyboardButton("🔍 Search Similar", callback_data=f"search:{safe_title}")
+                ]
+            ]
+            reply_markup = InlineKeyboardMarkup(keyboard)
+            await query.edit_message_text(info_text, reply_markup=reply_markup, parse_mode=ParseMode.HTML)
+        except Exception as e:
+            logger.error(f"Error returning to video: {e}")
+            # If edit fails, try to send a new message
+            try:
+                await query.answer("❌ Could not load video information.")
+                await query.message.reply_text("❌ Could not load video information. Please try again.")
+            except Exception:
+                pass  # Ignore if this also fails
+    async def analyze_video(self, query, url: str):
+        """Analyze video for callback queries (alias for handle_back_to_video)"""
         # Pure delegation: arguments are forwarded unchanged to handle_back_to_video.
+        await self.handle_back_to_video(query, url)
     async def handle_search_callback(self, query, search_query: str):
+        """Handle search callback from inline keyboard"""
+        await query.edit_message_text(f"🔍 Searching for: {search_query}...")
+        try:
+            # Call MCP search action
+            search_response = await self.call_mcp_action("search", {
+                "query": search_query,
+                "max_results": 5
+            })
+            if not search_response:
+                await query.edit_message_text("❌ No results found for your search.")
+                return
+            # Format search results
+            if isinstance(search_response, list) and len(search_response) > 0:
+                results_text = f"🔍 **Search Results for:** {search_query}\n\n"
+                keyboard = []
+                for i, result in enumerate(search_response[:5], 1):
+                    if result.get("data"):
+                        video_data = result["data"]
+                        title = video_data.get("title", "Unknown Title")[:50]
+                        channel = video_data.get("channel_title", "Unknown Channel")
+                        video_id = video_data.get("video_id", "")
+                        results_text += f"**{i}.** {title}\n"
+                        results_text += f"👤 {channel}\n\n"
+                        # Add analyze button for each video
+                        if video_id:
+                            keyboard.append([InlineKeyboardButton(
+                                f"📹 Analyze Video {i}",
+                                callback_data=f"analyze:{video_id}"
+                            )])
+                # Add new search button
+                keyboard.append([InlineKeyboardButton("🔍 New Search", callback_data="new_search")])
+                reply_markup = InlineKeyboardMarkup(keyboard)
+                await query.edit_message_text(results_text, reply_markup=reply_markup, parse_mode=ParseMode.MARKDOWN)
+            else:
+                await query.edit_message_text("❌ No results found for your search.")
+        except Exception as e:
+            logger.error(f"Error in search callback: {e}")
+            await query.edit_message_text("❌ An error occurred during search.")
     async def call_mcp_action(self, action: str, params: dict) -> Optional[dict]:
         """Call MCP server action"""
         try:
+            # Configure timeout and connection settings
+            timeout = aiohttp.ClientTimeout(total=30, connect=10)
+            # Create connector without SSL for full SSL verification bypass
+            connector = aiohttp.TCPConnector(
+                limit=10,
+                limit_per_host=5,
+                ttl_dns_cache=300,
+                use_dns_cache=True,
+                enable_cleanup_closed=False,  # Disable cleanup to prevent errors
+                ssl=False
+            )
+            # Create session with full SSL bypass
+            async with aiohttp.ClientSession(
+                timeout=timeout,
+                connector=connector,
+                trust_env=True,
+                skip_auto_headers={'User-Agent'}
+            ) as session:
                 payload = {
                     "action": action,
                     "parameters": params
                 }
+                headers = {
+                    'Content-Type': 'application/json',
+                    'User-Agent': 'TubeMetaBot/1.0'
+                }
+                try:
+                    # Execute request with full SSL bypass
+                    async with session.post(
+                        MCP_BASE_URL,
+                        json=payload,
+                        headers=headers,
+                        ssl=False
+                    ) as response:
+                        if response.status == 200:
+                            result = await response.json()
+                            logger.info(f"MCP request successful: {action}")
+                            return result
+                        else:
+                            logger.error(f"MCP server error: {response.status}")
+                            return None
+                except aiohttp.ClientConnectorError as e:
+                    logger.error(f"Connection error: {e}")
+                    return None
+                except asyncio.TimeoutError as e:
+                    logger.error(f"Timeout error: {e}")
+                    return None
+                except Exception as e:
+                    logger.error(f"Request error: {e}")
+                    return None
         except Exception as e:
             logger.error(f"Error calling MCP server: {e}")
             return None
     def format_video_info(self, video_info: dict) -> str:
+        """Format video information for display using HTML"""
         title = video_info.get("title", "Unknown Title")
         channel = video_info.get("channel", "Unknown Channel")
         duration = video_info.get("duration", "Unknown")
         view_count = video_info.get("view_count", "Unknown")
         upload_date = video_info.get("upload_date", "Unknown")
         description = video_info.get("description", "")
         # Truncate description if too long
         if len(description) > 200:
             description = description[:200] + "..."
+        info_text = f"""🎬 <b>{title}</b>
+👤 <b>Channel:</b> {channel}
+⏱️ <b>Duration:</b> {duration}
+👁️ <b>Views:</b> {view_count}
+📅 <b>Uploaded:</b> {upload_date}
+📝 <b>Description:</b>
 {description}
 Choose an action below:"""
         return info_text
+    async def handle_analyze(self, update: Update):
+        """Handle analyze request"""
+        await update.message.reply_text("📺 Please send me a YouTube URL to analyze.\n\nExample: `https://youtu.be/dQw4w9WgXcQ`", parse_mode=ParseMode.MARKDOWN)
     async def run(self):
         """Start the bot"""
         logger.info("Starting TubeMeta Bot...")
         try:
+            await self.app.initialize()
+            await self.app.start()
+            await self.app.updater.start_polling(drop_pending_updates=True)
+            logger.info(f"Bot successfully started! MCP URL: {MCP_BASE_URL}")
             # Keep the bot running
             await asyncio.Event().wait()
         except KeyboardInterrupt:
+            logger.info("Shutting down bot due to keyboard interrupt...")
+        except Exception as e:
+            logger.error(f"Error in bot operation: {e}")
         finally:
+            # Graceful shutdown
+            logger.info("Shutting down bot...")
+            try:
+                await self.app.updater.stop()
+                await self.app.stop()
+                await self.app.shutdown()
+                logger.info("Bot shutdown complete")
+            except Exception as e:
+                logger.error(f"Error during shutdown: {e}")
+    def convert_markdown_to_html(self, text: str) -> str:
+        """Convert markdown formatting to HTML for Telegram."""
+        # Convert code blocks (```) to HTML
+        import re
+        # Replace triple backticks with HTML pre tags
+        text = re.sub(r'```\n(.*?)\n```', r'<pre>\1</pre>', text, flags=re.DOTALL)
+        # Convert **bold** to HTML
+        text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
+        # Convert *italic* to HTML
+        text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
+        # Convert inline code `code` to HTML
+        text = re.sub(r'`(.*?)`', r'<code>\1</code>', text)
+        return text
 async def main():
+    """Entry-point coroutine: await ``run()`` on the module-level ``bot`` object."""
     await bot.run()
 if __name__ == "__main__":
+    # Only start the event loop when this file is executed as a script
+    asyncio.run(main())

update_server.bat DELETED Viewed

@@ -1,43 +0,0 @@
-@echo off
-rem Batch script for updating the Windows server
-echo 🔄 Обновление сервера...
-rem 1. Stop the currently running processes
-rem NOTE(review): this kills every python.exe on the machine, not only this project's
-echo ⏹️ Остановка текущих процессов...
-taskkill /f /im python.exe >nul 2>&1
-rem 2. Update the code
-echo 📥 Получение последних изменений...
-git pull
-if %ERRORLEVEL% NEQ 0 (
-    echo ❌ Ошибка при git pull
-    pause
-    exit /b 1
-)
-rem 3. Check and install dependencies
-echo 📦 Проверка зависимостей...
-pip install -r requirements.txt --quiet
-pip install -r telegram_requirements.txt --quiet
-rem 4. Start the MCP server in the background
-echo 🚀 Запуск MCP сервера...
-start /B python main.py --mode api --host 0.0.0.0 --port 8080 > mcp_server.log 2>&1
-rem 5. Wait 3 seconds
-timeout /t 3 /nobreak >nul
-rem 6. Start the Telegram bot in the background
-echo 🤖 Запуск Telegram бота...
-start /B python run_telegram_bot.py > telegram_bot.log 2>&1
-echo.
-echo ✅ Сервер обновлен и перезапущен!
-echo 📋 Процессы запущены в фоне
-echo.
-echo 📊 Для просмотра процессов: tasklist | findstr python
-echo 📊 Для просмотра логов:
-echo   type mcp_server.log
-echo   type telegram_bot.log
-echo.
-pause

update_server.ps1 DELETED Viewed

@@ -1,54 +0,0 @@
-# PowerShell script for updating the Windows server
-# Usage: .\update_server.ps1
-Write-Host "🔄 Обновление сервера..." -ForegroundColor Yellow
-# 1. Stop the currently running processes
-Write-Host "⏹️ Остановка текущих процессов..." -ForegroundColor Blue
-Get-Process | Where-Object {$_.ProcessName -like "*python*" -and $_.CommandLine -like "*main.py*"} | Stop-Process -Force -ErrorAction SilentlyContinue
-Get-Process | Where-Object {$_.ProcessName -like "*python*" -and $_.CommandLine -like "*run_telegram_bot.py*"} | Stop-Process -Force -ErrorAction SilentlyContinue
-# Alternative way to stop the processes
-# NOTE(review): this kills every python.exe on the machine, not only this project's
-taskkill /f /im python.exe 2>$null
-Write-Host "✅ Процессы остановлены" -ForegroundColor Green
-# 2. Update the code
-Write-Host "📥 Получение последних изменений..." -ForegroundColor Blue
-git pull
-if ($LASTEXITCODE -ne 0) {
-    Write-Host "❌ Ошибка при git pull" -ForegroundColor Red
-    exit 1
-}
-# 3. Check and install dependencies
-Write-Host "📦 Проверка зависимостей..." -ForegroundColor Blue
-pip install -r requirements.txt --quiet
-pip install -r telegram_requirements.txt --quiet
-# 4. Start the MCP server in the background
-Write-Host "🚀 Запуск MCP сервера..." -ForegroundColor Blue
-$mcpJob = Start-Process python -ArgumentList "main.py --mode api --host 0.0.0.0 --port 8080" -PassThru -WindowStyle Hidden -RedirectStandardOutput "mcp_server.log" -RedirectStandardError "mcp_server_error.log"
-Write-Host "MCP сервер запущен с PID: $($mcpJob.Id)" -ForegroundColor Green
-# 5. Wait 3 seconds for the MCP server to start
-Start-Sleep -Seconds 3
-# 6. Start the Telegram bot in the background
-Write-Host "🤖 Запуск Telegram бота..." -ForegroundColor Blue
-$botJob = Start-Process python -ArgumentList "run_telegram_bot.py" -PassThru -WindowStyle Hidden -RedirectStandardOutput "telegram_bot.log" -RedirectStandardError "telegram_bot_error.log"
-Write-Host "Telegram бот запущен с PID: $($botJob.Id)" -ForegroundColor Green
-Write-Host ""
-Write-Host "✅ Сервер обновлен и перезапущен!" -ForegroundColor Green
-Write-Host "📋 Процессы:" -ForegroundColor Yellow
-Write-Host "  MCP сервер PID: $($mcpJob.Id)" -ForegroundColor White
-Write-Host "  Telegram бот PID: $($botJob.Id)" -ForegroundColor White
-Write-Host ""
-Write-Host "📊 Для просмотра логов:" -ForegroundColor Yellow
-Write-Host "  MCP сервер: Get-Content mcp_server.log -Wait" -ForegroundColor White
-Write-Host "  Telegram бот: Get-Content telegram_bot.log -Wait" -ForegroundColor White
-Write-Host ""
-Write-Host "🔍 Для проверки процессов:" -ForegroundColor Yellow
-Write-Host "  Get-Process python" -ForegroundColor White

update_server.sh DELETED Viewed

@@ -1,42 +0,0 @@
-#!/bin/bash
-# Скрипт для обновления сервера после git push
-echo "🔄 Обновление сервера..."
-# 1. Остановка текущих процессов
-echo "⏹️ Остановка текущих процессов..."
-pkill -f "main.py" || echo "MCP сервер не был запущен"
-pkill -f "run_telegram_bot.py" || echo "Telegram бот не был запущен"
-# 2. Обновление кода
-echo "📥 Получение последних изменений..."
-git pull
-# 3. Проверка и установка зависимостей
-echo "📦 Проверка зависимостей..."
-pip install -r requirements.txt --quiet
-pip install -r telegram_requirements.txt --quiet
-# 4. Запуск MCP сервера в фоне
-echo "🚀 Запуск MCP сервера..."
-nohup python main.py --mode api --host 0.0.0.0 --port 8080 > mcp_server.log 2>&1 &
-MCP_PID=$!
-echo "MCP сервер запущен с PID: $MCP_PID"
-# 5. Ждем 3 секунды для запуска MCP сервера
-sleep 3
-# 6. Запуск Telegram бота в фоне
-echo "🤖 Запуск Telegram бота..."
-nohup python run_telegram_bot.py > telegram_bot.log 2>&1 &
-BOT_PID=$!
-echo "Telegram бот запущен с PID: $BOT_PID"
-echo "✅ Сервер обновлен и перезапущен!"
-echo "📋 Процессы:"
-echo "  MCP сервер PID: $MCP_PID"
-echo "  Telegram бот PID: $BOT_PID"
-echo ""
-echo "📊 Для просмотра логов:"
-echo "  MCP сервер: tail -f mcp_server.log"
-echo "  Telegram бот: tail -f telegram_bot.log"

update_tunnel.py DELETED Viewed

@@ -1,56 +0,0 @@
-#!/usr/bin/env python3
-"""
-Script to update MCP server URL when LocalTunnel changes
-Usage: python update_tunnel.py <new_tunnel_url>
-"""
-import sys
-import re
-def update_tunnel_url(new_url):
-    """Update the MCP_BASE_URL in telegram_config.env.
-
-    The URL must start with ``https://`` and contain ``.loca.lt``. The
-    ``/api/mcp`` suffix is appended when missing. Every ``MCP_BASE_URL=...``
-    line in ``telegram_config.env`` (relative to the current directory) is
-    then rewritten to the new value.
-
-    Args:
-        new_url: New tunnel URL, with or without the ``/api/mcp`` suffix.
-
-    Returns:
-        True when the file was rewritten, False when the URL was rejected or
-        reading/writing the file raised. Failures are printed, not raised.
-    """
-    # Validate URL format
-    if not new_url.startswith('https://') or '.loca.lt' not in new_url:
-        print("❌ Invalid URL format. Expected: https://xxx.loca.lt")
-        return False
-    # Add /api/mcp if not present
-    if not new_url.endswith('/api/mcp'):
-        if new_url.endswith('/'):
-            new_url = new_url + 'api/mcp'
-        else:
-            new_url = new_url + '/api/mcp'
-    try:
-        # Read current config
-        with open('telegram_config.env', 'r', encoding='utf-8') as f:
-            content = f.read()
-        # Update MCP_BASE_URL
-        # NOTE(review): new_url is used as a regex replacement template, so a
-        # backslash in it would be interpreted by re.sub
-        updated_content = re.sub(
-            r'MCP_BASE_URL=.*',
-            f'MCP_BASE_URL={new_url}',
-            content
-        )
-        # Write back
-        with open('telegram_config.env', 'w', encoding='utf-8') as f:
-            f.write(updated_content)
-        print(f"✅ Updated MCP_BASE_URL to: {new_url}")
-        print("🔄 Please restart the Telegram bot to apply changes")
-        return True
-    except Exception as e:
-        print(f"❌ Error updating config: {e}")
-        return False
-if __name__ == "__main__":
-    # Exactly one command-line argument (the new tunnel URL) is required
-    if len(sys.argv) != 2:
-        print("Usage: python update_tunnel.py <new_tunnel_url>")
-        print("Example: python update_tunnel.py https://abc-def-ghi.loca.lt")
-        sys.exit(1)
-    new_url = sys.argv[1]
-    # NOTE(review): the boolean result is ignored, so the process exits with
-    # status 0 even when the update fails
-    update_tunnel_url(new_url)