Badro committed on
Commit
d7f8c20
·
0 Parent(s):

Saving local versions of Clip-Engine core files

Browse files
Files changed (3) hide show
  1. .gitattributes +35 -0
  2. README.md +12 -0
  3. app.py +133 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Clip Engine
3
+ emoji: ⚡
4
+ colorFrom: yellow
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.33.2
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pytube import YouTube
3
+ from textblob import TextBlob
4
+ from transformers import pipeline
5
+ import logging
6
+ import os
7
+
8
# Root-logger configuration shared by the whole app: timestamp, level, message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Preferred sentiment model. It may need to be swapped for a different one
# depending on what is available and performant on the hosting tier.
SENTIMENT_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"


def _load_sentiment_analyzer():
    """Build the sentiment-analysis pipeline, preferring SENTIMENT_MODEL_NAME.

    The named model is tried first; if that fails for any reason, the
    library's default sentiment model is tried instead.

    Returns:
        The loaded pipeline, or None when neither model could be loaded.
    """
    # NOTE: if the default model cache location is not writable in the
    # hosting environment, a cache directory may have to be configured here.
    try:
        logging.info(f"Attempting to load sentiment analysis pipeline: {SENTIMENT_MODEL_NAME}...")
        preferred = pipeline("sentiment-analysis", model=SENTIMENT_MODEL_NAME)
        logging.info(f"Sentiment analysis pipeline '{SENTIMENT_MODEL_NAME}' loaded successfully.")
        return preferred
    except Exception as preferred_error:
        logging.warning(f"Failed to load {SENTIMENT_MODEL_NAME}: {preferred_error}. Falling back to default sentiment model.")

    try:
        fallback = pipeline("sentiment-analysis")
        logging.info("Default sentiment analysis pipeline loaded successfully.")
        return fallback
    except Exception as fallback_error:
        logging.error(f"Failed to load default sentiment analysis pipeline: {fallback_error}")

    # Both attempts failed; callers must cope with a missing analyzer.
    return None


# Loaded once at import time so every request reuses the same pipeline.
sentiment_analyzer = _load_sentiment_analyzer()
31
+
32
+
33
def _fetch_video_info(youtube_url: str) -> dict:
    """Return basic metadata for the video at *youtube_url*, or an error record."""
    try:
        yt = YouTube(youtube_url)
        title, views, length = yt.title, yt.views, yt.length
        # Read the publish date once instead of evaluating the property twice.
        published = yt.publish_date
        info = {
            "title": title,
            "views": views,
            "length_seconds": length,
            "publish_date": published.strftime('%Y-%m-%d') if published else None,
            "status": "success",
        }
    except Exception as e:
        # Best effort: any lookup failure is reported in the result rather
        # than raised, so transcript analysis can still proceed.
        logging.error("Error fetching video info from URL '%s': %s", youtube_url, e)
        return {"status": "error", "message": f"Could not fetch video info: {e}"}
    logging.info("Successfully fetched info for video: %s", title)
    return info


def _textblob_sentiment(text: str) -> dict:
    """Return TextBlob polarity/subjectivity for *text*, or an error record."""
    try:
        sentiment = TextBlob(text).sentiment
        polarity = sentiment.polarity
        # A small dead zone around zero is reported as neutral.
        if polarity > 0.05:
            assessment = "positive"
        elif polarity < -0.05:
            assessment = "negative"
        else:
            assessment = "neutral"
        scores = {
            "polarity": round(polarity, 3),
            "subjectivity": round(sentiment.subjectivity, 3),
            "assessment": assessment,
        }
    except Exception as e:
        logging.error("Error during TextBlob sentiment analysis: %s", e)
        return {"error": str(e)}
    logging.info("TextBlob sentiment analysis complete.")
    return scores


def _transformer_sentiment(text: str, max_length_chars: int = 1000) -> dict:
    """Return the transformer pipeline's label/score for *text*, or an error record."""
    if not sentiment_analyzer:
        logging.warning("Hugging Face sentiment analyzer was not available for analysis.")
        return {"error": "Hugging Face sentiment analyzer not loaded."}

    try:
        # The character cut-off keeps inference cheap, but it is only a
        # heuristic; truncation=True also enforces the model's real token
        # limit so dense text cannot overflow it.
        hf_result = sentiment_analyzer(text[:max_length_chars], truncation=True)[0]
        scores = {
            "label": hf_result["label"],
            "score": round(hf_result["score"], 3),
        }
    except Exception as e:
        logging.error("Error during Hugging Face sentiment analysis: %s", e)
        return {"error": str(e)}

    if len(text) > max_length_chars:
        scores["note"] = f"Analyzed approximately the first {max_length_chars} characters of the transcript."
    logging.info("Hugging Face transformer sentiment analysis complete.")
    return scores


def analyze_youtube_content(youtube_url: str = "", transcript_text: str = "") -> dict:
    """
    Analyzes YouTube video content.
    If a YouTube URL is provided, it attempts to fetch video information (title, views, length, publish date).
    If transcript text is provided, it performs sentiment analysis on the text using
    TextBlob and a Hugging Face transformer model.
    Inputs that are None or contain only whitespace are treated as not provided.

    Args:
        youtube_url (str, optional): The URL of the YouTube video. Defaults to "".
        transcript_text (str, optional): The transcript text of the video. Defaults to "".

    Returns:
        dict: A dictionary containing analysis results.
        Includes 'video_info' if a URL was given and 'sentiment_analysis' if a transcript was given.
        Each section carries its own error details on failure; if neither input is
        provided, the dictionary holds a single 'error' key.
    """
    # Normalise first so that None or blank UI input counts as "not provided".
    youtube_url = (youtube_url or "").strip()
    transcript_text = (transcript_text or "").strip()
    logging.info(
        "Tool 'analyze_youtube_content' called with URL: '%s', Transcript provided: %s",
        youtube_url,
        bool(transcript_text),
    )

    if not youtube_url and not transcript_text:
        logging.warning("No YouTube URL or transcript text provided.")
        return {"error": "No YouTube URL or transcript text provided for analysis."}

    results = {}
    if youtube_url:
        results["video_info"] = _fetch_video_info(youtube_url)
    if transcript_text:
        results["sentiment_analysis"] = {
            "textblob": _textblob_sentiment(transcript_text),
            "huggingface_transformer": _transformer_sentiment(transcript_text),
        }
    return results
116
+
117
# Gradio front end for the analysis tool: two optional text inputs (URL and
# transcript) and a JSON view of the resulting dictionary.
_url_input = gr.Textbox(
    label="YouTube Video URL (Optional)",
    placeholder="Enter YouTube video URL...",
)
_transcript_input = gr.Textbox(
    label="Video Transcript Text (Optional)",
    placeholder="Paste video transcript here...",
    lines=5,
)

youtube_tool_interface = gr.Interface(
    fn=analyze_youtube_content,
    inputs=[_url_input, _transcript_input],
    outputs=gr.JSON(label="Analysis Result"),
    title="YouTube Content Analyzer Tool",
    description="Provides information and sentiment analysis for a YouTube video URL or its transcript. (For Agent Use via MCP)",
)
128
+
129
def main():
    """Start the Gradio app with its MCP server enabled."""
    logging.info("Launching Gradio app with MCP server enabled for the YouTube Content Analyzer Tool...")
    # mcp_server=True is what lets an agent connect to and call the tool.
    youtube_tool_interface.launch(mcp_server=True)


# Only launch when executed as a script, not when imported.
if __name__ == "__main__":
    main()