Spaces:
Runtime error
Runtime error
Update app.py (#44)
Browse files
- Update app.py (5d53f733bed47a5e0f1b900a9e3bd7ea639ec15b)
app.py
CHANGED
|
@@ -14,7 +14,6 @@ import requests
|
|
| 14 |
GROQ_API_KEY = "<REDACTED>"
|
| 15 |
GROQ_MODEL = "llama3-70b-8192"
|
| 16 |
|
| 17 |
-
# Load BLIP and SentenceTransformer
|
| 18 |
@st.cache_resource(show_spinner=False)
|
| 19 |
def load_models():
|
| 20 |
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
|
@@ -24,7 +23,6 @@ def load_models():
|
|
| 24 |
|
| 25 |
processor, blip_model, embedder = load_models()
|
| 26 |
|
| 27 |
-
# Frame Extraction
|
| 28 |
def extract_frames(video_path, interval_sec=1):
|
| 29 |
cap = cv2.VideoCapture(video_path)
|
| 30 |
fps = cap.get(cv2.CAP_PROP_FPS) or 30
|
|
@@ -41,7 +39,6 @@ def extract_frames(video_path, interval_sec=1):
|
|
| 41 |
cap.release()
|
| 42 |
return frames
|
| 43 |
|
| 44 |
-
# Caption using BLIP
|
| 45 |
def caption_frame(image, processor, model):
|
| 46 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 47 |
model.to(device)
|
|
@@ -50,7 +47,6 @@ def caption_frame(image, processor, model):
|
|
| 50 |
caption = processor.decode(out[0], skip_special_tokens=True)
|
| 51 |
return caption
|
| 52 |
|
| 53 |
-
# Call Groq API
|
| 54 |
def call_groq_api(prompt):
|
| 55 |
url = "https://api.groq.com/openai/v1/chat/completions"
|
| 56 |
headers = {
|
|
@@ -60,7 +56,7 @@ def call_groq_api(prompt):
|
|
| 60 |
data = {
|
| 61 |
"model": GROQ_MODEL,
|
| 62 |
"messages": [
|
| 63 |
-
{"role": "system", "content": "You are an expert
|
| 64 |
{"role": "user", "content": prompt}
|
| 65 |
],
|
| 66 |
"temperature": 0.7,
|
|
@@ -69,7 +65,6 @@ def call_groq_api(prompt):
|
|
| 69 |
response.raise_for_status()
|
| 70 |
return response.json()["choices"][0]["message"]["content"]
|
| 71 |
|
| 72 |
-
# Create FAISS index
|
| 73 |
def create_faiss_index(texts):
|
| 74 |
embeddings = embedder.encode(texts, convert_to_numpy=True)
|
| 75 |
dim = embeddings.shape[1]
|
|
@@ -77,30 +72,26 @@ def create_faiss_index(texts):
|
|
| 77 |
index.add(embeddings)
|
| 78 |
return index, embeddings
|
| 79 |
|
| 80 |
-
# Search RAG facts
|
| 81 |
def rag_search(query, index, texts, embeddings, top_k=3):
|
| 82 |
query_vec = embedder.encode([query], convert_to_numpy=True)
|
| 83 |
D, I = index.search(query_vec, top_k)
|
| 84 |
return [texts[i] for i in I[0]]
|
| 85 |
|
| 86 |
-
# Knowledge base
|
| 87 |
knowledge_base = [
|
| 88 |
-
"
|
| 89 |
-
"
|
| 90 |
-
"
|
| 91 |
-
"
|
| 92 |
-
"
|
| 93 |
-
"
|
| 94 |
-
"
|
| 95 |
-
"A four is when the ball reaches the boundary after bouncing at least once."
|
| 96 |
]
|
| 97 |
|
| 98 |
index, embeddings = create_faiss_index(knowledge_base)
|
| 99 |
|
| 100 |
-
|
| 101 |
-
st.title("π Cricket Video Analyzer with RAG, BLIP & Groq")
|
| 102 |
|
| 103 |
-
uploaded_file = st.file_uploader("π Upload
|
| 104 |
|
| 105 |
if uploaded_file is not None:
|
| 106 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
|
|
@@ -120,27 +111,27 @@ if uploaded_file is not None:
|
|
| 120 |
for i, frame in enumerate(frames):
|
| 121 |
caption = caption_frame(frame, processor, blip_model)
|
| 122 |
captions.append(f"Scene {i+1}: {caption}")
|
| 123 |
-
st.image(frame, caption=captions[-1],
|
| 124 |
|
| 125 |
combined_description = " ".join(captions)
|
| 126 |
|
| 127 |
-
st.info("π Performing RAG search for
|
| 128 |
facts = rag_search(combined_description, index, knowledge_base, embeddings)
|
| 129 |
|
| 130 |
-
st.subheader("π
|
| 131 |
for fact in facts:
|
| 132 |
st.markdown(f"- {fact}")
|
| 133 |
|
| 134 |
-
st.info("🤖 Generating summary using Groq
|
| 135 |
prompt = (
|
| 136 |
-
"
|
| 137 |
+ "\n".join(captions)
|
| 138 |
-
+ "\n\
|
| 139 |
)
|
| 140 |
|
| 141 |
try:
|
| 142 |
summary = call_groq_api(prompt)
|
| 143 |
-
st.subheader("π
|
| 144 |
st.write(summary)
|
| 145 |
except Exception as e:
|
| 146 |
st.error(f"Groq API Error: {e}")
|
|
|
|
| 14 |
GROQ_API_KEY = "<REDACTED>"
|
| 15 |
GROQ_MODEL = "llama3-70b-8192"
|
| 16 |
|
|
|
|
| 17 |
@st.cache_resource(show_spinner=False)
|
| 18 |
def load_models():
|
| 19 |
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
|
|
|
| 23 |
|
| 24 |
processor, blip_model, embedder = load_models()
|
| 25 |
|
|
|
|
| 26 |
def extract_frames(video_path, interval_sec=1):
|
| 27 |
cap = cv2.VideoCapture(video_path)
|
| 28 |
fps = cap.get(cv2.CAP_PROP_FPS) or 30
|
|
|
|
| 39 |
cap.release()
|
| 40 |
return frames
|
| 41 |
|
|
|
|
| 42 |
def caption_frame(image, processor, model):
|
| 43 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 44 |
model.to(device)
|
|
|
|
| 47 |
caption = processor.decode(out[0], skip_special_tokens=True)
|
| 48 |
return caption
|
| 49 |
|
|
|
|
| 50 |
def call_groq_api(prompt):
|
| 51 |
url = "https://api.groq.com/openai/v1/chat/completions"
|
| 52 |
headers = {
|
|
|
|
| 56 |
data = {
|
| 57 |
"model": GROQ_MODEL,
|
| 58 |
"messages": [
|
| 59 |
+
{"role": "system", "content": "You are an expert video summarizer and scene analyzer."},
|
| 60 |
{"role": "user", "content": prompt}
|
| 61 |
],
|
| 62 |
"temperature": 0.7,
|
|
|
|
| 65 |
response.raise_for_status()
|
| 66 |
return response.json()["choices"][0]["message"]["content"]
|
| 67 |
|
|
|
|
| 68 |
def create_faiss_index(texts):
|
| 69 |
embeddings = embedder.encode(texts, convert_to_numpy=True)
|
| 70 |
dim = embeddings.shape[1]
|
|
|
|
| 72 |
index.add(embeddings)
|
| 73 |
return index, embeddings
|
| 74 |
|
|
|
|
| 75 |
def rag_search(query, index, texts, embeddings, top_k=3):
|
| 76 |
query_vec = embedder.encode([query], convert_to_numpy=True)
|
| 77 |
D, I = index.search(query_vec, top_k)
|
| 78 |
return [texts[i] for i in I[0]]
|
| 79 |
|
|
|
|
| 80 |
knowledge_base = [
|
| 81 |
+
"People cheer when something exciting happens in a video.",
|
| 82 |
+
"A person scoring or celebrating indicates a major event.",
|
| 83 |
+
"When people run or point, it's likely something important occurred.",
|
| 84 |
+
"Spectators usually react to notable plays or events.",
|
| 85 |
+
"Gestures and facial expressions convey emotions in a scene.",
|
| 86 |
+
"A presenter with a mic is likely commenting on a performance.",
|
| 87 |
+
"Slow motion replays are usually shown for critical events."
|
|
|
|
| 88 |
]
|
| 89 |
|
| 90 |
index, embeddings = create_faiss_index(knowledge_base)
|
| 91 |
|
| 92 |
+
st.title("🎥 General Video Analyzer with RAG, BLIP & Groq")
|
|
|
|
| 93 |
|
| 94 |
+
uploaded_file = st.file_uploader("π Upload any video file", type=["mp4", "mov", "avi"])
|
| 95 |
|
| 96 |
if uploaded_file is not None:
|
| 97 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
|
|
|
|
| 111 |
for i, frame in enumerate(frames):
|
| 112 |
caption = caption_frame(frame, processor, blip_model)
|
| 113 |
captions.append(f"Scene {i+1}: {caption}")
|
| 114 |
+
st.image(frame, caption=captions[-1], use_container_width=True)
|
| 115 |
|
| 116 |
combined_description = " ".join(captions)
|
| 117 |
|
| 118 |
+
st.info("π Performing RAG search for contextual facts...")
|
| 119 |
facts = rag_search(combined_description, index, knowledge_base, embeddings)
|
| 120 |
|
| 121 |
+
st.subheader("π Related Insights:")
|
| 122 |
for fact in facts:
|
| 123 |
st.markdown(f"- {fact}")
|
| 124 |
|
| 125 |
+
st.info("🤖 Generating detailed summary using Groq...")
|
| 126 |
prompt = (
|
| 127 |
+
"Here are descriptions of scenes from a video:\n\n"
|
| 128 |
+ "\n".join(captions)
|
| 129 |
+
+ "\n\nGive a structured summary mentioning key events, people involved, and possible activities."
|
| 130 |
)
|
| 131 |
|
| 132 |
try:
|
| 133 |
summary = call_groq_api(prompt)
|
| 134 |
+
st.subheader("π Video Summary:")
|
| 135 |
st.write(summary)
|
| 136 |
except Exception as e:
|
| 137 |
st.error(f"Groq API Error: {e}")
|