Muthuraja18 committed on
Commit
30834f3
·
verified ·
1 Parent(s): 14332cd
Files changed (1) hide show
  1. app.py +17 -26
app.py CHANGED
@@ -14,7 +14,6 @@ import requests
14
  GROQ_API_KEY = "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"
15
  GROQ_MODEL = "llama3-70b-8192"
16
 
17
- # Load BLIP and SentenceTransformer
18
  @st.cache_resource(show_spinner=False)
19
  def load_models():
20
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -24,7 +23,6 @@ def load_models():
24
 
25
  processor, blip_model, embedder = load_models()
26
 
27
- # Frame Extraction
28
  def extract_frames(video_path, interval_sec=1):
29
  cap = cv2.VideoCapture(video_path)
30
  fps = cap.get(cv2.CAP_PROP_FPS) or 30
@@ -41,7 +39,6 @@ def extract_frames(video_path, interval_sec=1):
41
  cap.release()
42
  return frames
43
 
44
- # Caption using BLIP
45
  def caption_frame(image, processor, model):
46
  device = "cuda" if torch.cuda.is_available() else "cpu"
47
  model.to(device)
@@ -50,7 +47,6 @@ def caption_frame(image, processor, model):
50
  caption = processor.decode(out[0], skip_special_tokens=True)
51
  return caption
52
 
53
- # Call Groq API
54
  def call_groq_api(prompt):
55
  url = "https://api.groq.com/openai/v1/chat/completions"
56
  headers = {
@@ -60,7 +56,7 @@ def call_groq_api(prompt):
60
  data = {
61
  "model": GROQ_MODEL,
62
  "messages": [
63
- {"role": "system", "content": "You are an expert cricket video summarizer and analyst."},
64
  {"role": "user", "content": prompt}
65
  ],
66
  "temperature": 0.7,
@@ -69,7 +65,6 @@ def call_groq_api(prompt):
69
  response.raise_for_status()
70
  return response.json()["choices"][0]["message"]["content"]
71
 
72
- # Create FAISS index
73
  def create_faiss_index(texts):
74
  embeddings = embedder.encode(texts, convert_to_numpy=True)
75
  dim = embeddings.shape[1]
@@ -77,30 +72,26 @@ def create_faiss_index(texts):
77
  index.add(embeddings)
78
  return index, embeddings
79
 
80
- # Search RAG facts
81
  def rag_search(query, index, texts, embeddings, top_k=3):
82
  query_vec = embedder.encode([query], convert_to_numpy=True)
83
  D, I = index.search(query_vec, top_k)
84
  return [texts[i] for i in I[0]]
85
 
86
- # Knowledge base
87
  knowledge_base = [
88
- "A six is when a batsman hits the ball over the boundary without it touching the ground.",
89
- "An out occurs when a batsman is dismissed by the fielding team.",
90
- "The umpire raises a finger to signal an out.",
91
- "When a player scores a century, the crowd often stands and applauds.",
92
- "A bowler appeals by shouting if they believe a batsman is out.",
93
- "The third umpire is used for reviewing close decisions.",
94
- "Fielders celebrate when a wicket is taken.",
95
- "A four is when the ball reaches the boundary after bouncing at least once."
96
  ]
97
 
98
  index, embeddings = create_faiss_index(knowledge_base)
99
 
100
- # Streamlit App
101
- st.title("🏏 Cricket Video Analyzer with RAG, BLIP & Groq")
102
 
103
- uploaded_file = st.file_uploader("📁 Upload a video file", type=["mp4", "mov", "avi"])
104
 
105
  if uploaded_file is not None:
106
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
@@ -120,27 +111,27 @@ if uploaded_file is not None:
120
  for i, frame in enumerate(frames):
121
  caption = caption_frame(frame, processor, blip_model)
122
  captions.append(f"Scene {i+1}: {caption}")
123
- st.image(frame, caption=captions[-1], use_column_width=True)
124
 
125
  combined_description = " ".join(captions)
126
 
127
- st.info("🔍 Performing RAG search for factual insights...")
128
  facts = rag_search(combined_description, index, knowledge_base, embeddings)
129
 
130
- st.subheader("📌 Retrieved Relevant Cricket Facts:")
131
  for fact in facts:
132
  st.markdown(f"- {fact}")
133
 
134
- st.info("🤖 Generating summary using Groq's LLaMA 3...")
135
  prompt = (
136
- "Analyze the following cricket scenes and give a detailed summary:\n\n"
137
  + "\n".join(captions)
138
- + "\n\nAlso incorporate cricket facts if relevant."
139
  )
140
 
141
  try:
142
  summary = call_groq_api(prompt)
143
- st.subheader("📝 AI Summary of Video:")
144
  st.write(summary)
145
  except Exception as e:
146
  st.error(f"Groq API Error: {e}")
 
14
  GROQ_API_KEY = "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"
15
  GROQ_MODEL = "llama3-70b-8192"
16
 
 
17
  @st.cache_resource(show_spinner=False)
18
  def load_models():
19
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 
23
 
24
  processor, blip_model, embedder = load_models()
25
 
 
26
  def extract_frames(video_path, interval_sec=1):
27
  cap = cv2.VideoCapture(video_path)
28
  fps = cap.get(cv2.CAP_PROP_FPS) or 30
 
39
  cap.release()
40
  return frames
41
 
 
42
  def caption_frame(image, processor, model):
43
  device = "cuda" if torch.cuda.is_available() else "cpu"
44
  model.to(device)
 
47
  caption = processor.decode(out[0], skip_special_tokens=True)
48
  return caption
49
 
 
50
  def call_groq_api(prompt):
51
  url = "https://api.groq.com/openai/v1/chat/completions"
52
  headers = {
 
56
  data = {
57
  "model": GROQ_MODEL,
58
  "messages": [
59
+ {"role": "system", "content": "You are an expert video summarizer and scene analyzer."},
60
  {"role": "user", "content": prompt}
61
  ],
62
  "temperature": 0.7,
 
65
  response.raise_for_status()
66
  return response.json()["choices"][0]["message"]["content"]
67
 
 
68
  def create_faiss_index(texts):
69
  embeddings = embedder.encode(texts, convert_to_numpy=True)
70
  dim = embeddings.shape[1]
 
72
  index.add(embeddings)
73
  return index, embeddings
74
 
 
75
  def rag_search(query, index, texts, embeddings, top_k=3):
76
  query_vec = embedder.encode([query], convert_to_numpy=True)
77
  D, I = index.search(query_vec, top_k)
78
  return [texts[i] for i in I[0]]
79
 
 
80
  knowledge_base = [
81
+ "People cheer when something exciting happens in a video.",
82
+ "A person scoring or celebrating indicates a major event.",
83
+ "When people run or point, it's likely something important occurred.",
84
+ "Spectators usually react to notable plays or events.",
85
+ "Gestures and facial expressions convey emotions in a scene.",
86
+ "A presenter with a mic is likely commenting on a performance.",
87
+ "Slow motion replays are usually shown for critical events."
 
88
  ]
89
 
90
  index, embeddings = create_faiss_index(knowledge_base)
91
 
92
+ st.title("🎥 General Video Analyzer with RAG, BLIP & Groq")
 
93
 
94
+ uploaded_file = st.file_uploader("📁 Upload any video file", type=["mp4", "mov", "avi"])
95
 
96
  if uploaded_file is not None:
97
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
 
111
  for i, frame in enumerate(frames):
112
  caption = caption_frame(frame, processor, blip_model)
113
  captions.append(f"Scene {i+1}: {caption}")
114
+ st.image(frame, caption=captions[-1], use_container_width=True)
115
 
116
  combined_description = " ".join(captions)
117
 
118
+ st.info("🔍 Performing RAG search for contextual facts...")
119
  facts = rag_search(combined_description, index, knowledge_base, embeddings)
120
 
121
+ st.subheader("📌 Related Insights:")
122
  for fact in facts:
123
  st.markdown(f"- {fact}")
124
 
125
+ st.info("🤖 Generating detailed summary using Groq...")
126
  prompt = (
127
+ "Here are descriptions of scenes from a video:\n\n"
128
  + "\n".join(captions)
129
+ + "\n\nGive a structured summary mentioning key events, people involved, and possible activities."
130
  )
131
 
132
  try:
133
  summary = call_groq_api(prompt)
134
+ st.subheader("📝 Video Summary:")
135
  st.write(summary)
136
  except Exception as e:
137
  st.error(f"Groq API Error: {e}")