Hanzo03 commited on
Commit
ccdd4a4
·
1 Parent(s): e977014

initial commit

Browse files
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ GEMINI_API_KEY = "your_gemini_api_key_here"
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ # Python-generated files
37
+ __pycache__/
38
+ *.py[oc]
39
+ build/
40
+ dist/
41
+ wheels/
42
+ *.egg-info
43
+
44
+ # Virtual environments
45
+ .venv
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ======================== app.py ========================
2
+
3
+ import gradio as gr
4
+ import os
5
+ import json
6
+ import shutil # Used for file operations
7
+
8
+ # Import the core functions from your existing files
9
+ from modules.video_analyzer import analyze_video_for_ppe
10
+ from modules.rag_indexer import index_analysis_data
11
+ from modules.rag_query import run_query
12
+
13
+ # Configuration (Keep consistent with rag_indexer.py and rag_query.py)
14
+ VIDEO_FILENAME = "uploaded_video.mp4" # Temp name for the uploaded file
15
+ RAW_ANALYSIS_FILE = 'raw_analysis.json'
16
+ DB_PATH = "./chroma_db"
17
+ COLLECTION_NAME = 'video_analysis_data' # Use the general collection name
18
+
19
def pipeline_fn(video_file, user_query):
    """
    Run the full upload -> analyze -> index -> query pipeline for Gradio.

    Args:
        video_file: The uploaded video. With ``gr.File(type="filepath")``
            Gradio passes a plain string path (older Gradio versions passed a
            tempfile object exposing ``.name``); both forms are accepted.
        user_query: The text question to ask about the video content.

    Returns:
        The RAG answer string, or a human-readable error message.
    """
    if video_file is None:
        return "Error: Please upload a video file first."
    if not user_query:
        return "Error: Please enter a question to query the video analysis."

    # 1. Handle file upload and naming.
    # Copy the upload to a fixed name in the working directory so the
    # analyzer can find it consistently.
    try:
        # type="filepath" yields a str; fall back to .name for file objects.
        source_path = video_file if isinstance(video_file, str) else video_file.name
        temp_video_path = os.path.join(os.getcwd(), VIDEO_FILENAME)
        shutil.copy(source_path, temp_video_path)
        print(f"Copied uploaded file to: {temp_video_path}")
    except Exception as e:
        return f"File handling error: {e}"

    try:
        # 2. Analyze the video (2 frames/sec keeps the demo reasonably fast).
        print("\n--- STAGE 1: Analyzing Video ---")
        analysis_results = analyze_video_for_ppe(
            video_path=temp_video_path,
            frames_per_sec=2
        )

        # Persist the raw analysis so the indexer can pick it up.
        with open(RAW_ANALYSIS_FILE, 'w') as f:
            json.dump(analysis_results, f, indent=4)

        # 3. Index the analysis data into ChromaDB.
        print("\n--- STAGE 2: Indexing Analysis Data ---")
        index_analysis_data(json_file=RAW_ANALYSIS_FILE, collection_name=COLLECTION_NAME)

        # 4. Execute the RAG query against the freshly indexed data.
        print("\n--- STAGE 3: Executing RAG Query ---")
        rag_answer = run_query(user_query)
    finally:
        # 5. Cleanup: remove the temp files even if a pipeline stage failed,
        # so repeated demo runs don't accumulate stale artifacts.
        for path in (temp_video_path, RAW_ANALYSIS_FILE):
            if os.path.exists(path):
                os.remove(path)

    return rag_answer
72
+
73
# --- Gradio interface definition ---

# Input widgets: the video upload and the user's free-text question.
video_input = gr.File(
    label="Upload Video File (.mp4, .mov, etc.)",
    file_types=["video"],  # only accept video uploads
    type="filepath"
)
query_input = gr.Textbox(
    label="Ask a Question about the Video Content",
    placeholder="e.g., What are people doing in the video?",
    lines=2
)

# Output widget: a read-only box holding the generated RAG answer.
output_textbox = gr.Textbox(
    label="RAG Analysis Result",
    lines=10,
    interactive=False
)

# Wire the pipeline function to the UI components.
demo = gr.Interface(
    fn=pipeline_fn,
    inputs=[video_input, query_input],
    outputs=output_textbox,
    title="🚀 Video Content RAG Pipeline",
    description="Upload a video, and ask a question. The pipeline runs object detection, indexes the data, and uses Gemini to answer your question based on the analysis.",
)

if __name__ == "__main__":
    print("Launching Gradio App...")
    # Serves locally at http://127.0.0.1:7860/ by default.
    demo.launch()
main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import json
4
+ from modules.video_analyzer import analyze_video_for_ppe
5
+ from modules.rag_indexer import index_analysis_data
6
+ from modules.rag_query import run_query
7
+
8
+ # --- Configuration ---
9
+ RAW_ANALYSIS_FILE = 'raw_analysis.json'
10
+ MODEL_PATH = 'yolov8n.pt' # Default YOLOv8 model for general objects
11
+
12
def main():
    """
    Execute the full Video Analysis -> Indexing -> Querying RAG pipeline.

    Parses --video-path, --query and --frames-per-sec from the command line,
    validates prerequisites, then runs the three pipeline stages, printing
    progress and the final RAG answer. Returns None in all cases.
    """
    parser = argparse.ArgumentParser(
        description="Run the full PPE Compliance RAG pipeline.",
        formatter_class=argparse.RawTextHelpFormatter
    )

    parser.add_argument(
        '--video-path',
        type=str,
        required=True,
        help="Path to the video file to analyze (e.g., 'construction.mp4')."
    )

    parser.add_argument(
        '--query',
        type=str,
        required=True,
        help="The natural language query to ask the RAG system (e.g., 'Summarize safety violations')."
    )

    # '--frames-per-sec' matches the dashed style of '--video-path';
    # '--frames_per_sec' is kept as an alias for backward compatibility.
    parser.add_argument(
        '--frames-per-sec', '--frames_per_sec',
        dest='frames_per_sec',
        type=float,
        default=0.5,
        help="Number of frames to sample per second for analysis (Default: 0.5)."
    )

    args = parser.parse_args()

    video_path = args.video_path
    user_query = args.query
    frames_per_sec = args.frames_per_sec

    # 1. Check for prerequisites.
    if not os.path.exists(video_path):
        print(f"Error: Video file not found at '{video_path}'.")
        return

    if not os.path.exists(MODEL_PATH):
        print(f"Warning: YOLO model '{MODEL_PATH}' not found. You might need to download it or change MODEL_PATH.")
        print("Proceeding, but analysis will likely fail if the model is missing.")
        # Let the analyzer surface the actual model-load error.

    print("="*60)
    print("🚀 Starting PPE Compliance RAG Pipeline")
    print("="*60)

    # --- STAGE 1: Video Analysis ---
    print(f"\n--- STAGE 1: Analyzing Video '{os.path.basename(video_path)}' ---")
    print(f"Sampling Rate: {frames_per_sec} frames/sec")

    analysis_results = analyze_video_for_ppe(
        video_path=video_path,
        model_path=MODEL_PATH,
        frames_per_sec=frames_per_sec
    )

    if not analysis_results:
        print("\nAnalysis failed or returned no results. Aborting pipeline.")
        return

    # Save raw results for the indexer to pick up.
    with open(RAW_ANALYSIS_FILE, 'w') as f:
        json.dump(analysis_results, f, indent=4)
    print(f"Raw analysis saved to '{RAW_ANALYSIS_FILE}'. {len(analysis_results)} records created.")

    # --- STAGE 2: Data Indexing (RAG Indexer) ---
    print("\n--- STAGE 2: Indexing Analysis Data into ChromaDB ---")
    # index_analysis_data expects the file named RAW_ANALYSIS_FILE.
    index_analysis_data(json_file=RAW_ANALYSIS_FILE)

    # --- STAGE 3: RAG Query ---
    print("\n--- STAGE 3: Executing RAG Query ---")
    print(f"User Question: {user_query}")

    try:
        # Run the RAG query pipeline.
        rag_answer = run_query(user_query)

        print("\n" + "="*60)
        print("✅ RAG Pipeline Complete")
        print("="*60)
        print("\n--- RAG ANSWER ---")
        print(rag_answer)

    except Exception as e:
        print(f"\nError during RAG Query execution: {e}")
        # The project's .env defines GEMINI_API_KEY (not GOOGLE_API_KEY),
        # so point the user at the variable that is actually used.
        print("Please ensure your environment variables (like GEMINI_API_KEY) are set and dependencies are installed.")


if __name__ == '__main__':
    main()
modules/__pycache__/rag_indexer.cpython-312.pyc ADDED
Binary file (3.28 kB). View file
 
modules/__pycache__/rag_query.cpython-312.pyc ADDED
Binary file (3.44 kB). View file
 
modules/__pycache__/video_analyzer.cpython-312.pyc ADDED
Binary file (3.32 kB). View file
 
modules/rag_indexer.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ import json
3
+
4
+ COLLECTION_NAME = 'video_analysis_data'
5
+ # ... (rest of imports/constants)
6
+
7
def generate_text_summary(record):
    """
    Render one structured detection record as a natural-language sentence.

    Tallies the frame's detections by label and reports how many instances of
    each label were seen, prefixed with the video id and timestamp so the
    resulting document is self-contained when retrieved later.
    """
    prefix = (
        f"Analysis of video '{record['video_id']}' "
        f"at {record['timestamp_sec']} seconds:"
    )
    detections = record['detections']

    if not detections:
        return f"{prefix} No objects were detected in this frame."

    # Count occurrences of each label, preserving first-seen order.
    counts = {}
    for detection in detections:
        label = detection['label']
        counts[label] = counts.get(label, 0) + 1

    # Format: N instances of 'label', M instances of 'other_label', ...
    described = ", ".join(
        f"{count} instances of '{label}'" for label, count in counts.items()
    )
    return f"{prefix} Detected objects include: {described}."
37
+
38
+
39
def index_analysis_data(json_file='raw_analysis.json', collection_name='video_analysis_data', db_path='./chroma_db'):
    """
    Load raw analysis records, generate text documents, and index them in ChromaDB.

    Args:
        json_file: Path to the JSON file produced by the video analyzer.
        collection_name: ChromaDB collection to create or update.
        db_path: Directory for the persistent ChromaDB store. New parameter;
            defaults to the previously hard-coded "./chroma_db" location.

    Returns:
        None. Prints progress; returns early if the JSON file is missing.
    """
    try:
        with open(json_file, 'r') as f:
            raw_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {json_file} not found. Run 'video_analyzer.py' first.")
        return

    # Initialize ChromaDB client (stores data locally under db_path).
    client = chromadb.PersistentClient(path=db_path)
    collection = client.get_or_create_collection(name=collection_name)

    documents = []
    metadatas = []
    ids = []

    print(f"Indexing {len(raw_data)} analysis records...")

    for record in raw_data:
        doc_text = generate_text_summary(record)
        if doc_text:
            documents.append(doc_text)
            # Metadata is crucial for filtering by video/time at query time.
            metadatas.append({
                'video_id': record['video_id'],
                'timestamp_sec': record['timestamp_sec'],
                'frame_id': record['frame_id']
            })
            # Key ids on video + frame instead of positional "doc_{i}":
            # positional ids collide when another analysis run is indexed
            # into the same persistent collection.
            ids.append(f"{record['video_id']}_frame_{record['frame_id']}")

    # ChromaDB automatically handles embedding and storage. upsert (rather
    # than add) makes re-indexing the same video idempotent instead of
    # failing on duplicate ids.
    if documents:
        collection.upsert(
            documents=documents,
            metadatas=metadatas,
            ids=ids
        )
        print(f"Successfully indexed {len(documents)} documents into ChromaDB collection '{collection_name}'.")
    else:
        print("No valid documents generated for indexing.")


if __name__ == '__main__':
    index_analysis_data()
modules/rag_query.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain_core.prompts import ChatPromptTemplate
4
+ from langchain_core.runnables import RunnablePassthrough
5
+ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
6
+ from langchain_chroma import Chroma
7
+ import os
8
+
9
+ from dotenv import load_dotenv
10
+
11
+ load_dotenv() # Load environment variables from .env file
12
+
13
# 1. RAG component configuration
GEMINI_MODEL = "gemini-2.5-flash"
COLLECTION_NAME = 'video_analysis_data'  # must match the indexer's collection
DB_PATH = "./chroma_db"

def run_query(user_query):
    """
    Execute the RAG pipeline: retrieve relevant context and generate an answer.

    Args:
        user_query: The natural-language question about the indexed analysis.

    Returns:
        The generated answer text, or a fallback message if no index exists.
    """
    if not os.path.exists(DB_PATH):
        print(f"Error: Database path {DB_PATH} not found. Run 'rag_indexer.py' first.")
        return "Analysis data is not yet indexed. Please index the data first."

    # ChromaDB setup
    client = chromadb.PersistentClient(path=DB_PATH)

    embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = Chroma(
        client=client,
        collection_name=COLLECTION_NAME,
        embedding_function=embedding_function
    )

    # 2. Retrieval (R): top 5 most relevant analysis documents.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

    # 3. Generation (G): general-purpose scene-description system prompt.
    template = """
    You are an expert Video Content Analyst. Your task is to describe the scene, objects, and potential context detected in a video based on the provided analysis logs (Context).
    Use the Context to answer the user's Question.

    IMPORTANT: Since the analysis only provides object labels and locations (Object Detection) and not specific actions (Activity Recognition), you must infer and describe the *scene* or *context* based on the objects present (e.g., "The presence of people and construction materials suggests activity on a construction site.").

    Always include the video timestamp(s) where the relevant context was found.

    Context:
    {context}

    Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)

    # The project's .env defines GEMINI_API_KEY, but ChatGoogleGenerativeAI
    # looks for GOOGLE_API_KEY by default — accept either so the key that is
    # actually configured gets used.
    api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
    llm_kwargs = {"model": GEMINI_MODEL}
    if api_key:
        llm_kwargs["google_api_key"] = api_key
    llm = ChatGoogleGenerativeAI(**llm_kwargs)

    # The RAG chain: retrieved docs become {context}, the raw query {question}.
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
    )

    # 4. Execute the chain.
    print(f"Executing RAG query for: '{user_query}'...")
    response = rag_chain.invoke(user_query)

    return response.content
70
+
71
# Example usage:
if __name__ == '__main__':
    # Requires indexed data — run rag_indexer.py first.
    demo_questions = [
        "What kind of objects were frequently detected in the video?",
        "What activity was detected around the 15-second mark in the video?",
    ]

    for number, question in enumerate(demo_questions, start=1):
        if number > 1:
            print("\n" + "="*50 + "\n")
        answer = run_query(question)
        print(f"\n--- QUERY {number} ---")
        print(f"Question: {question}")
        print(f"Answer:\n{answer}")
modules/video_analyzer.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ from ultralytics import YOLO
3
+ import json
4
+ import os
5
+
6
def analyze_video_for_ppe(video_path, model_path='yolov8n.pt', frames_per_sec=1.0):
    """
    Analyze a video with a YOLOv8 model, sampling frames at a fixed rate.

    Args:
        video_path: Path to the input video file.
        model_path: YOLOv8 weights to load (replace with a fine-tuned PPE
            model for real compliance detection; the default detects general
            objects such as 'person').
        frames_per_sec: How many frames to analyze per second of video.

    Returns:
        A list of per-frame records of the form
        {'video_id', 'frame_id', 'timestamp_sec', 'detections': [...]},
        or an empty list on any setup error.
    """
    # 1. Load the YOLOv8 model.
    try:
        model = YOLO(model_path)
    except Exception as e:
        print(f"Error loading model: {e}. Ensure you have a valid YOLOv8 model path.")
        return []

    # 2. Open the video file.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return []

    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps <= 0 or frames_per_sec <= 0:
        # Some containers report FPS 0; the naive fps/frames_per_sec division
        # below would then raise ZeroDivisionError.
        print(f"Error: Invalid video FPS ({fps}) or sampling rate ({frames_per_sec}).")
        cap.release()
        return []

    # Sample every Nth frame. Clamp to at least 1 so a sampling rate above
    # the video's FPS analyzes every frame instead of crashing on modulo-0.
    frame_interval = max(1, int(fps / frames_per_sec))
    frame_count = 0
    analysis_results = []

    print(f"Video FPS: {fps}, Analyzing every {frame_interval} frames...")

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Only analyze sampled frames.
            if frame_count % frame_interval == 0:
                timestamp_sec = frame_count / fps

                # 3. Run detection on the frame.
                results = model(frame, verbose=False)

                # 4. Flatten results; each box is [x1, y1, x2, y2, conf, cls].
                detections = []
                for r in results:
                    for box in r.boxes.data.tolist():
                        x1, y1, x2, y2, conf, cls = box
                        detections.append({
                            'label': model.names[int(cls)],
                            'confidence': round(conf, 2),
                            'bbox': [int(x1), int(y1), int(x2), int(y2)]  # Bounding box
                        })

                analysis_results.append({
                    'video_id': os.path.basename(video_path),
                    'frame_id': frame_count,
                    'timestamp_sec': round(timestamp_sec, 2),
                    'detections': detections
                })

            frame_count += 1
    finally:
        # 5. Always release the capture, even if detection raised mid-loop.
        cap.release()

    print(f"Analysis complete. Total frames analyzed: {len(analysis_results)}")
    return analysis_results
77
+
78
+ # Example Usage:
79
+ if __name__ == '__main__':
80
+ # NOTE: You'll need a sample video file in the same directory (e.g., 'construction.mp4')
81
+ # and a trained PPE model file. For a quick test, you can use the default 'yolov8n.pt'
82
+ # which detects general objects (like 'person') until you fine-tune a PPE model.
83
+ if not os.path.exists('construction.mp4'):
84
+ print("Please place a video file named 'construction.mp4' in the current directory.")
85
+ else:
86
+ results = analyze_video_for_ppe('construction.mp4', frames_per_sec=0.5)
87
+
88
+ # Save raw results (optional, but good for debugging)
89
+ with open('raw_analysis.json', 'w') as f:
90
+ json.dump(results, f, indent=4)
91
+
92
+ print(f"Raw analysis saved to raw_analysis.json. {len(results)} records created.")
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "rag"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "chromadb>=1.3.6",
9
+ "faiss-cpu>=1.13.1",
10
+ "google-genai>=1.55.0",
11
+ "gradio>=6.1.0",
12
+ "langchain>=1.1.3",
13
+ "langchain-chroma>=1.0.0",
14
+ "langchain-community>=0.4.1",
15
+ "langchain-core>=1.1.3",
16
+ "langchain-google-genai>=4.0.0",
17
+ "langchain-huggingface>=1.1.0",
18
+ "numpy>=2.3.5",
19
+ "opencv-python>=4.11.0.86",
20
+ "python-dotenv>=1.2.1",
21
+ "sentence-transformers>=5.2.0",
22
+ "streamlit>=1.52.1",
23
+ "ultralytics>=8.3.236",
24
+ ]
req.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ opencv-python
2
+ ultralytics
3
+ numpy
4
+ chromadb
5
+ sentence-transformers
6
+ google-genai
7
+ langchain
8
+ langchain-google-genai
9
+ langchain-core
10
+ langchain-community
11
+ langchain-huggingface
12
+ langchain-chroma
uv.lock ADDED
The diff for this file is too large to render. See raw diff