Spaces:

ShadowGard3n
/

Spectra-Backend

Sleeping

File size: 16,441 Bytes

import os
import json
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.responses import PlainTextResponse
from fastapi.middleware.cors import CORSMiddleware
import time 

# Import your custom PyPI library
from graphvision import GraphExtractor

app = FastAPI(title="STEM Sight Backend")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], # Allows any browser extension to connect
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize your custom PyPI library
print("Initializing STEM Sight Vision Engine...")
vision_engine = GraphExtractor()

@app.get("/")
async def root():
    return {"message": "STEM Sight API is online and ready."}

def generate_audio_summary(extraction_result: dict) -> str:
    """
    Hardcoded logic to generate a conversational summary from graph data 
    without relying on an external LLM.
    """
    chart_type = extraction_result.get("chart_type", "unknown").lower()
    title = extraction_result.get("title")
    title_text = f"titled {title}" if title else "without a specific title"
    
    # --- 1. PIE CHART LOGIC ---
    if chart_type == "pie":
        data = extraction_result.get("data", {})
        if not data:
            return f"This is a pie chart {title_text}, but no data could be extracted."
        
        max_cat = max(data, key=data.get)
        min_cat = min(data, key=data.get)
        
        summary = (
            f"This is a pie chart {title_text}. "
            f"The largest portion is {max_cat} at {data[max_cat]}. "
            f"The smallest portion is {min_cat} at {data[min_cat]}."
        )
        return summary
        
    # --- 2. BAR CHART LOGIC (HBAR & VBAR) ---
    elif chart_type in ["hbar_categorical", "vbar_categorical", "hbar", "vbar"]:
        data = extraction_result.get("data", [])
        x_label = extraction_result.get("x_axis_label", "the X axis")
        y_label = extraction_result.get("y_axis_label", "the Y axis")
        
        if not data:
            return f"This is a bar chart {title_text}, but no data could be extracted."
            
        max_item = max(data, key=lambda d: d.get("value", 0))
        min_item = min(data, key=lambda d: d.get("value", 0))
        
        summary = (
            f"This is a bar chart {title_text}, showing {y_label} against {x_label}. "
            f"The highest value is {max_item.get('category')} at {max_item.get('value')}. "
            f"The lowest value is {min_item.get('category')} at {min_item.get('value')}. "
        )
        
        # Filter out the max and min items so we don't repeat them
        other_items = [item for item in data if item != max_item and item != min_item]
        
        if other_items:
            # Join the remaining items with a comma so the text-to-speech engine adds a slight pause
            other_points_text = ", ".join([f"{item.get('category')} at {item.get('value')}" for item in other_items])
            summary += f"The other values are: {other_points_text}."
            
        return summary
        
    # --- 3. DOT / LINE CHART LOGIC ---
    elif chart_type == "dot_line":
        data = extraction_result.get("data", [])
        x_label = extraction_result.get("x_axis_label", "the X axis")
        y_label = extraction_result.get("y_axis_label", "the Y axis")
        total_points = extraction_result.get("total_points", len(data))
        
        if not data:
            return f"This is a line chart {title_text}, but no data could be extracted."
        
        # Group data points by their category (class)
        categories = {}
        for item in data:
            cat_name = item.get("class", "unknown")
            if cat_name not in categories:
                categories[cat_name] = []
            categories[cat_name].append(item)
            
        classes = list(categories.keys())
        
        # Format the classes cleanly for the introductory sentence
        classes_text = ", ".join(classes[:3])
        if len(classes) > 3:
            classes_text += f", and {len(classes) - 3} other categories"
            
        # Introductory overview
        summary = (
            f"This is a scatter plot {title_text}, with {x_label} on the X axis and {y_label} on the Y axis. "
            f"It shows {total_points} data points across categories like {classes_text}. "
        )
        
        # Calculate and append max/min for each category
        category_summaries = []
        for cat_name, points in categories.items():
            max_item = max(points, key=lambda d: d.get("y", 0))
            min_item = min(points, key=lambda d: d.get("y", 0))
            
            # Using a predictable sentence structure that includes the X coordinate
            category_summaries.append(
                f"For {cat_name}, the highest value is {max_item.get('y')} when X is {max_item.get('x')}, "
                f"and the lowest value is {min_item.get('y')} when X is {min_item.get('x')}."
            )
            
        # Join the category breakdowns with spaces so they read as separate sentences
        summary += " ".join(category_summaries)
        
        return summary
    
    elif chart_type == "line":
        # The Donut model already generated the perfect text summary!
        summary = extraction_result.get("summary", "")
        if not summary:
            return f"This is a line chart {title_text}, but the Vision Engine could not generate a summary."
        return summary
        
    # --- FALLBACK ---
    else:
        return f"Data has been extracted for a {chart_type} chart, but the summary feature for this specific format is not available."


@app.post("/analyze-graph", response_class=PlainTextResponse)
async def analyze_graph(file: UploadFile = File(...)):
    try:
        start_time = time.time()
        
        # 1. Save the uploaded image temporarily
        temp_image_path = f"temp_{file.filename}"
        with open(temp_image_path, "wb") as buffer:
            buffer.write(await file.read())
            
        print(f"⏱️ Image received and saved in: {time.time() - start_time:.2f} seconds")

        # 2. Extract structured data
        extract_start = time.time()
        print(f"Extracting data from {file.filename}...")
        extraction_json_string = vision_engine.extract(temp_image_path)
        print(f"⏱️ AI Extraction finished in: {time.time() - extract_start:.2f} seconds")
        
        if os.path.exists(temp_image_path):
            os.remove(temp_image_path)

        extraction_result = json.loads(extraction_json_string)
        print(f"Extracted data: {extraction_result}")

        if "error" in extraction_result:
            return f"I am sorry, I could not clearly identify the data in this graph. Reason: {extraction_result['error']}"

        # 3. Generate summary using hardcoded logic instead of Groq
        audio_script = generate_audio_summary(extraction_result)
        
        print(f"✅ TOTAL TIME: {time.time() - start_time:.2f} seconds")

        return audio_script

    except Exception as e:
        return f"An error occurred while analyzing the graph: {str(e)}"
    

@app.post("/ask", response_class=PlainTextResponse)
async def ask_chart_rule_based(
    file: UploadFile = File(...), 
    question: str = Form(...) 
):
    # 1. Extract JSON using GraphVision
    temp_image_path = f"temp_qa_{file.filename}"
    with open(temp_image_path, "wb") as buffer:
        buffer.write(await file.read())
        
    extraction_json_string = vision_engine.extract(temp_image_path)
    os.remove(temp_image_path)
    
    extraction_result = json.loads(extraction_json_string)

    chart_type = extraction_result.get("chart_type", "unknown").lower()
    if chart_type == "line":
        summary = extraction_result.get("summary")
        if summary:
            return summary
        else:
            return "I couldn't extract a summary from this line chart."
        
        
    data = extraction_result.get("data")
    
    if not data:
        return "I couldn't extract data from this chart. Please ensure the image is clear."

    # 2. Pre-process Data and Question
    question_lower = question.lower()
    
    # Dynamically find all categories available in this specific chart
    available_categories = []
    if isinstance(data, dict): # It's a Pie Chart
        available_categories = list(data.keys())
    elif isinstance(data, list): # It's a Bar, Line, or Scatter plot
        for item in data:
            cat = item.get("category", item.get("class"))
            if cat and cat not in available_categories:
                available_categories.append(cat)

    # Check if the user is asking about a SPECIFIC category
    target_category = None
    for cat in available_categories:
        if cat.lower() in question_lower:
            target_category = cat
            break # Found the category they are asking about

    # If they asked for a specific category in a list-based chart, filter the data!
    filtered_data = data
    if target_category and isinstance(data, list):
        filtered_data = [item for item in data if item.get("category", item.get("class")) == target_category]

    # 3. Rule-Based Intent Routing
    
    # Intent 1: Asking for the highest/maximum
    if any(word in question_lower for word in ["highest", "maximum", "most", "largest", "top"]):
        
        if isinstance(filtered_data, dict): # Pie Charts
            max_cat = max(filtered_data, key=filtered_data.get)
            val = filtered_data[max_cat]
            return f"Based on the extracted data, the highest is {max_cat} with a value of {val}."
        
        elif isinstance(filtered_data, list): # Bar/Line/Scatter
            max_item = max(filtered_data, key=lambda d: d.get("value", d.get("y", 0)))
            cat = max_item.get("category", max_item.get("class", "unknown"))
            val = max_item.get("value", max_item.get("y"))
            
            # If they asked for a specific category in a scatter plot, include the X coordinate
            if target_category:
                x_val = max_item.get("x")
                if x_val is not None:
                    return f"For the {target_category} category, the highest value is {val} when X is {x_val}."
                return f"For the {target_category} category, the highest value is {val}."
            else:
                return f"Based on the extracted data, the overall highest is {cat} with a value of {val}."

    # Intent 2: Asking for the lowest/minimum
    elif any(word in question_lower for word in ["lowest", "minimum", "least", "smallest", "bottom"]):
        
        if isinstance(filtered_data, dict): # Pie Charts
            min_cat = min(filtered_data, key=filtered_data.get)
            val = filtered_data[min_cat]
            return f"Based on the extracted data, the lowest is {min_cat} with a value of {val}."
        
        elif isinstance(filtered_data, list): # Bar/Line/Scatter
            min_item = min(filtered_data, key=lambda d: d.get("value", d.get("y", 0)))
            cat = min_item.get("category", min_item.get("class", "unknown"))
            val = min_item.get("value", min_item.get("y"))
            
            if target_category:
                x_val = min_item.get("x")
                if x_val is not None:
                    return f"For the {target_category} category, the lowest value is {val} when X is {x_val}."
                return f"For the {target_category} category, the lowest value is {val}."
            else:
                return f"Based on the extracted data, the overall lowest is {cat} with a value of {val}."

    # Intent 3: Asking for a specific category's value (General Lookup)
    elif target_category:
        if isinstance(data, dict): # Pie Charts
            val = data[target_category]
            return f"Based on the extracted data, the value for {target_category} is {val}."
        
        elif isinstance(filtered_data, list): # Bar charts
            if len(filtered_data) == 1:
                val = filtered_data[0].get("value", filtered_data[0].get("y"))
                return f"Based on the extracted data, the value for {target_category} is {val}."
            else:
                # If there are multiple values (like a line chart), tell them to be more specific
                return f"The category {target_category} has {len(filtered_data)} different data points. Please ask for the highest or lowest value for this category."

    # Intent 4: Fallback
    return "I am sorry, I do not understand the question. Please ask for the highest value, the lowest value, or ask about a specific category."



# import os
# import json
# from fastapi import FastAPI, File, UploadFile
# from fastapi.responses import PlainTextResponse
# from fastapi.middleware.cors import CORSMiddleware
# from groq import Groq
# import time 

# # Import your newly updated PyPI library!
# from graphvision import GraphExtractor

# app = FastAPI(title="STEM Sight Backend")

# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"], # Allows any browser extension to connect
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )

# # Initialize the Groq Client (Looks for the GROQ_API_KEY environment variable)
# groq_client = Groq()

# # Initialize your custom PyPI library
# print("Initializing STEM Sight Vision Engine...")
# vision_engine = GraphExtractor()

# @app.get("/")
# async def root():
#     return {"message": "STEM Sight API is online and ready."}



# @app.post("/analyze-graph", response_class=PlainTextResponse)
# async def analyze_graph(file: UploadFile = File(...)):
#     try:
#         start_time = time.time()
        
#         # 1. Save the uploaded image temporarily
#         temp_image_path = f"temp_{file.filename}"
#         with open(temp_image_path, "wb") as buffer:
#             buffer.write(await file.read())
            
#         print(f"⏱️ Image received and saved in: {time.time() - start_time:.2f} seconds")

#         # 2. Extract structured data
#         extract_start = time.time()
#         print(f"Extracting data from {file.filename}...")
#         extraction_json_string = vision_engine.extract(temp_image_path)
#         print(f"⏱️ AI Extraction finished in: {time.time() - extract_start:.2f} seconds")
        
#         if os.path.exists(temp_image_path):
#             os.remove(temp_image_path)

#         extraction_result = json.loads(extraction_json_string)
#         print(f"Extracted data: {extraction_result}")

#         if "error" in extraction_result:
#             return f"I'm sorry, I couldn't clearly identify the data in this graph. Reason: {extraction_result['error']}"

#         graph_type = extraction_result.get("chart_type", "unknown")
#         graph_data = extraction_result.get("data", [])
#         x_label = extraction_result.get("x_axis_label", "Unknown X-Axis")
#         y_label = extraction_result.get("y_axis_label", "Unknown Y-Axis")
#         title = extraction_result.get("title", "Untitled Graph")
        
#         prompt = f"""
#         You are an accessibility assistant for visually impaired students. 
#         I am giving you extracted data from a {graph_type} chart. 
#         Title: {title}
#         X-Axis Label: {x_label}
#         Y-Axis Label: {y_label}
        
#         Please summarize this data in one short, conversational, and easy-to-understand paragraph.
#         Point out the largest and smallest values if relevant.
#         Do not use markdown, bold text, or asterisks. Write it exactly as it should be spoken out loud by a text-to-speech engine.
        
#         Data:
#         {graph_data}
#         """

#         # 3. Send to Groq
#         groq_start = time.time()
#         print("Generating audio script with Groq Llama 3...")
#         chat_completion = groq_client.chat.completions.create(
#             messages=[{"role": "user", "content": prompt}],
#             model="llama-3.1-8b-instant",
#             temperature=0.4,
#         )
#         print(f"⏱️ Groq Llama 3 finished in: {time.time() - groq_start:.2f} seconds")
#         print(f"✅ TOTAL TIME: {time.time() - start_time:.2f} seconds")

#         return chat_completion.choices[0].message.content.strip()

#     except Exception as e:
#         return f"An error occurred while analyzing the graph: {str(e)}"