Learning_Agent

Sleeping

File size: 5,498 Bytes

# utils.py
import re
import json
import streamlit as st

def update_progress(container, percentage, message=""):
    """Render an HTML progress bar into ``container``.

    Args:
        container: Object exposing ``markdown(body, unsafe_allow_html=...)``
            (presumably a Streamlit placeholder -- confirm against callers).
        percentage: Value interpolated as the bar's CSS width, in percent.
        message: Text placed inside the bar; defaults to an empty string.

    Both values are inserted into the markup as-is (no escaping or clamping).
    """
    bar_markup = "\n".join(
        (
            "",
            '        <div class="progress-container">',
            f'            <div class="progress-bar" style="width: {percentage}%">{message}</div>',
            "        </div>",
            "    ",
        )
    )
    # The body is raw HTML, so it has to be rendered unescaped.
    container.markdown(bar_markup, unsafe_allow_html=True)

def extract_section(text, section_name):
    """Extract the content of a named section from free-form text.

    The lookup is case-insensitive and ``section_name`` is matched literally
    (regex metacharacters in it are escaped). At the first place the name
    occurs, content is chosen in this order:

    1. ``Name: value`` on one line (optionally wrapped in ``*``/``_``
       markdown emphasis) -> the rest of that line.
    2. The name on a heading line -> the lines that follow, up to the next
       blank line or the end of the text.
    3. No newline after the name -> whatever follows the name.

    Args:
        text: The text to search.
        section_name: Heading/label to look for, e.g. ``"Market Size"``.

    Returns:
        The stripped section content, ``"No <name> information found"`` when
        the name does not occur, or ``"Error extracting <name>"`` when the
        search itself fails (for example when ``text`` is not a string).
    """
    try:
        name = re.escape(section_name)
        flags = re.DOTALL | re.IGNORECASE

        # One alternation so the leftmost occurrence of the name wins, with
        # an inline "Name: value" preferred over skipping to the next line.
        # Without the inline branch, "Market Size: $5B\nGrowth Rate: 10%"
        # would yield the *following* line for "Market Size".
        primary = re.search(
            rf"{name}(?:[*_ \t]*:[*_ \t]*(?P<inline>[^\s*_][^\n]*)"
            rf"|.*?\n(?P<block>.*?)(?=\n\n|$))",
            text,
            flags,
        )
        if primary:
            value = primary.group("inline")
            if value is None:
                value = primary.group("block")
            return value.strip()

        # Only reached when no newline follows the name.
        fallback_patterns = (
            rf"{name}[:\s](.*?)(?=\n\n|$)",
            rf"{name}\s*(.*?)(?=\n|$)",
        )
        for pattern in fallback_patterns:
            match = re.search(pattern, text, flags)
            if match:
                return match.group(1).strip()

        return f"No {section_name.lower()} information found"
    except Exception as e:
        # Deliberately best-effort: report the problem and return a
        # placeholder instead of propagating.
        print(f"Error extracting {section_name}: {str(e)}")
        return f"Error extracting {section_name.lower()}"

def extract_sources(text):
    """Collect source/citation snippets found in ``text``.

    Looks for ``Source:``, ``Reference:`` and ``Retrieved from:`` lines,
    anything in square brackets, and parenthesised http(s) URLs. Hits are
    grouped by pattern (in the order listed below), not by their position
    in the text, and duplicates are kept.

    Returns:
        A list of stripped matches; ``["Sources not explicitly mentioned"]``
        when nothing matches; ``["Error extracting sources"]`` when the
        search raises.
    """
    citation_patterns = (
        r"Source:.*?(?:\n|$)",
        r"\[.*?\]",
        r"\(https?://.*?\)",
        r"Reference:.*?(?:\n|$)",
        r"Retrieved from:.*?(?:\n|$)",
    )
    try:
        found = [
            hit.strip()
            for regex in citation_patterns
            for hit in re.findall(regex, text, re.MULTILINE)
        ]
    except Exception as e:
        print(f"Error extracting sources: {str(e)}")
        return ["Error extracting sources"]

    if found:
        return found
    return ["Sources not explicitly mentioned"]

def _default_metrics():
    """Return a fresh, empty metrics structure (new containers per call)."""
    return {
        "market_size_data": [],
        "growth_rates": [],
        "market_shares": {}
    }


def format_json_output(raw_output):
    """Format a crew/agent result into the report dictionary used by the app.

    The text is taken from ``raw_output.raw_output`` when that attribute
    exists, otherwise from ``str(raw_output)``. If the text contains a JSON
    object (first ``{`` through last ``}``) that parses, its values are used;
    every field it lacks is filled from the raw text via ``extract_section``
    / ``extract_sources``. If no JSON can be parsed, all fields come from the
    raw text.

    Args:
        raw_output: The result object or string to format.

    Returns:
        A dict with the keys ``exec_summary`` (``summary``, ``market_size``,
        ``growth_rate``, ``key_players``), ``detailed_report``, ``sources``
        and ``metrics``. On an unexpected error a placeholder structure with
        the same keys is returned instead of raising.
    """
    raw_text = None
    try:
        # Get raw text from output
        if hasattr(raw_output, 'raw_output'):
            raw_text = str(raw_output.raw_output)
        else:
            raw_text = str(raw_output)

        print("Raw text received:", raw_text[:500])  # Debug print

        # Try to find and parse a JSON object embedded in the text
        parsed_json = None
        try:
            match = re.search(r"\{[\s\S]*\}", raw_text)
            if match:
                json_str = match.group()
                parsed_json = json.loads(json_str)
                print("Successfully parsed JSON:", json_str[:500])  # Debug print
        except json.JSONDecodeError as e:
            print(f"JSON parsing error: {str(e)}")  # Debug print

        if isinstance(parsed_json, dict):
            summary_fields = parsed_json.get('exec_summary')
            # A non-object exec_summary (string, null, ...) must not discard
            # the rest of a valid report; treat it as "no summary fields".
            if not isinstance(summary_fields, dict):
                summary_fields = {}
        else:
            # No usable JSON: every field falls back to the raw text.
            print("Creating structured format from raw text")  # Debug print
            parsed_json = {}
            summary_fields = {}

        section_headings = (
            ("summary", "Executive Summary"),
            ("market_size", "Market Size"),
            ("growth_rate", "Growth Rate"),
            ("key_players", "Key Players"),
        )
        exec_summary = {}
        for key, heading in section_headings:
            # Only scan the raw text when the JSON lacks the field.
            if key in summary_fields:
                exec_summary[key] = summary_fields[key]
            else:
                exec_summary[key] = extract_section(raw_text, heading)

        if 'sources' in parsed_json:
            sources = parsed_json['sources']
        else:
            sources = extract_sources(raw_text)

        return {
            "exec_summary": exec_summary,
            "detailed_report": parsed_json.get('detailed_report', raw_text),
            "sources": sources,
            "metrics": parsed_json.get('metrics', _default_metrics())
        }
    except Exception as e:
        print(f"Error in format_json_output: {str(e)}")  # Debug print
        # Return a safe default structure
        return {
            "exec_summary": {
                "summary": "Error processing report",
                "market_size": "Data not available",
                "growth_rate": "Data not available",
                "key_players": "Data not available"
            },
            # raw_text is still None when the text conversion itself failed.
            "detailed_report": raw_text if raw_text is not None else str(raw_output),
            "sources": [],
            "metrics": _default_metrics()
        }