import re
from markdownify import markdownify
import requests
import io
import pandas as pd
from PIL import Image
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from huggingface_hub import InferenceClient
from llama_index.core.agent.workflow import ReActAgent
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Hugging Face inference client used by the image-captioning and
# speech-recognition helpers below. No token is passed explicitly here.
# NOTE(review): presumably credentials are picked up from the environment — confirm.
client = InferenceClient(
  provider="hf-inference",
)

# Base URL of the scoring service; fetch_file_bytes appends "/files/<task_id>".
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Shared DuckDuckGo tool spec instance used by search_tool.
search_tool_spec = DuckDuckGoSearchToolSpec()

# Tool functions: web search, file download, and file-content extraction

def search_tool(query: str) -> str:
    """Search the web with DuckDuckGo and return the result snippets.

    Args:
        query: Free-text search query.

    Returns:
        A newline-separated list of result bodies, numbered from 1.
        An empty string is returned when the search yields no results.
    """
    print(f"Calling search_tool with query: {query}")
    results = search_tool_spec.duckduckgo_full_search(query=query)

    print(f"Search results length: {len(results)}")
    print(f"First search result: {results[0] if results else 'No results found'}")

    def _body(item) -> str:
        # The tool spec returns plain dicts ("title", "href", "body"), so
        # attribute access (item.body) raises AttributeError. The getattr
        # branch is kept only as a fallback for object-style results.
        if isinstance(item, dict):
            return item.get("body", "")
        return getattr(item, "body", "")

    # Flatten the result list into one numbered string for the LLM.
    return "\n".join(
        f"{position}. {_body(item)}"
        for position, item in enumerate(results, start=1)
    )

def fetch_file_bytes(task_id: str) -> bytes | None:
    """Download the file associated with a task from the scoring API.

    Args:
        task_id: Identifier appended to ``{DEFAULT_API_URL}/files/``.

    Returns:
        The raw response body as ``bytes``, or ``None`` when the request
        fails (connection error, timeout after 15 s, or an HTTP error status).
    """
    print(f"Calling fetch_file_bytes with task_id: {task_id}")
    try:
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
        # Turn 4xx/5xx responses into exceptions so they take the error path.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching file {task_id}: {e}")
        return None
    else:
        print(f"File {task_id} fetched successfully.")
        # .content is the undecoded body (bytes), not str.
        return response.content

def bytes_to_image(image_bytes: bytes) -> Image.Image:
    """Decode raw image bytes into a PIL image.

    Side effect: the decoded image is also written to ``temp_image.png``
    in the current working directory, overwriting any existing file.

    Args:
        image_bytes: Encoded image data in a format PIL can open.

    Returns:
        The opened ``PIL.Image.Image`` object.
    """
    print("Calling bytes_to_image")
    image = Image.open(io.BytesIO(image_bytes))
    # Keep an on-disk copy; the file name is fixed, so successive calls overwrite it.
    image.save("temp_image.png")
    return image

def document_bytes_to_text(doc_bytes: bytes) -> str:
    """Decode the raw bytes of a document as UTF-8 text.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    print("Calling document_bytes_to_text")
    decoded_text = doc_bytes.decode(encoding="utf-8", errors="strict")
    return decoded_text

def xlsx_to_text(file_bytes: bytes) -> str:
    """Render an XLSX workbook, supplied as bytes, as a plain-text table.

    The bytes are parsed by pandas with the openpyxl engine (using pandas'
    default sheet selection) and the resulting DataFrame is formatted
    without its index column.
    """
    print("Calling xlsx_to_text")
    sheet = pd.read_excel(io.BytesIO(file_bytes), engine="openpyxl")
    rendered = sheet.to_string(index=False)
    return rendered

def extract_text_from_image(image_url: bytes) -> str:
    """Generate a text caption for an image with a BLIP captioning model.

    This is image captioning through the Hugging Face inference client,
    not OCR: the result describes the picture rather than transcribing
    text that appears in it.

    Args:
        image_url: The image to caption. Annotated as raw bytes; it is
            passed to the client unchanged.

    Returns:
        The ``generated_text`` field of the inference response.
    """
    print("Calling extract_text_from_image")
    # image_to_text takes the image positionally and accepts only `model`
    # as a keyword; the former `image_url=` / `task=` keywords raised TypeError.
    caption = client.image_to_text(
        image_url,
        model="Salesforce/blip-image-captioning-base",
    )
    return caption.generated_text

def extract_text_from_csv(file_bytes: bytes) -> str:
    """Render CSV content, supplied as bytes, as a plain-text table.

    The bytes are parsed with ``pandas.read_csv`` and the resulting
    DataFrame is formatted without its index column.
    """
    print("Calling extract_text_from_csv")
    table = pd.read_csv(io.BytesIO(file_bytes))
    rendered = table.to_string(index=False)
    return rendered

def extract_text_from_code_file(bytes: bytes) -> str:
    """Decode the raw bytes of a source-code file as UTF-8 text.

    Note: the parameter name shadows the ``bytes`` builtin inside this
    function; it is kept because it is part of the callable's interface.

    Raises:
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    print("Calling extract_text_from_code_file")
    source_text = bytes.decode(encoding="utf-8", errors="strict")
    return source_text

def extract_text_from_audio_file(file_bytes: bytes) -> str:
    """Transcribe audio content to text.

    The audio is sent to the Hugging Face inference client's automatic
    speech recognition endpoint using the ``openai/whisper-large-v2``
    model, and the ``text`` field of the response is returned.
    """
    print("Calling extract_text_from_audio_file")
    transcription = client.automatic_speech_recognition(
        file_bytes,
        model="openai/whisper-large-v2",
    )
    return transcription.text

def webpage_to_markdown(url: str) -> str:
    """
    Access a web page and return its content as markdown.
    Limits output to 10,000 characters to avoid excessive responses.

    Args:
        url: Address of the page to download.

    Returns:
        The page body converted to markdown, with runs of three or more
        newlines collapsed to a single blank line and the total length
        capped at 10,000 characters (a short notice replaces the tail of
        truncated content). On failure, a human-readable error message is
        returned instead; this function does not raise.
    """
    max_chars = 10_000
    truncation_notice = "\n\n[content truncated]"

    print(f"Calling webpage_to_markdown with url: {url}")
    try:
        response = requests.get(url, timeout=20)
        response.raise_for_status()
        markdown_content = markdownify(response.text).strip()
        # Collapse excessive vertical whitespace left over from the HTML.
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

        print(f"Markdown initial content: {markdown_content[:500]}...")

        # Enforce the documented size cap, which was previously never applied.
        # The notice is counted in the budget so the result stays within max_chars.
        if len(markdown_content) > max_chars:
            keep = max_chars - len(truncation_notice)
            markdown_content = markdown_content[:keep] + truncation_notice

        return markdown_content
    except requests.exceptions.Timeout:
        return "Request timed out. Please try again later or check the URL."
    except requests.exceptions.RequestException as e:
        return f"Error fetching the webpage: {str(e)}"
    except Exception as e:
        # Tool boundary: report conversion errors to the caller as text.
        return f"Unexpected error: {str(e)}"


# LLM shared by every agent declared below.
# NOTE(review): both max_tokens and max_new_tokens are set to 5000 — confirm
# which of the two this wrapper actually honours and drop the other if unused.
llm = HuggingFaceInferenceAPI(
  model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
  max_tokens=5000,
  max_new_tokens=5000,
)
# Agent declarations: one single-tool ReActAgent per helper function above,
# all backed by the shared `llm`.
# NOTE(review): no AgentWorkflow is constructed in the visible code —
# presumably these agents are combined elsewhere; confirm.

# Web search via search_tool.
search_agent = ReActAgent(
    name="search_agent",
    description="Searches the web using DuckDuckGo.",
    system_prompt="A helpful assistant that can search the web using DuckDuckGo.",
    tools=[search_tool],
    llm=llm,
)

# Task-file download via fetch_file_bytes.
fetch_file_agent = ReActAgent(
    name="fetch_file_agent",
    description="Fetches a file from a given task ID.",
    system_prompt="A helpful assistant that can fetch files by task ID.",
    tools=[fetch_file_bytes],
    llm=llm,
)

# Image decoding via bytes_to_image.
# NOTE(review): the wrapped tool takes raw bytes and returns a PIL image object;
# confirm how an LLM-driven agent is expected to supply and consume these.
bytes_to_image_agent = ReActAgent(
    name="bytes_to_image_agent",
    description="Converts bytes to an image.",
    system_prompt="A helpful assistant that can convert bytes to an image.",
    tools=[bytes_to_image],
    llm=llm,
)

# UTF-8 decoding of document bytes via document_bytes_to_text.
document_bytes_to_text_agent = ReActAgent(
    name="document_bytes_to_text_agent",
    description="Converts document bytes to text.",
    system_prompt="A helpful assistant that can convert document bytes to text.",
    tools=[document_bytes_to_text],
    llm=llm,
)

# Spreadsheet rendering via xlsx_to_text.
xlsx_to_text_agent = ReActAgent(
    name="xlsx_to_text_agent",
    description="Converts XLSX file bytes to text.",
    system_prompt="A helpful assistant that can convert XLSX file bytes to text.",
    tools=[xlsx_to_text],
    llm=llm,
)

# Image captioning via extract_text_from_image. The wrapped tool calls a BLIP
# image-captioning model (not Tesseract/OCR), so the description exposed to
# other agents states that explicitly.
extract_text_from_image_agent = ReActAgent(
    name="extract_text_from_image_agent",
    description="Describes an image as text using a BLIP image-captioning model (not OCR).",
    system_prompt="A helpful assistant that can extract text from images.",
    tools=[extract_text_from_image],
    llm=llm,
)

# CSV rendering via extract_text_from_csv.
extract_text_from_csv_agent = ReActAgent(
    name="extract_text_from_csv_agent",
    description="Extracts text from a CSV file.",
    system_prompt="A helpful assistant that can extract text from CSV files.",
    tools=[extract_text_from_csv],
    llm=llm,
)

# UTF-8 decoding of source files via extract_text_from_code_file.
extract_text_from_code_file_agent = ReActAgent(
    name="extract_text_from_code_file_agent",
    description="Extracts text from a code file.",
    system_prompt="A helpful assistant that can extract text from code files.",
    tools=[extract_text_from_code_file],
    llm=llm,
)

# Speech-to-text via extract_text_from_audio_file.
extract_text_from_audio_file_agent = ReActAgent(
    name="extract_text_from_audio_file_agent",
    description="Extracts text from an audio file.",
    system_prompt="A helpful assistant that can extract text from audio files.",
    tools=[extract_text_from_audio_file],
    llm=llm,
)

# Web page retrieval via webpage_to_markdown.
webpage_to_markdown_agent = ReActAgent(
    name="webpage_to_markdown_agent",
    description="Accesses a web page by URL and returns the content as markdown.",
    system_prompt="A helpful assistant that can access web pages and return markdown.",
    tools=[webpage_to_markdown],
    llm=llm,
)