import os
import base64
import json
import requests
from time import sleep
from tqdm import tqdm

# -------------------------------------------------------------------
# Helper: encode image as base64
# -------------------------------------------------------------------
def encode_image(image_path: str) -> str:
    """Return the bytes of the file at ``image_path`` as a base64 text string.

    The file is opened in binary mode and read in full; the base64 output is
    decoded to ``str`` so it can be embedded directly in a JSON payload.
    """
    with open(image_path, mode="rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")


# -------------------------------------------------------------------
# Main function: use InternVL to analyze obstacles
# -------------------------------------------------------------------
def _build_internvl_payload(
    model: str, prompt_text: str, img_b64: str, temperature: float
) -> dict:
    """Build the chat-completions request body for one base64-encoded PNG."""
    return {
        "model": model,
        "messages": [
            # Following the style of objects_name_intern.py: a short "session"
            # exchange first, then the actual multimodal user message.
            {"role": "user", "content": "Start session."},
            {"role": "assistant", "content": "Session started."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{img_b64}"
                        },
                    },
                ],
            },
        ],
        "temperature": temperature,
        "top_p": 0.9,
        "max_tokens": 2048,
    }


def analyze_obstacles_in_folder_internvl(
    image_dir: str,
    output_path: str,
    api_key: str = None,
    model: str = "internvl3.5-241b-a28b",
    temperature: float = 1.0,
    sleep_time: float = 1.0,
    timeout: float = 300.0,
):
    """Send every .png in ``image_dir`` to InternVL and log the answers.

    Images are processed in sorted filename order. For each image one block is
    appended to ``output_path``: an ``IMAGE: <path>`` line, then either the
    model's answer or an ``ERROR: <message>`` line, then a dashed separator.
    A failure on one image is reported and recorded, and processing continues
    with the next image.

    Args:
        image_dir: Directory scanned (non-recursively) for ``.png`` files;
            the extension match is case-insensitive.
        output_path: Text file the results are appended to (opened in "a"
            mode, so earlier runs are preserved). It is not created when no
            images are found.
        api_key: InternVL API key. When ``None``, the ``INTERNVL_API_KEY``
            environment variable is used instead.
        model: Model name sent in the request body.
        temperature: Sampling temperature sent in the request body.
        sleep_time: Seconds to pause after each image, whether the request
            succeeded or failed, to throttle the request rate.
        timeout: Seconds ``requests`` waits on the HTTP call before giving
            up, so one stalled connection cannot hang the whole batch.

    Raises:
        ValueError: If no API key is given and the env var is unset or empty.
        OSError: If ``image_dir`` cannot be listed or ``output_path`` cannot
            be opened.
    """
    if api_key is None:
        # Try to read from environment variable as a fallback
        api_key = os.getenv("INTERNVL_API_KEY")

    if not api_key:
        raise ValueError(
            "No InternVL API key provided. "
            "Pass --api_key on the command line or set the INTERNVL_API_KEY env var."
        )

    url = "https://chat.intern-ai.org.cn/api/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    # Collect all PNG files; sub-directories that merely end in ".png" are
    # skipped because they cannot be read as images.
    image_paths = sorted(
        path
        for path in (os.path.join(image_dir, name) for name in os.listdir(image_dir))
        if path.lower().endswith(".png") and os.path.isfile(path)
    )

    if not image_paths:
        print(f"No .png images found in {image_dir}")
        return

    # The single question asked about every image.
    questions_prompt = (
        "1. Identify the obstacle on the sidewalk or walkable path ahead."
    )

    # "System" message text - prepended to the question inside the user turn
    # rather than sent as a separate system-role message.
    system_message_text = (
        "I am fully blind. You are a mobility assistant who analyzes the scene and describes obstacles for safe navigation. Be concise and accurate. Start your answer with 1. "
    )

    # The prompt does not depend on the image, so build it once.
    prompt_text = system_message_text + "\n\n" + questions_prompt
    separator = "\n" + "-" * 80 + "\n\n"

    with open(output_path, "a", encoding="utf-8") as out_f:
        for img_path in tqdm(image_paths, desc="Processing images with InternVL"):
            try:
                payload = _build_internvl_payload(
                    model, prompt_text, encode_image(img_path), temperature
                )
                response = requests.post(
                    url, headers=headers, json=payload, timeout=timeout
                )
                response.raise_for_status()
                content = response.json()["choices"][0]["message"]["content"]
                if not isinstance(content, str):
                    # Give a readable error instead of an AttributeError from
                    # calling .strip() on None or a structured value.
                    raise ValueError(
                        f"Unexpected message content in response: {content!r}"
                    )
                record = content.strip()
            except Exception as e:
                # Deliberate per-image boundary: one bad image or request
                # must not abort the batch, so report it and carry on.
                print(f"Error processing {img_path}: {e}")
                record = f"ERROR: {e}"

            out_f.write(f"IMAGE: {img_path}\n")
            out_f.write(record + "\n")
            out_f.write(separator)
            # Flush per image so partial results survive an interrupted run.
            out_f.flush()

            # Throttle after failures too, so error responses (for example
            # rate limiting) are not answered with an immediate burst.
            sleep(sleep_time)

    print(f"Done. Results saved to {output_path}")


# -------------------------------------------------------------------
# CLI
# -------------------------------------------------------------------
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the batch analysis.
    import argparse

    parser = argparse.ArgumentParser(
        description="Process PNG images with InternVL for obstacle analysis."
    )
    parser.add_argument("--image_dir", required=True, help="Folder of .png images")
    parser.add_argument("--output", required=True, help="Output text file")
    parser.add_argument(
        "--api_key",
        # SECURITY: never bake a credential into source as the default. With
        # None, the analysis function falls back to INTERNVL_API_KEY and
        # raises a clear error when neither is supplied. Any key that was
        # previously committed here should be treated as leaked and revoked.
        default=None,
        help="InternVL API key (or set INTERNVL_API_KEY env var).",
    )
    parser.add_argument(
        "--model",
        default="internvl3.5-241b-a28b",
        # %(default)s keeps the help text in sync with the real default.
        help="Model name for InternVL (default: %(default)s)",
    )
    parser.add_argument(
        # NOTE: the CLI default (0.2) is lower than the function's own
        # default (1.0); the CLI value is always passed explicitly below.
        "--temperature", type=float, default=0.2, help="Sampling temperature"
    )
    parser.add_argument(
        "--sleep",
        type=float,
        default=1.0,
        help="Sleep time between requests (seconds)",
    )

    args = parser.parse_args()

    analyze_obstacles_in_folder_internvl(
        image_dir=args.image_dir,
        output_path=args.output,
        api_key=args.api_key,
        model=args.model,
        temperature=args.temperature,
        sleep_time=args.sleep,
    )