from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pypdf import PdfReader
import requests
import json
import os
import time
import traceback


def extract_text_from_pdf(pdf_path):
    """Extract the text of every page in the PDF, separated by newlines."""
    reader = PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        text += (page.extract_text() or "") + "\n"
    return text.strip()


def chunk_text(text, chunk_size=500, chunk_overlap=100):
    """Split text into overlapping chunks, preferring logical break points."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,  # Overlap preserves context across chunk boundaries
        separators=["\n\n", "\n", " ", ""],  # Prioritize paragraph, then line, then word breaks
    )
    return splitter.split_text(text)


embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def embedding_function(texts):
    """Embed a list of strings into dense vectors (as plain Python lists)."""
    return embedding_model.encode(texts, convert_to_numpy=True).tolist()
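
# --- Illustrative retrieval helper (a sketch, not part of the original pipeline).
# query_llm_with_context() below unpacks a (documents, similarity_scores) tuple but
# nothing in this module builds one; this shows one way it could be produced from
# the helpers above, using plain cosine similarity over in-memory embeddings. The
# function name and the in-memory store are assumptions for illustration.
import numpy as np


def retrieve_context(query, chunks, chunk_embeddings, top_k=5):
    """Rank chunks by cosine similarity to the query embedding.

    Returns (documents, similarity_scores), the tuple shape that
    query_llm_with_context() expects.
    """
    query_vec = np.array(embedding_function([query])[0])
    chunk_matrix = np.array(chunk_embeddings)
    # Cosine similarity = dot product of L2-normalized vectors
    query_vec = query_vec / np.linalg.norm(query_vec)
    chunk_matrix = chunk_matrix / np.linalg.norm(chunk_matrix, axis=1, keepdims=True)
    scores = chunk_matrix @ query_vec
    top_indices = np.argsort(scores)[::-1][:top_k]
    documents = [chunks[i] for i in top_indices]
    similarity_scores = [float(scores[i]) for i in top_indices]
    return documents, similarity_scores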

def generate_hypothetical_answer(query):
    """Ask the LLM for a plausible answer to the query (the HyDE pattern)."""
    # OpenAI-compatible chat completions endpoint on the Hugging Face router
    api_url = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"

    # Get the API token from an environment variable
    api_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not api_token:
        print("Error: HUGGINGFACE_API_TOKEN environment variable not set")
        return "Error: HUGGINGFACE_API_TOKEN environment variable not set"

    # Headers for the API request. Note: never log these; they contain the token.
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }

    # Prompt for generating a hypothetical answer
    prompt = f"""
Given the following query, generate a hypothetical answer that might be found in a document:

Query: {query}

Hypothetical answer:
"""

    # Chat completions payload, the format this endpoint expects
    payload = {
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 256,
        "temperature": 0.7,
        "top_p": 0.95,
    }

    try:
        print("Sending request to Hugging Face API for hypothetical answer...")
        print(f"API URL: {api_url}")
        print(f"Payload: {json.dumps(payload, indent=2)}")

        start_time = time.time()
        # Allow up to 5 minutes for the model to respond
        response = requests.post(api_url, headers=headers, json=payload, timeout=300)
        end_time = time.time()

        print(f"Received hypothetical answer from Hugging Face API in {end_time - start_time:.2f} seconds")
        print(f"Response status code: {response.status_code}")
        print(f"Response content: {response.text[:1000]}...")  # First 1000 chars

        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the chat completions response and extract the generated text
        result = response.json()
        print(f"Parsed response: {json.dumps(result, indent=2)[:1000]}...")  # First 1000 chars
        generated_text = result["choices"][0]["message"]["content"]
        return generated_text.strip()
    except requests.exceptions.Timeout:
        print("Request to Hugging Face API timed out after 5 minutes")
        return "The request timed out. The model is taking too long to respond. Please try again with a simpler query."
    except requests.exceptions.ConnectionError:
        print("Could not connect to Hugging Face API")
        return "Could not connect to the Hugging Face API. Please check your internet connection."
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        print(f"Response status code: {e.response.status_code}")
        print(f"Response content: {e.response.text}")
        if e.response.status_code == 401:
            return "Authentication error. Please check your Hugging Face API token."
        elif e.response.status_code == 429:
            return "Rate limit exceeded. Please try again later."
        return f"HTTP error occurred: {e}"
    except Exception as e:
        print(f"Error generating hypothetical answer: {e}")
        print(f"Traceback: {traceback.format_exc()}")
        return "Failed to generate a hypothetical answer."
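
# --- HyDE retrieval sketch (illustrative; builds on the assumed retrieve_context()
# helper above). In Hypothetical Document Embeddings (HyDE), the generated answer is
# embedded instead of the raw query: a plausible answer usually lies closer to the
# relevant chunks in embedding space than a short question does.
def retrieve_context_hyde(query, chunks, chunk_embeddings, top_k=5):
    """Retrieve chunks using the hypothetical answer as the search probe."""
    hypothetical_answer = generate_hypothetical_answer(query)
    return retrieve_context(hypothetical_answer, chunks, chunk_embeddings, top_k=top_k)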

def query_llm_with_context(query, context, top_n=3):
    """Answer the query using only the retrieved documents as context."""
    # Unpack the (documents, similarity_scores) tuple
    documents, similarity_scores = context

    # Use only the top N documents
    top_docs = documents[:top_n]

    # Join the top documents into a single context string
    context_text = "\n\n===Document Boundary===\n\n".join(top_docs)

    # Build a prompt with the context and query
    prompt = f"""
Context information is below.
---------------------
{context_text}
---------------------
Given the context information and not prior knowledge, answer the following query:

Query: {query}
"""

    # OpenAI-compatible chat completions endpoint on the Hugging Face router
    api_url = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-2-7b-chat-hf/v1/chat/completions"

    # Get the API token from an environment variable
    api_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not api_token:
        print("Error: HUGGINGFACE_API_TOKEN environment variable not set")
        return "Error: HUGGINGFACE_API_TOKEN environment variable not set"

    # Headers for the API request. Note: never log these; they contain the token.
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }

    # Chat completions payload, the format this endpoint expects
    payload = {
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.95,
    }

    try:
        print("Sending request to Hugging Face API...")
        print(f"API URL: {api_url}")
        print(f"Payload: {json.dumps(payload, indent=2)}")

        start_time = time.time()
        # Allow up to 5 minutes for the model to respond
        response = requests.post(api_url, headers=headers, json=payload, timeout=300)
        end_time = time.time()

        print(f"Received response from Hugging Face API in {end_time - start_time:.2f} seconds")
        print(f"Response status code: {response.status_code}")
        print(f"Response content: {response.text[:1000]}...")  # First 1000 chars

        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the chat completions response and extract the generated text
        result = response.json()
        print(f"Parsed response: {json.dumps(result, indent=2)[:1000]}...")  # First 1000 chars
        generated_text = result["choices"][0]["message"]["content"]
        return generated_text.strip()
    except requests.exceptions.Timeout:
        print("Request to Hugging Face API timed out after 5 minutes")
        return "The request timed out. The model is taking too long to respond. Please try again with a simpler query or fewer context documents."
    except requests.exceptions.ConnectionError:
        print("Could not connect to Hugging Face API")
        return "Could not connect to the Hugging Face API. Please check your internet connection."
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        print(f"Response status code: {e.response.status_code}")
        print(f"Response content: {e.response.text}")
        if e.response.status_code == 401:
            return "Authentication error. Please check your Hugging Face API token."
        elif e.response.status_code == 429:
            return "Rate limit exceeded. Please try again later."
        return f"HTTP error occurred: {e}"
    except Exception as e:
        print(f"Error querying LLM with context: {e}")
        print(f"Traceback: {traceback.format_exc()}")
        return "Failed to generate an answer with the provided context."
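
# --- End-to-end usage sketch (illustrative; "document.pdf" and the query string are
# placeholder assumptions). Ties the pieces together: extract, chunk, embed, retrieve
# via the hypothetical answer, then generate an answer grounded in the retrieved context.
if __name__ == "__main__":
    text = extract_text_from_pdf("document.pdf")
    chunks = chunk_text(text)
    chunk_embeddings = embedding_function(chunks)

    query = "What are the key findings of the report?"
    context = retrieve_context_hyde(query, chunks, chunk_embeddings)
    answer = query_llm_with_context(query, context)
    print(answer)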