import openai
import openai
import gradio as gr
from sentence_transformers import SentenceTransformer
from rank_bm25 import BM25Okapi
from datasets import load_dataset
import faiss
import numpy as np
import os

# Read the OpenAI API key from the "testkey" environment variable
# (the lookup yields None when that variable is not set).
openai.api_key = os.environ.get("testkey")

# Shared retrieval state; every name stays None until
# load_data_and_create_index() has run.
dataset = index = sbert_model = bm25 = None

# Step 1: Load dataset and create FAISS index and BM25 index
def load_data_and_create_index():
    """Load the passage corpus and build the BM25 and FAISS indexes.

    Populates the module-level ``dataset``, ``sbert_model``, ``bm25`` and
    ``index`` globals. Both indexes are built from the same list (the first
    passage of every record, in dataset order), so position ``i`` in either
    index corresponds to ``dataset[i]``.
    """
    global dataset, index, sbert_model, bm25

    # Only the first 1000 training records are loaded to keep start-up fast.
    dataset = load_dataset('ms_marco', 'v2.1', split='train[:1000]')

    # Lightweight SBERT model used for the dense embeddings.
    sbert_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

    # One passage per record: the first entry of the nested passage list.
    data_contexts = [item['passages']['passage_text'][0] for item in dataset]

    # Sparse index over whitespace-split tokens.
    tokenized_corpus = [doc.split(" ") for doc in data_contexts]
    bm25 = BM25Okapi(tokenized_corpus)

    # Dense index. Ask for NumPy output rather than a torch tensor: FAISS
    # consumes float32 ndarrays, and a tensor living on a GPU cannot be
    # converted with np.array().
    embeddings = sbert_model.encode(data_contexts, convert_to_numpy=True)
    data_vectors = np.ascontiguousarray(embeddings, dtype='float32')

    dimension = data_vectors.shape[1]  # Dimensionality of the embeddings
    index = faiss.IndexFlatL2(dimension)  # Exact search with L2 distance
    index.add(data_vectors)

# Step 2: Perform BM25 retrieval
def bm25_retrieval(query, top_k=5):
    """Return the passages that BM25 ranks highest for ``query``.

    Args:
        query: Free-text query string.
        top_k: Maximum number of passages to return. Coerced to ``int`` so a
            float-valued count (e.g. from a UI slider) still works as a
            slice bound.

    Returns:
        A list of passage strings, best match first.
    """
    # Same whitespace tokenization as the corpus the index was built from.
    query_tokens = query.split(" ")

    # Score every indexed passage, then read only the winning rows from the
    # dataset instead of materialising the whole passage list per query.
    scores = bm25.get_scores(query_tokens)
    top_positions = np.argsort(scores)[::-1][:int(top_k)]
    return [
        dataset[int(position)]['passages']['passage_text'][0]
        for position in top_positions
    ]

# Step 3: Retrieve Neighbors using FAISS (SBERT Embeddings)
def retrieve_neighbors(query, sbert_model, faiss_index, dataset, num_neighbors=5):
    """Return the passages whose embeddings are closest to ``query``.

    Args:
        query: Free-text query string.
        sbert_model: Encoder exposing ``encode(text, ...)``.
        faiss_index: Index exposing ``search(vectors, k)`` and returning
            ``(distances, indices)``.
        dataset: Indexable collection of records; the passage text is read
            from ``record['passages']['passage_text'][0]``.
        num_neighbors: Number of neighbours to request; coerced to ``int``.

    Returns:
        A list of passage strings in the order the index returned them. It
        can be shorter than ``num_neighbors`` when the index reports fewer
        hits.
    """
    # NumPy output avoids converting a (possibly GPU-resident) torch tensor.
    query_embedding = sbert_model.encode(query, convert_to_numpy=True)
    # The index searches a batch, so shape the single query as (1, dim).
    query_matrix = np.ascontiguousarray(query_embedding, dtype='float32').reshape(1, -1)

    _distances, indices = faiss_index.search(query_matrix, int(num_neighbors))

    # FAISS fills unused result slots with -1; without the filter that value
    # would silently select the last record of the dataset.
    return [
        dataset[int(idx)]['passages']['passage_text'][0]
        for idx in indices[0]
        if idx >= 0
    ]

# Step 4: Generate Response using GPT-4 and Context
def generate_response_gpt4(query, context_snippets, max_tokens=850, temperature=0.7):
    """Ask the chat model to answer ``query`` using retrieved passages.

    Args:
        query: The user's question.
        context_snippets: Sequence of passage strings; only the first ten
            are placed in the prompt.
        max_tokens: Upper bound on the length of the generated answer.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text of the first completion choice, or a string of the form
        ``"Error: ..."`` if anything raised while calling the API.
    """
    context = " ".join(context_snippets[:10])
    input_text = (
        f"Q: {query}\n"
        "Here is some relevant information:\n"
        f"{context}\n"
        "Based on this information, provide a unique answer to the question."
    )
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": input_text},
    ]

    try:
        # Chat Completions call in the openai>=1.0 client style.
        completion = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=conversation,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Best effort: surface the failure as text so the UI can display it.
        return f"Error: {e}"

# Step 5: Handle the query (integrate BM25 and FAISS retrieval)
def _interleave_unique(*ranked_lists):
    """Merge ranked lists round-robin by rank, keeping first occurrences only."""
    merged = []
    seen = set()
    longest = max((len(ranked) for ranked in ranked_lists), default=0)
    for rank in range(longest):
        for ranked in ranked_lists:
            if rank < len(ranked) and ranked[rank] not in seen:
                seen.add(ranked[rank])
                merged.append(ranked[rank])
    return merged


def handle_query(query, num_neighbors, max_tokens, temperature):
    """Run hybrid retrieval for ``query`` and generate an answer from it.

    Args:
        query: The user's question.
        num_neighbors: Number of passages to request from each retriever.
        max_tokens: Upper bound on the length of the generated answer.
        temperature: Sampling temperature for generation.

    Returns:
        A ``(contexts, response)`` tuple: the retrieved passages joined by
        blank lines, and the generated answer text.
    """
    # Counts may arrive as floats from UI sliders; both are used as integers.
    num_neighbors = int(num_neighbors)
    max_tokens = int(max_tokens)

    bm25_contexts = bm25_retrieval(query, top_k=num_neighbors)
    faiss_neighbors = retrieve_neighbors(query, sbert_model, index, dataset, num_neighbors)

    # Alternate between the two rankings and drop repeats. The generator only
    # uses the leading snippets, so plain concatenation would let a long BM25
    # list crowd out every FAISS hit, and a passage found by both retrievers
    # would occupy two slots.
    combined_contexts = _interleave_unique(bm25_contexts, faiss_neighbors)

    response = generate_response_gpt4(
        query, combined_contexts, max_tokens=max_tokens, temperature=temperature
    )
    return "\n\n".join(combined_contexts), response

# Step 6: Define Gradio Interface
def interface(query, num_neighbors, max_tokens, temperature):
    """Gradio callback: forward the UI inputs to ``handle_query``.

    Returns:
        A ``(retrieved_context, answer)`` pair, one value per output box.
    """
    retrieved_context, answer = handle_query(
        query, num_neighbors, max_tokens, temperature
    )
    return retrieved_context, answer

# Step 7: Build the BM25 and FAISS indexes up front, then serve the UI
load_data_and_create_index()

# The input components map onto the parameters of `interface` in order:
# query, num_neighbors, max_tokens, temperature.
# NOTE(review): the Max Tokens default (8500) does not lie on the slider's
# 50 + 500*k grid - confirm the intended default/step.
gr.Interface(
    title="AI Query System with Contextual Data Retrieval (BM25 + SBERT) and GPT-4",
    description="Submit a query. The system retrieves relevant passages using BM25 and FAISS, then GPT-4 generates a response with adjustable max tokens and temperature.",
    fn=interface,
    inputs=[
        gr.Textbox(placeholder="Enter your query here...", lines=2),
        gr.Slider(label="Number of Neighbors to Retrieve", minimum=1, maximum=100, value=5, step=1),
        gr.Slider(label="Max Tokens", minimum=50, maximum=15000, value=8500, step=500),
        gr.Slider(label="Temperature", minimum=0, maximum=1, value=0.7, step=0.1),
    ],
    outputs=[
        gr.Textbox(label="Retrieved Context (BM25 and FAISS)"),
        gr.Textbox(label="AI Generated Response"),
    ],
).launch()