"""Streamlit Q&A assistant over a local PDF, powered by Llama-3 via llama_index."""

import gradio as gr  # NOTE(review): unused in this script — kept in case other code imports it
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from llama_index.core import (
    PromptTemplate,
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Path to the PDF to index and the HF model to answer with.
PDF_PATH = "t1.pdf"
MODEL_NAME = "meta-llama/Meta-Llama-3-8B"

# System prompt steering the assistant's behavior.
system_prompt = """# You are a Q&A assistant. 
Your goal is to answer questions as accurately as possible based on the instructions and context provided """

query_wrapper_prompt = PromptTemplate("{query_str}")


def load_documents(pdf_path):
    """Parse *pdf_path* into llama-index ``Document`` objects.

    The previous implementation read the raw PDF bytes with ``open(..., 'rb')``
    and fed them to ``VectorStoreIndex.from_documents``, which requires
    ``Document`` instances — ``SimpleDirectoryReader`` does the actual parsing.
    """
    return SimpleDirectoryReader(input_files=[pdf_path]).load_data()


def load_model(model_name):
    """Load the Hugging Face tokenizer and causal-LM weights for *model_name*."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model


# Load documents from the PDF file (once — the original did this twice).
documents = load_documents(PDF_PATH)

# Load tokenizer and model (once, via the helper that was previously unused).
tokenizer, model = load_model(MODEL_NAME)

# Initialize Hugging Face LLM.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    # do_sample=False means greedy decoding; a temperature setting would be
    # ignored, so the original "temperature": 0.7 is dropped.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    # Pass the already-loaded objects; tokenizer_name/model_name expect hub
    # name *strings*, not tokenizer/model instances as the original did.
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    model_kwargs={"torch_dtype": torch.float16},
)

# Initialize Hugging Face embedding model.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Initialize service context (chunking, LLM, embeddings).
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)

# Build the vector index and query engine.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
# as_query_engine has no `k` kwarg — retrieval depth is `similarity_top_k`.
query_engine = index.as_query_engine(similarity_top_k=1)

# Streamlit interface
st.title("Q&A Assistant")
query = st.text_input("Enter your query:")
if query:
    result = query_engine.query(query)
    response = result.response
    st.markdown(f"**Response:** {response}")