"""Streamlit Q&A assistant over a local PDF, powered by Llama-3 via llama_index."""

import gradio as gr  # NOTE(review): unused in this script — kept in case other code imports it
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from llama_index.core import (
    PromptTemplate,
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Path to the PDF to index and the HF model to answer with.
PDF_PATH = "t1.pdf"
MODEL_NAME = "meta-llama/Meta-Llama-3-8B"

# System prompt steering the assistant's behavior.
system_prompt = """# You are a Q&A assistant. 
Your goal is to answer questions as accurately as possible based on the instructions and context provided """

query_wrapper_prompt = PromptTemplate("{query_str}")


def load_documents(pdf_path):
    """Parse *pdf_path* into llama-index ``Document`` objects.

    The previous implementation read the raw PDF bytes with ``open(..., 'rb')``
    and fed them to ``VectorStoreIndex.from_documents``, which requires
    ``Document`` instances — ``SimpleDirectoryReader`` does the actual parsing.
    """
    return SimpleDirectoryReader(input_files=[pdf_path]).load_data()


def load_model(model_name):
    """Load the Hugging Face tokenizer and causal-LM weights for *model_name*."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model


# Load documents from the PDF file (once — the original did this twice).
documents = load_documents(PDF_PATH)

# Load tokenizer and model (once, via the helper that was previously unused).
tokenizer, model = load_model(MODEL_NAME)

# Initialize Hugging Face LLM.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    # do_sample=False means greedy decoding; a temperature setting would be
    # ignored, so the original "temperature": 0.7 is dropped.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    # Pass the already-loaded objects; tokenizer_name/model_name expect hub
    # name *strings*, not tokenizer/model instances as the original did.
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    model_kwargs={"torch_dtype": torch.float16},
)

# Initialize Hugging Face embedding model.
embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Initialize service context (chunking, LLM, embeddings).
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)

# Build the vector index and query engine.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
# as_query_engine has no `k` kwarg — retrieval depth is `similarity_top_k`.
query_engine = index.as_query_engine(similarity_top_k=1)

# Streamlit interface
st.title("Q&A Assistant")
query = st.text_input("Enter your query:")
if query:
    result = query_engine.query(query)
    response = result.response
    st.markdown(f"**Response:** {response}")