File size: 3,146 Bytes
1e606ab
e6435f5
8ac3a92
751ea51
16f0a7b
 
9f1a2b6
16f0a7b
6194bb6
8ac3a92
 
 
2b8be0d
8ac3a92
 
5b8027e
 
5b89e4e
 
5b8027e
 
 
 
5b89e4e
 
 
 
5b8027e
 
a7b2a1f
 
8ac3a92
 
 
 
 
 
 
669440c
 
 
 
 
 
355fd96
 
669440c
ca750ca
 
 
 
 
669440c
 
 
 
 
 
8ac3a92
 
 
 
 
 
669440c
 
8ac3a92
 
 
 
 
 
669440c
 
 
 
8ac3a92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

import gradio as gr
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from llama_index.core import (
    Document,
    PromptTemplate,
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM


# Load documents
def extract_text_from_pdf(pdf_path):
    """Read the raw contents of *pdf_path* and return them as text.

    NOTE(review): this reads the file's raw bytes — it does NOT parse the
    PDF structure, so the result contains PDF syntax rather than clean
    prose. A real extractor (e.g. pypdf) should replace this; confirm
    what the downstream indexer actually needs.

    Args:
        pdf_path: Filesystem path to the PDF file.

    Returns:
        The file contents as ``str`` (latin-1 decoded — a lossless
        byte-to-str mapping, so nothing is dropped).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The original returned bytes on success and the literal string
    # "Error: <exc>" on failure; callers treated both as document text,
    # silently indexing an error message. Decode explicitly and let
    # genuine I/O failures propagate instead.
    with open(pdf_path, 'rb') as f:
        return f.read().decode('latin-1')

# Load the source document once (the original executed these exact two
# statements twice back to back; the second pass re-read the same file
# for no effect).
pdf_path = 't1.pdf'  # Path to your PDF file
documents = extract_text_from_pdf(pdf_path)

# Initialize Hugging Face LLM
# System prompt steering the model toward grounded Q&A answers; the
# leading "#" inside the string is part of the prompt text itself.
system_prompt = """# You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided
"""
# Wraps the user's raw query with no extra decoration before it reaches
# the LLM; "{query_str}" is the llama_index substitution placeholder.
query_wrapper_prompt = PromptTemplate("{query_str}")

def load_model(model_name):
    """Fetch a causal-LM checkpoint and its tokenizer from the HF hub.

    Args:
        model_name: Hugging Face model identifier or local path.

    Returns:
        A ``(tokenizer, model)`` pair ready for inference.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForCausalLM.from_pretrained(model_name),
    )

# Load tokenizer and model for the LLM backend via the helper defined
# above. The original repeated `from transformers import ...` twice
# (both names are already imported at the top of the file), fetched the
# checkpoint inline instead of using `load_model`, and redefined
# `system_prompt` / `query_wrapper_prompt` with values identical to the
# definitions earlier in this file — all of that duplication is removed.
tokenizer, model = load_model("meta-llama/Meta-Llama-3-8B")

# Wrap the preloaded HF model/tokenizer for llama_index.
# BUG FIX: the original passed the tokenizer/model *objects* to
# `tokenizer_name=` / `model_name=`, which expect string identifiers;
# preloaded objects belong in `tokenizer=` / `model=`.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    # do_sample=False selects greedy decoding, under which temperature
    # is ignored — the original's temperature=0.7 was a dead knob and
    # is dropped to avoid misleading future readers.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer=tokenizer,
    model=model,
    device_map="auto",
    # NOTE(review): these stop ids look copied from a StableLM example —
    # confirm they are valid for the Llama-3 tokenizer.
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    model_kwargs={"torch_dtype": torch.float16},
)

# Embedding model used to vectorize document chunks. Instantiated once —
# the original constructed the identical model twice in a row, doubling
# load time and memory for no effect.
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")

# Assemble the RAG pipeline: chunking/LLM/embedding config, then the
# vector index and query engine.
# NOTE(review): ServiceContext is deprecated in recent llama_index
# releases in favor of Settings — kept here to match the file's imports.
service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)

# BUG FIX: `from_documents` expects a sequence of llama_index Document
# objects, but `documents` here is the raw text of the PDF — wrap it.
index = VectorStoreIndex.from_documents(
    [Document(text=documents)],
    service_context=service_context,
)

# BUG FIX: `as_query_engine` has no `k` parameter; the retriever depth
# is configured via `similarity_top_k`.
query_engine = index.as_query_engine(similarity_top_k=1)

# --- Streamlit UI: take a question, run it through the RAG engine,
# render the answer as markdown. ---
st.title("Q&A Assistant")

user_query = st.text_input("Enter your query:")
if user_query:
    answer = query_engine.query(user_query).response
    st.markdown(f"**Response:** {answer}")