vivekvar committed
Commit 79d6b4a · verified · 1 Parent(s): a38964e

Update app.py

Files changed (1):
  1. app.py  +23 -37
app.py CHANGED
@@ -1,27 +1,21 @@
 import streamlit as st
-from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
-from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+import os
+import base64
+from huggingface_hub import InferenceApi
 from dotenv import load_dotenv
+from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import Settings
-import os
-import base64
 
 # Load environment variables
 load_dotenv()
 
-# Configure the Llama index settings
-Settings.llm = HuggingFaceInferenceAPI(
-    model_name="meta-llama/Llama-3.3-70B-Instruct",
-    tokenizer_name="meta-llama/Llama-3.3-70B-Instruct",
-    context_window=30000,
-    api_token=os.getenv("HF_TOKEN"),
-    max_new_tokens=512,
-    generate_kwargs={"temperature": 0.1},
-)
-Settings.embed_model = HuggingFaceEmbedding(
-    model_name="BAAI/bge-small-en-v1.5"
-)
+# Define the Hugging Face model API endpoint and your token
+model_name = "meta-llama/Llama-3.3-70B-Instruct"
+api_token = os.getenv("HF_TOKEN")
+
+# Initialize the HuggingFace API for inference
+inference = InferenceApi(repo_id=model_name, token=api_token)
 
 # Define the directory for persistent storage and data
 PERSIST_DIR = "./db"
@@ -31,48 +25,38 @@ DATA_DIR = "data"
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)
 
+# Function to display the PDF file in Streamlit
 def displayPDF(file):
     with open(file, "rb") as f:
         base64_pdf = base64.b64encode(f.read()).decode('utf-8')
     pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
     st.markdown(pdf_display, unsafe_allow_html=True)
 
+# Function to process data ingestion
 def data_ingestion():
     documents = SimpleDirectoryReader(DATA_DIR).load_data()
     storage_context = StorageContext.from_defaults()
     index = VectorStoreIndex.from_documents(documents)
     index.storage_context.persist(persist_dir=PERSIST_DIR)
 
-def handle_query(query):
-    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-    index = load_index_from_storage(storage_context)
-    chat_text_qa_msgs = [
-        (
-            "user",
-            """created by vivek created for Neonflake Enterprises OPC Pvt Ltd
-            Context:
-            {context}
-            Question:
-            {query}
-            """
-        )
-    ]
-    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
-    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
-
+# Function to handle the query using Hugging Face's Inference API
+def generate_response(input_text):
     try:
-        answer = query_engine.query({"context": "Extracted context from PDF", "query": query})
-        return answer.get('response', "Sorry, no answer found.")
+        response = inference(inputs=input_text)
+        return response['generated_text']  # Adjust based on actual response structure
     except Exception as e:
         return f"An error occurred: {str(e)}"
 
 # Streamlit app initialization
 st.title("Chat with your PDF 📄")
 st.markdown("Built by [vivek](https://github.com/saravivek-cyber)")
+st.markdown("Chat here")
 
+# Initial message setup
 if 'messages' not in st.session_state:
     st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]
 
+# Sidebar for file upload and processing
 with st.sidebar:
     st.title("Menu:")
     uploaded_file = st.file_uploader("Upload your PDF File")
@@ -84,12 +68,14 @@ with st.sidebar:
         data_ingestion()
         st.success("Data ingestion completed.")
 
+# Handling user input for querying the PDF content
 user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
 if user_prompt:
     st.session_state.messages.append({'role': 'user', "content": user_prompt})
-    response = handle_query(user_prompt)
+    response = generate_response(user_prompt)  # Use Hugging Face inference directly
     st.session_state.messages.append({'role': 'assistant', "content": response})
 
+# Displaying chat messages
 for message in st.session_state.messages:
     with st.chat_message(message['role']):
-        st.write(message['content'])
+        st.write(message['content'])
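
Note on the new query path: the commit's own comment ("Adjust based on actual response structure") flags that `response['generated_text']` depends on the payload shape. For text-generation models, the legacy `huggingface_hub.InferenceApi` client typically returns a list of dicts such as `[{"generated_text": "..."}]`, so string-indexing the raw result would raise a TypeError. A minimal defensive sketch, assuming the usual text-generation payload:

import os
from huggingface_hub import InferenceApi

inference = InferenceApi(repo_id="meta-llama/Llama-3.3-70B-Instruct",
                         token=os.getenv("HF_TOKEN"))

def generate_response(input_text):
    try:
        response = inference(inputs=input_text)
        # Text-generation endpoints usually answer with a list of dicts,
        # e.g. [{"generated_text": "..."}]; a dict response typically
        # carries an "error" key. Handle both shapes instead of assuming one.
        if isinstance(response, list) and response:
            return response[0].get("generated_text", "Sorry, no answer found.")
        if isinstance(response, dict):
            return response.get("generated_text", response.get("error", str(response)))
        return str(response)
    except Exception as e:
        return f"An error occurred: {str(e)}"

(`InferenceApi` is deprecated in `huggingface_hub`; its replacement, `InferenceClient.text_generation`, returns the generated string directly.)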
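
The diff also drops the retrieval step: the removed `handle_query` loaded the persisted index and answered through a query engine, while `generate_response` sends the prompt straight to the model, so the index written by `data_ingestion` is no longer read. A hypothetical variant, not part of this commit (`generate_response_with_context` and `similarity_top_k=3` are illustrative choices; it reuses `generate_response` from app.py), that feeds retrieved context into the same raw inference call:

from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

PERSIST_DIR = "./db"  # same constant as in app.py

# Retrieval needs the same embedding model the index was built with; the
# commit removed the Settings.embed_model line, so it is restored here.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

def generate_response_with_context(query):
    # Load the index persisted by data_ingestion() and fetch the
    # top-scoring chunks for the query.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    retriever = index.as_retriever(similarity_top_k=3)
    context = "\n\n".join(node.get_content() for node in retriever.retrieve(query))
    # Prepend the retrieved context and reuse the commit's raw inference call.
    prompt = f"Context:\n{context}\n\nQuestion:\n{query}"
    return generate_response(prompt)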