Spaces:

vivekvar
/

chatpdf

Sleeping

App Files Files Community

vivekvar committed on Dec 22, 2024

Commit

58cd55d

verified ·

1 Parent(s): 79d6b4a

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -22

app.py CHANGED Viewed

@@ -1,21 +1,29 @@
 import streamlit as st
-import os
-import base64
-from huggingface_hub import InferenceApi
-from dotenv import load_dotenv
 from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import Settings
 # Load environment variables
 load_dotenv()
-# Define the Hugging Face model API endpoint and your token
-model_name = "meta-llama/Llama-3.3-70B-Instruct"
-api_token = os.getenv("HF_TOKEN")
-# Initialize the HuggingFace API for inference
-inference = InferenceApi(repo_id=model_name, token=api_token)
 # Define the directory for persistent storage and data
 PERSIST_DIR = "./db"
@@ -25,38 +33,48 @@ DATA_DIR = "data"
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)
-# Function to display the PDF file in Streamlit
 def displayPDF(file):
     """Embed the PDF at ``file`` in the Streamlit page as a base64 data-URI iframe."""
     # Read the whole document as bytes; the handle is closed before encoding.
     with open(file, "rb") as pdf_handle:
         raw_bytes = pdf_handle.read()
     encoded = base64.b64encode(raw_bytes).decode('utf-8')
     iframe_html = f'<iframe src="data:application/pdf;base64,{encoded}" width="100%" height="600" type="application/pdf"></iframe>'
     # unsafe_allow_html is passed so the raw iframe markup is handed to st.markdown as HTML.
     st.markdown(iframe_html, unsafe_allow_html=True)
-# Function to process data ingestion
 def data_ingestion():
     """Build a vector index from the files in DATA_DIR and persist it to PERSIST_DIR."""
     documents = SimpleDirectoryReader(DATA_DIR).load_data()
     # The index's own storage context is what gets persisted below, so the
     # previously created (and never used) standalone StorageContext is dropped.
     index = VectorStoreIndex.from_documents(documents)
     index.storage_context.persist(persist_dir=PERSIST_DIR)
-# Function to handle the query using Hugging Face's Inference API
-def generate_response(input_text):
-    """Send ``input_text`` to the module-level ``inference`` client and return its text.
-
-    Returns the ``'generated_text'`` entry of the client's result, or an
-    "An error occurred: ..." string if anything raises.
-    """
     try:
-        response = inference(inputs=input_text)
-        # NOTE(review): indexing with 'generated_text' assumes the client returns a
-        # dict; if it returns a list of dicts this raises and is reported via the
-        # except branch below — confirm against the client's actual response shape.
-        return response['generated_text']  # Adjust based on actual response structure
     except Exception as e:
         return f"An error occurred: {str(e)}"
 # Streamlit app initialization
 st.title("Chat with your PDF 📄")
 st.markdown("Built by [vivek](https://github.com/saravivek-cyber)")
-st.markdown("Chat here")
-# Initial message setup
 if 'messages' not in st.session_state:
     st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]
-# Sidebar for file upload and processing
 with st.sidebar:
     st.title("Menu:")
     uploaded_file = st.file_uploader("Upload your PDF File")
@@ -68,14 +86,12 @@ with st.sidebar:
         data_ingestion()
         st.success("Data ingestion completed.")
-# Handling user input for querying the PDF content
 user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
 if user_prompt:
     st.session_state.messages.append({'role': 'user', "content": user_prompt})
-    response = generate_response(user_prompt)  # Use Hugging Face inference directly
     st.session_state.messages.append({'role': 'assistant', "content": response})
-# Displaying chat messages
 for message in st.session_state.messages:
     with st.chat_message(message['role']):
-        st.write(message['content'])

 import streamlit as st
 from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
+from llama_index.llms.huggingface import HuggingFaceInferenceAPI
+from dotenv import load_dotenv
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import Settings
+import os
+import base64
 # Load environment variables
 load_dotenv()
+# Configure the Llama index settings for using Hugging Face LLaMA model
+# NOTE(review): "facebook/bedrock-llama-7b" does not look like a published
+# Hugging Face repo id — confirm the model exists and is served by the
+# Inference API before relying on it.
+# NOTE(review): confirm that `api_token` is the keyword this class expects for
+# the credential; HF_TOKEN is read from the environment populated by load_dotenv().
+Settings.llm = HuggingFaceInferenceAPI(
+    model_name="facebook/bedrock-llama-7b",  # Use LLaMA 7B model here
+    tokenizer_name="facebook/bedrock-llama-7b",  # Tokenizer for the LLaMA model
+    # NOTE(review): 30000 is presumably larger than a 7B LLaMA's real context
+    # window — verify against the chosen model's limits.
+    context_window=30000,  # Set context window size (adjust if necessary)
+    api_token=os.getenv("HF_TOKEN"),  # Hugging Face API Token
+    max_new_tokens=512,
+    generate_kwargs={"temperature": 0.1},  # Control the generation temperature
+)
+# Set up Hugging Face Embedding model to use powerful LLaMA model
+# NOTE(review): this reuses the generative model id for embeddings; presumably
+# a dedicated sentence-embedding model is intended here — confirm.
+Settings.embed_model = HuggingFaceEmbedding(
+    model_name="facebook/bedrock-llama-7b"  # Powerful model for embeddings
+)
 # Define the directory for persistent storage and data
 PERSIST_DIR = "./db"
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)
 def displayPDF(file):
     """Embed the PDF at ``file`` in the Streamlit page as a base64 data-URI iframe."""
     # Read the whole document as bytes; the handle is closed before encoding.
     with open(file, "rb") as pdf_handle:
         raw_bytes = pdf_handle.read()
     encoded = base64.b64encode(raw_bytes).decode('utf-8')
     iframe_html = f'<iframe src="data:application/pdf;base64,{encoded}" width="100%" height="600" type="application/pdf"></iframe>'
     # unsafe_allow_html is passed so the raw iframe markup is handed to st.markdown as HTML.
     st.markdown(iframe_html, unsafe_allow_html=True)
 def data_ingestion():
     """Build a vector index from the files in DATA_DIR and persist it to PERSIST_DIR."""
     documents = SimpleDirectoryReader(DATA_DIR).load_data()
     # The index's own storage context is what gets persisted below, so the
     # previously created (and never used) standalone StorageContext is dropped.
     index = VectorStoreIndex.from_documents(documents)
     index.storage_context.persist(persist_dir=PERSIST_DIR)
+def handle_query(query):
+    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+    index = load_index_from_storage(storage_context)
+    chat_text_qa_msgs = [
+        (
+            "user",
+            """created by vivek created for Neonflake Enterprises OPC Pvt Ltd
+            Context:
+            {context}
+            Question:
+            {query}
+            """
+        )
+    ]
+    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
     try:
+        answer = query_engine.query({"context": "Extracted context from PDF", "query": query})
+        return answer.get('response', "Sorry, no answer found.")
     except Exception as e:
         return f"An error occurred: {str(e)}"
 # Streamlit app initialization
 st.title("Chat with your PDF 📄")
 st.markdown("Built by [vivek](https://github.com/saravivek-cyber)")
 if 'messages' not in st.session_state:
     st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]
 with st.sidebar:
     st.title("Menu:")
     uploaded_file = st.file_uploader("Upload your PDF File")
         data_ingestion()
         st.success("Data ingestion completed.")
 # Read one chat message from the user; falsy when nothing was submitted this run.
 user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
 if user_prompt:
     # Record the question, answer it, then record the answer, so both show up
     # in the history rendered below.
     st.session_state.messages.append({'role': 'user', "content": user_prompt})
+    response = handle_query(user_prompt)
     st.session_state.messages.append({'role': 'assistant', "content": response})
 # Render every message stored in session state, in order, under its role.
 for message in st.session_state.messages:
     with st.chat_message(message['role']):
+        st.write(message['content'])