Spaces:

vivekvar
/

chatpdf

Sleeping

App Files Files Community

vivekvar committed on Dec 22, 2024

Commit

79d6b4a

verified ·

1 Parent(s): a38964e

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -37

app.py CHANGED Viewed

@@ -1,27 +1,21 @@
 import streamlit as st
-from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
-from llama_index.llms.huggingface import HuggingFaceInferenceAPI
 from dotenv import load_dotenv
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import Settings
-import os
-import base64
 # Load environment variables
 load_dotenv()
-# Configure the Llama index settings
-Settings.llm = HuggingFaceInferenceAPI(
-    model_name="meta-llama/Llama-3.3-70B-Instruct",
-    tokenizer_name="meta-llama/Llama-3.3-70B-Instruct",
-    context_window=30000,
-    api_token=os.getenv("HF_TOKEN"),
-    max_new_tokens=512,
-    generate_kwargs={"temperature": 0.1},
-)
-Settings.embed_model = HuggingFaceEmbedding(
-    model_name="BAAI/bge-small-en-v1.5"
-)
 # Define the directory for persistent storage and data
 PERSIST_DIR = "./db"
@@ -31,48 +25,38 @@ DATA_DIR = "data"
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)
 def displayPDF(file):
     """Embed the PDF stored at path ``file`` in the page as an inline iframe.
     The file is read in binary mode, base64-encoded, and placed in a
     ``data:application/pdf`` URI that is rendered through ``st.markdown``.
     """
     with open(file, "rb") as pdf_handle:
         raw_bytes = pdf_handle.read()
     base64_pdf = base64.b64encode(raw_bytes).decode('utf-8')
     # The iframe is raw HTML, so st.markdown must be told to allow it.
     st.markdown(
         f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>',
         unsafe_allow_html=True,
     )
 def data_ingestion():
     """Index every document found in DATA_DIR and persist the index to PERSIST_DIR.
     Loads the files with ``SimpleDirectoryReader``, builds a ``VectorStoreIndex``
     from them, and writes the index's storage context to disk. Returns None.
     """
     documents = SimpleDirectoryReader(DATA_DIR).load_data()
     # The index is persisted through its own storage context, so no separate
     # StorageContext needs to be created here.
     index = VectorStoreIndex.from_documents(documents)
     index.storage_context.persist(persist_dir=PERSIST_DIR)
-def handle_query(query):
     # Answers `query` using the index persisted in PERSIST_DIR: the index is
     # loaded, wrapped in a query engine with a custom chat prompt, and queried.
     # Returns the answer text, or an "An error occurred: ..." string when the
     # query raises.
-    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-    index = load_index_from_storage(storage_context)
-    chat_text_qa_msgs = [
-        (
-            "user",
-            """created by vivek created for Neonflake Enterprises OPC Pvt Ltd
-            Context:
-            {context}
-            Question:
-            {query}
-            """
-        )
-    ]
-    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
-    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
     # NOTE(review): the call below passes a dict instead of the query string and
     # then calls .get() on the result, which is presumably a Response object
     # rather than a dict; confirm against the LlamaIndex query-engine API.
     # NOTE(review): the prompt above uses {context}/{query} placeholders; verify
     # these are the variable names the text QA template expects.
     try:
-        answer = query_engine.query({"context": "Extracted context from PDF", "query": query})
-        return answer.get('response', "Sorry, no answer found.")
     except Exception as e:
         return f"An error occurred: {str(e)}"
 # Streamlit app initialization
 st.title("Chat with your PDF 📄")
 st.markdown("Built by [vivek](https://github.com/saravivek-cyber)")
 if 'messages' not in st.session_state:
     st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]
 with st.sidebar:
     st.title("Menu:")
     uploaded_file = st.file_uploader("Upload your PDF File")
@@ -84,12 +68,14 @@ with st.sidebar:
         data_ingestion()
         st.success("Data ingestion completed.")
 user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
 if user_prompt:
     st.session_state.messages.append({'role': 'user', "content": user_prompt})
-    response = handle_query(user_prompt)
     st.session_state.messages.append({'role': 'assistant', "content": response})
 for message in st.session_state.messages:
     with st.chat_message(message['role']):
-        st.write(message['content'])

 import streamlit as st
+import os
+import base64
+from huggingface_hub import InferenceApi
 from dotenv import load_dotenv
+from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import Settings
 # Load environment variables
 load_dotenv()
+# Define the Hugging Face model API endpoint and your token
+model_name = "meta-llama/Llama-3.3-70B-Instruct"
+api_token = os.getenv("HF_TOKEN")
+# Initialize the HuggingFace API for inference
+inference = InferenceApi(repo_id=model_name, token=api_token)
 # Define the directory for persistent storage and data
 PERSIST_DIR = "./db"
 os.makedirs(DATA_DIR, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)
+# Function to display the PDF file in Streamlit
 def displayPDF(file):
     """Embed the PDF stored at path ``file`` in the page as an inline iframe.
     The file is read in binary mode, base64-encoded, and placed in a
     ``data:application/pdf`` URI that is rendered through ``st.markdown``.
     """
     with open(file, "rb") as pdf_handle:
         raw_bytes = pdf_handle.read()
     base64_pdf = base64.b64encode(raw_bytes).decode('utf-8')
     # The iframe is raw HTML, so st.markdown must be told to allow it.
     st.markdown(
         f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>',
         unsafe_allow_html=True,
     )
+# Function to process data ingestion
 def data_ingestion():
     """Index every document found in DATA_DIR and persist the index to PERSIST_DIR.
     Loads the files with ``SimpleDirectoryReader``, builds a ``VectorStoreIndex``
     from them, and writes the index's storage context to disk. Returns None.
     """
     documents = SimpleDirectoryReader(DATA_DIR).load_data()
     # The index is persisted through its own storage context, so no separate
     # StorageContext needs to be created here.
     index = VectorStoreIndex.from_documents(documents)
     index.storage_context.persist(persist_dir=PERSIST_DIR)
+# Function to handle the query using Hugging Face's Inference API
+def generate_response(input_text):
     try:
+        response = inference(inputs=input_text)
+        return response['generated_text']  # Adjust based on actual response structure
     except Exception as e:
         return f"An error occurred: {str(e)}"
 # Streamlit app initialization
 st.title("Chat with your PDF 📄")
 st.markdown("Built by [vivek](https://github.com/saravivek-cyber)")
+st.markdown("Chat here")
+# Initial message setup
 if 'messages' not in st.session_state:
     st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]
+# Sidebar for file upload and processing
 with st.sidebar:
     st.title("Menu:")
     uploaded_file = st.file_uploader("Upload your PDF File")
         data_ingestion()
         st.success("Data ingestion completed.")
+# Handling user input for querying the PDF content
 user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
 if user_prompt:
     st.session_state.messages.append({'role': 'user', "content": user_prompt})
+    response = generate_response(user_prompt)  # Use Hugging Face inference directly
     st.session_state.messages.append({'role': 'assistant', "content": response})
+# Displaying chat messages
 for message in st.session_state.messages:
     with st.chat_message(message['role']):
+        st.write(message['content'])