Spaces:

raz-135
/

DocumentsChats

Sleeping

raz-135 committed on Aug 25, 2024

Commit

0935401

verified ·

1 Parent(s): 75a8f80

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,8 +3,8 @@ from langchain.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoad
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
 from langchain.vectorstores import Chroma
 import os
-from io import BytesIO
 from groq import Groq
 # Initialize the Groq API client
@@ -18,21 +18,27 @@ def get_groq_response(prompt, model="llama3-8b-8192"):
     return chat_completion.choices[0].message.content
 def process_file(uploaded_file):
-    file_type = uploaded_file.type
-    if file_type == "application/pdf":
-        pdf_loader = PyPDFLoader(BytesIO(uploaded_file.getvalue()))
         documents = pdf_loader.load()
-    elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        word_loader = UnstructuredWordDocumentLoader(BytesIO(uploaded_file.getvalue()))
         documents = word_loader.load()
-    elif file_type == "text/plain":
-        text_loader = TextLoader(BytesIO(uploaded_file.getvalue()), encoding="utf-8")
         documents = text_loader.load()
     else:
         st.error("Unsupported file type.")
         return None
     return documents
 def answer_with_retrieval(prompt, retriever):

 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
 from langchain.vectorstores import Chroma
+import tempfile
 import os
 from groq import Groq
 # Initialize the Groq API client
     return chat_completion.choices[0].message.content
 def process_file(uploaded_file):
     """Load an uploaded PDF, Word, or plain-text file into documents.

     The upload is written to a temporary file because the loaders used
     here are constructed from a filesystem path.

     Args:
         uploaded_file: Upload object exposing ``.type`` (a MIME type
             string) and ``.getvalue()`` (the raw file bytes).

     Returns:
         The value returned by the selected loader's ``load()`` call, or
         ``None`` (after showing an error via ``st.error``) when the MIME
         type is not one of the three supported types.
     """
     mime_type = uploaded_file.type
     # Pick the loader first so an unsupported upload never touches disk.
     if mime_type == "application/pdf":
         loader_cls, suffix = PyPDFLoader, ".pdf"
     elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
         loader_cls, suffix = UnstructuredWordDocumentLoader, ".docx"
     elif mime_type == "text/plain":
         loader_cls, suffix = TextLoader, ".txt"
     else:
         st.error("Unsupported file type.")
         return None
     # delete=False: the file is closed on leaving the ``with`` block and
     # must still exist when the loader opens it by path afterwards.
     with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
         temp_file.write(uploaded_file.getvalue())
         temp_file_path = temp_file.name
     try:
         return loader_cls(temp_file_path).load()
     finally:
         # Always remove the temporary file, even if the loader raises.
         os.remove(temp_file_path)
 def answer_with_retrieval(prompt, retriever):