Spaces:

raz-135
/

DocumentsChats

Sleeping

App Files Files Community

raz-135 committed on Aug 25, 2024

Commit

6a7fe6e

verified ·

1 Parent(s): 77d0743

Create app.py

Browse files

Files changed (1) hide show

app.py +72 -0

app.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import streamlit as st
+from langchain.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, TextLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
+from langchain.vectorstores import Chroma
+import os
+from io import BytesIO
+from groq import Groq
+# Initialize the Groq API client
+client = Groq(api_key='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')
+def get_groq_response(prompt, model="llama3-8b-8192"):
+    chat_completion = client.chat.completions.create(
+        messages=[{"role": "user", "content": prompt}],
+        model=model,
+    )
+    return chat_completion.choices[0].message.content
+def process_file(uploaded_file):
+    file_type = uploaded_file.type
+    if file_type == "application/pdf":
+        pdf_loader = PyPDFLoader(BytesIO(uploaded_file.getvalue()))
+        documents = pdf_loader.load()
+    elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+        word_loader = UnstructuredWordDocumentLoader(BytesIO(uploaded_file.getvalue()))
+        documents = word_loader.load()
+    elif file_type == "text/plain":
+        text_loader = TextLoader(BytesIO(uploaded_file.getvalue()), encoding="utf-8")
+        documents = text_loader.load()
+    else:
+        st.error("Unsupported file type.")
+        return None
+    return documents
+def answer_with_retrieval(prompt, retriever):
+    context = retriever.get_relevant_documents(prompt)
+    context_text = " ".join([doc.page_content for doc in context])
+    combined_prompt = f"{context_text}\n\n{prompt}"
+    return get_groq_response(combined_prompt)
+# Streamlit UI
+st.title("Upload and Interact with File Content")
+uploaded_file = st.file_uploader("Upload a file", type=["pdf", "docx", "txt"])
+if uploaded_file:
+    # Process the uploaded file
+    documents = process_file(uploaded_file)
+    if documents:
+        # Split the documents into chunks
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=50)
+        chunked_documents = text_splitter.split_documents(documents)
+        # Generate embeddings
+        HF_token = "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"
+        embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=HF_token, model_name="BAAI/bge-base-en-v1.5")
+        # Create a vector store
+        vectorstore = Chroma.from_documents(chunked_documents, embeddings)
+        retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
+        # User query
+        query = st.text_input("Enter your query:")
+        if query:
+            response = answer_with_retrieval(query, retriever)
+            st.write("### Response")
+            st.write(response)