raz-135 committed on
Commit
6a7fe6e
·
verified ·
1 Parent(s): 77d0743

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, TextLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings
5
+ from langchain.vectorstores import Chroma
6
+ import os
7
+ from io import BytesIO
8
+ from groq import Groq
9
+
10
# Initialize the Groq API client.
# The key is read from the GROQ_API_KEY environment variable so that no
# credential is stored in source control.
# NOTE(review): any key that was previously committed in this file should be
# treated as leaked and rotated.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
12
+
13
def get_groq_response(prompt, model="llama3-8b-8192"):
    """Send ``prompt`` to Groq as a single user message and return the reply.

    Args:
        prompt: Text placed in the ``content`` field of the one user message.
        model: Model identifier forwarded unchanged to the completion call.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        messages=[user_message],
        model=model,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
19
+
20
def process_file(uploaded_file):
    """Load an uploaded file into a list of documents.

    The loaders used here open a file by path, so the uploaded bytes are
    first written to a temporary file, loaded from there, and the temporary
    file is removed afterwards.

    Args:
        uploaded_file: Upload object exposing ``.type`` (MIME type string)
            and ``.getvalue()`` (raw bytes of the upload).

    Returns:
        The list returned by the matching loader's ``load()``, or ``None``
        (after showing an error in the UI) when the MIME type is not one of
        PDF, DOCX or plain text.
    """
    import os
    import tempfile

    # MIME type -> (temp-file suffix, loader class, extra loader kwargs).
    loaders_by_mime = {
        "application/pdf": (".pdf", PyPDFLoader, {}),
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": (
            ".docx",
            UnstructuredWordDocumentLoader,
            {},
        ),
        "text/plain": (".txt", TextLoader, {"encoding": "utf-8"}),
    }

    entry = loaders_by_mime.get(uploaded_file.type)
    if entry is None:
        st.error("Unsupported file type.")
        return None
    suffix, loader_cls, loader_kwargs = entry

    # delete=False so the file can be closed and then reopened by the loader
    # (an open NamedTemporaryFile cannot be reopened by name on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name

    try:
        return loader_cls(tmp_path, **loader_kwargs).load()
    finally:
        # Always clean up, even when the loader raises on a malformed file.
        os.unlink(tmp_path)
37
+
38
def answer_with_retrieval(prompt, retriever):
    """Answer ``prompt`` using text retrieved for it as leading context.

    Args:
        prompt: The user's question; used both as the retrieval query and as
            the trailing part of the text sent to the model.
        retriever: Object exposing ``get_relevant_documents(query)`` whose
            results each have a ``page_content`` attribute.

    Returns:
        Whatever ``get_groq_response`` returns for the combined text.
    """
    retrieved_docs = retriever.get_relevant_documents(prompt)
    passages = [retrieved.page_content for retrieved in retrieved_docs]
    context_text = " ".join(passages)
    # Retrieved context first, then a blank line, then the question itself.
    return get_groq_response(f"{context_text}\n\n{prompt}")
43
+
44
# Streamlit UI
# Flow: upload a file -> load and chunk it -> embed the chunks into a Chroma
# vector store -> answer a typed query using the retrieved chunks as context.
st.title("Upload and Interact with File Content")

uploaded_file = st.file_uploader("Upload a file", type=["pdf", "docx", "txt"])

if uploaded_file:
    # Process the uploaded file
    documents = process_file(uploaded_file)

    if documents:
        # Split the documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=50)
        chunked_documents = text_splitter.split_documents(documents)

        # Generate embeddings.
        # The Hugging Face token comes from the HF_TOKEN environment variable
        # so that no credential is stored in source control.
        # NOTE(review): any token that was previously committed in this file
        # should be treated as leaked and rotated.
        HF_token = os.environ.get("HF_TOKEN")

        if not HF_token:
            st.error("HF_TOKEN environment variable is not set.")
        else:
            embeddings = HuggingFaceInferenceAPIEmbeddings(
                api_key=HF_token, model_name="BAAI/bge-base-en-v1.5"
            )

            # Create a vector store
            vectorstore = Chroma.from_documents(chunked_documents, embeddings)
            retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})

            # User query
            query = st.text_input("Enter your query:")

            if query:
                response = answer_with_retrieval(query, retriever)
                st.write("### Response")
                st.write(response)