Spaces:
Sleeping
Sleeping
File size: 4,686 Bytes
a406aa7 d8554f5 a406aa7 b4c1bd0 a406aa7 d8554f5 a406aa7 8bbf95a a406aa7 2aebe83 561cfd5 a406aa7 561cfd5 a406aa7 bffd196 a406aa7 d8554f5 a406aa7 d8554f5 a406aa7 d8554f5 a406aa7 d8554f5 a406aa7 d8554f5 a406aa7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import os
import shutil
import streamlit as st
from io import BytesIO
from llama_index.llms.openai import OpenAI
from qdrant_client.http import models
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.memory import ChatMemoryBuffer
import qdrant_client
# Fail fast at startup if the OpenAI key is missing — both the LLM and the
# embedding model below require it.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("Please set your OPENAI_API_KEY environment variable.")

# System prompt injected into every chat-engine conversation turn.
SYSTEM_PROMPT = (
    "You are an AI assistant who answers the user questions, "
    "use the schema fields to generate appropriate and valid json queries"
)

# Global LlamaIndex settings: the chat LLM and the embedding model used when
# indexing documents into the vector store.
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
# Seed the document list from ./new_file if that directory exists; otherwise
# start empty and rely on uploads to populate the index.
if os.path.exists("new_file"):
    documents = SimpleDirectoryReader("new_file").load_data()
else:
    documents = []

# In-memory Qdrant instance: all vectors are lost when the process exits.
client = qdrant_client.QdrantClient(location=":memory:")
collection_name = "paper"
# NOTE(review): recreate_collection drops any existing collection with this
# name, and is deprecated in recent qdrant-client releases in favor of
# delete_collection + create_collection — confirm against the pinned version.
client.recreate_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=1536,  # matches the text-embedding-ada-002 output dimension
        distance=models.Distance.COSINE,
    ),
    optimizers_config=models.OptimizersConfigDiff(
        indexing_threshold=20000,
    )
)

vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    vector_field="embedding",
    # NOTE(review): enable_hybrid requires a sparse embedding backend
    # (e.g. fastembed) to be installed — verify the deployment has it.
    enable_hybrid=True,
    batch_size=20,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Build the initial index over whatever documents were found at startup.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
# Conversation memory capped at ~3000 tokens; the same buffer object is
# reused when the engine is rebuilt after an upload, so history survives.
chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(
    chat_mode="context",  # retrieve index context for every user turn
    memory=chat_memory,
    system_prompt=SYSTEM_PROMPT,
)
def process_uploaded_file(uploaded_file: BytesIO) -> str:
    """Persist an uploaded file, index it, and rebuild the chat engine.

    The file is saved under ``uploads/``, staged alone in ``temp_upload/``
    so only the new file is parsed, appended to the global document list,
    and the global ``index`` and ``chat_engine`` are rebuilt.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` object, or ``None``.

    Returns:
        A human-readable status message for the UI.
    """
    if uploaded_file is None:
        return "No file uploaded."
    uploads_dir = "uploads"
    os.makedirs(uploads_dir, exist_ok=True)
    # Security: keep only the basename — an upload name like "../../x"
    # must not be able to write outside the uploads directory.
    file_name = os.path.basename(uploaded_file.name)
    dest_path = os.path.join(uploads_dir, file_name)
    with open(dest_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Stage the new file alone in a scratch directory so the reader parses
    # just this upload rather than everything accumulated in uploads/.
    temp_dir = "temp_upload"
    os.makedirs(temp_dir, exist_ok=True)
    for stale_name in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, stale_name))
    shutil.copy(dest_path, temp_dir)
    new_docs = SimpleDirectoryReader(temp_dir).load_data()
    global documents, index, chat_engine
    documents.extend(new_docs)
    # NOTE(review): rebuilding from the full document list re-embeds every
    # document on each upload; index.insert(...) would be incremental —
    # confirm before changing behavior.
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=chat_memory,
        system_prompt=SYSTEM_PROMPT,
    )
    return f"File '{file_name}' processed and added to the index."
def chat_with_ai(user_input: str) -> str:
    """Ask the global chat engine a question and return its answer.

    When the response carries source nodes whose metadata includes a
    ``file_name``, a de-duplicated "References:" line is appended.
    """
    response = chat_engine.chat(user_input)
    cited_files = []
    for source_node in response.source_nodes:
        meta = source_node.metadata
        if "file_name" in meta and meta["file_name"] not in cited_files:
            cited_files.append(meta["file_name"])
    answer = str(response)
    if not cited_files:
        return answer
    return answer + "\n\nReferences: " + ", ".join(cited_files)
# --- Streamlit UI: one tab for chatting, one for uploading new files ---
st.set_page_config(page_title="LlamaIndex Chat & File Upload", layout="wide")
st.title("Chat Interface for LlamaIndex with File Upload")

tab1, tab2 = st.tabs(["Chat", "Upload"])

with tab1:
    st.header("Chat with the AI")
    # Persist (question, answer) pairs across Streamlit reruns.
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []
    # Replay the accumulated history on every rerun.
    for chat in st.session_state["chat_history"]:
        st.markdown(f"**User:** {chat[0]}")
        st.markdown(f"**AI:** {chat[1]}")
        st.markdown("---")
    user_input = st.text_input("Enter your question:")
    if st.button("Send") and user_input:
        with st.spinner("Processing..."):
            response = chat_with_ai(user_input)
            st.session_state["chat_history"].append((user_input, response))
            # NOTE(review): st.experimental_rerun was removed in newer
            # Streamlit releases in favor of st.rerun — confirm the pinned
            # Streamlit version before upgrading.
            st.experimental_rerun()  # Refresh the page to show updated history
    if st.button("Clear History"):
        st.session_state["chat_history"] = []
        st.experimental_rerun()

with tab2:
    st.header("Upload a File")
    uploaded_file = st.file_uploader("Choose a file to upload", type=["txt", "pdf", "doc", "docx", "csv", "xlsx"])
    if st.button("Upload and Process"):
        if uploaded_file is not None:
            with st.spinner("Uploading and processing file..."):
                status = process_uploaded_file(uploaded_file)
                st.success(status)
        else:
            st.error("No file uploaded.")
|