import os
import shutil
import streamlit as st
from io import BytesIO


from llama_index.llms.openai import OpenAI
from qdrant_client.http import models
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.memory import ChatMemoryBuffer
import qdrant_client


openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("Please set your OPENAI_API_KEY environment variable.")

SYSTEM_PROMPT = (
    "You are an AI assistant that answers the user's questions. "
    "Use the schema fields to generate appropriate and valid JSON queries."
)


Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Load any seed documents shipped with the app; otherwise start empty.
if os.path.exists("new_file"):
    documents = SimpleDirectoryReader("new_file").load_data()
else:
    documents = []
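# Note (optional): SimpleDirectoryReader loads every file type it has a parser
# for; to restrict formats, a filter such as
# SimpleDirectoryReader("new_file", required_exts=[".pdf", ".txt"]) would work.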


# In-memory Qdrant instance: nothing is persisted once the process exits.
client = qdrant_client.QdrantClient(location=":memory:")

collection_name = "paper"

# Create (or reset) the collection. Hybrid retrieval needs both a dense and a
# sparse vector; "text-dense"/"text-sparse" are QdrantVectorStore's default
# vector names. recreate_collection drops any existing data of the same name.
client.recreate_collection(
    collection_name=collection_name,
    vectors_config={
        "text-dense": models.VectorParams(
            size=1536,  # dimensionality of text-embedding-ada-002
            distance=models.Distance.COSINE,
        ),
    },
    sparse_vectors_config={
        "text-sparse": models.SparseVectorParams(),
    },
    optimizers_config=models.OptimizersConfigDiff(
        indexing_threshold=20000,  # Qdrant's default; defers HNSW index builds
    ),
)
# Note: the original code passed vector_field="embedding", which is not a
# QdrantVectorStore parameter, so it has been dropped.
vector_store = QdrantVectorStore(
    collection_name=collection_name,
    client=client,
    enable_hybrid=True,  # dense + sparse retrieval; requires the fastembed extra
    batch_size=20,
)
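# Optional sanity check (illustrative, not required): confirm the collection
# carries the expected dense and sparse vector configuration before indexing.
# info = client.get_collection(collection_name)
# print(info.config.params.vectors, info.config.params.sparse_vectors)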
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Embed the seed documents into Qdrant and wire up a context chat engine
# backed by a rolling, token-limited conversation memory.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=chat_memory,
    system_prompt=SYSTEM_PROMPT,
)
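# Example (illustrative only): in "context" mode, chat() retrieves relevant
# nodes from the index, prepends them to the system prompt, and returns a
# response whose source_nodes expose the retrieved chunks.
# resp = chat_engine.chat("What is this paper about?")
# print(resp.response, [n.metadata.get("file_name") for n in resp.source_nodes])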


def process_uploaded_file(uploaded_file: BytesIO) -> str:
    """Persist an uploaded file, index its contents, and refresh the chat engine."""
    if uploaded_file is None:
        return "No file uploaded."

    uploads_dir = "uploads"
    os.makedirs(uploads_dir, exist_ok=True)

    file_name = uploaded_file.name
    dest_path = os.path.join(uploads_dir, file_name)
    with open(dest_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Stage the new file in an emptied temp dir so SimpleDirectoryReader
    # reads only this upload rather than everything in uploads/.
    temp_dir = "temp_upload"
    os.makedirs(temp_dir, exist_ok=True)
    for f_name in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, f_name))
    shutil.copy(dest_path, temp_dir)

    new_docs = SimpleDirectoryReader(temp_dir).load_data()

    global documents, chat_engine
    documents.extend(new_docs)
    # Insert only the new documents. Rebuilding with from_documents would
    # re-embed every document and duplicate vectors in the existing collection.
    for doc in new_docs:
        index.insert(doc)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=chat_memory,
        system_prompt=SYSTEM_PROMPT,
    )

    return f"File '{file_name}' processed and added to the index."

def chat_with_ai(user_input: str) -> str:
    """Send a message to the chat engine and append source-file references."""
    response = chat_engine.chat(user_input)
    references = response.source_nodes
    ref = []
    for node in references:
        if "file_name" in node.metadata and node.metadata["file_name"] not in ref:
            ref.append(node.metadata["file_name"])
    complete_response = str(response)
    if ref:
        complete_response += "\n\nReferences: " + ", ".join(ref)
    return complete_response
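# Example (illustrative): chat_with_ai("Summarize the uploaded paper") might
# return "The paper proposes ...\n\nReferences: paper.pdf".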

st.set_page_config(page_title="LlamaIndex Chat & File Upload", layout="wide")
st.title("Chat Interface for LlamaIndex with File Upload")


tab1, tab2 = st.tabs(["Chat", "Upload"])


with tab1:
    st.header("Chat with the AI")
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []

    for chat in st.session_state["chat_history"]:
        st.markdown(f"**User:** {chat[0]}")
        st.markdown(f"**AI:** {chat[1]}")
        st.markdown("---")

    user_input = st.text_input("Enter your question:")

    if st.button("Send") and user_input:
        with st.spinner("Processing..."):
            response = chat_with_ai(user_input)
        st.session_state["chat_history"].append((user_input, response))
        st.rerun()  # refresh the page to show the updated history

    if st.button("Clear History"):
        st.session_state["chat_history"] = []
        st.rerun()


with tab2:
    st.header("Upload a File")
    uploaded_file = st.file_uploader(
        "Choose a file to upload",
        type=["txt", "pdf", "docx", "csv", "xlsx"],  # legacy .doc dropped: no default reader for it
    )
    if st.button("Upload and Process"):
        if uploaded_file is not None:
            with st.spinner("Uploading and processing file..."):
                status = process_uploaded_file(uploaded_file)
            st.success(status)
        else:
            st.error("No file uploaded.")