Spaces:

pradeep4321
/

sample_rag

Sleeping

App Files Files Community

pradeep4321 committed on Apr 2

Commit

d1a57bc

verified ·

1 Parent(s): ddf3e90

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +99 -34

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,105 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
 """
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import pandas as pd
+import numpy as np
+import faiss
+import os
+from sentence_transformers import SentenceTransformer
+from huggingface_hub import InferenceClient
+# ==============================
+# CONFIG
+# ==============================
+# Configure the browser tab title and wide layout, then render the page heading.
+# NOTE(review): Streamlit has historically required set_page_config() to run
+# before any other st.* command - keep it first unless the pinned version
+# is confirmed to allow otherwise.
+st.set_page_config(page_title="Company ChatGPT", layout="wide")
+st.title("🏢 Company AI Assistant")
+# ==============================
+# LOAD MODELS
+# ==============================
+@st.cache_resource
+def load_models():
+    """Build the sentence encoder and the hosted-LLM client.
+
+    Decorated with ``st.cache_resource`` so the two objects are created once
+    and shared across Streamlit reruns instead of being rebuilt each time.
+
+    Returns:
+        A ``(embed_model, llm)`` tuple: the ``SentenceTransformer`` encoder
+        and the ``InferenceClient`` used for text generation.
+    """
+    encoder = SentenceTransformer("all-MiniLM-L6-v2")
+    # os.environ.get() yields None when HF_TOKEN is unset; that value is
+    # handed to the client unchanged.
+    hf_token = os.environ.get("HF_TOKEN")
+    # NOTE(review): confirm this model id resolves on the Hub.
+    client = InferenceClient(model="meta-llama/Llama-3-8b-instruct", token=hf_token)
+    return encoder, client
+embed_model, llm = load_models()
+# ==============================
+# LOAD DATA
+# ==============================
+@st.cache_data
+def load_data():
+    """Read the company documents CSV into a DataFrame.
+
+    Decorated with ``st.cache_data`` so the file is not re-parsed on every
+    Streamlit rerun.
+
+    Returns:
+        The parsed contents of ``data/company_docs.csv``.
+    """
+    return pd.read_csv("data/company_docs.csv")
+df = load_data()
+# Empty CSV cells are parsed as NaN (a float). Drop those rows and coerce the
+# remaining values to str so every entry can be embedded and later joined
+# into the prompt with "\n".join(...).
+documents = df["text"].dropna().astype(str).tolist()
+# ==============================
+# CREATE VECTOR DB
+# ==============================
+@st.cache_resource
+def create_faiss(docs):
+    """Embed *docs* and load the vectors into a flat L2 FAISS index.
+
+    Uses the module-level ``embed_model`` to encode the documents.
+
+    Args:
+        docs: Non-empty list of document strings to embed.
+
+    Returns:
+        ``(index, embeddings)``: the populated ``faiss.IndexFlatL2`` and the
+        array of document embeddings that was added to it.
+
+    Raises:
+        ValueError: If *docs* is empty.
+    """
+    if len(docs) == 0:
+        # With no vectors there is no embedding width to size the index
+        # with, so fail with a clear message rather than an indexing error.
+        raise ValueError("create_faiss() needs at least one document to index")
+    # FAISS consumes C-contiguous float32 matrices. This conversion avoids
+    # the extra copy np.array() made when the encoder output already has
+    # that layout, and fixes the layout when it does not.
+    embeddings = np.ascontiguousarray(embed_model.encode(docs), dtype=np.float32)
+    vector_index = faiss.IndexFlatL2(embeddings.shape[1])
+    vector_index.add(embeddings)
+    return vector_index, embeddings
+index, doc_embeddings = create_faiss(documents)
+# ==============================
+# RETRIEVAL FUNCTION
+# ==============================
+def retrieve(query, top_k=3):
+    """Return the indexed documents nearest to *query*.
+
+    Encodes the query with the module-level ``embed_model``, searches the
+    module-level FAISS ``index`` and maps the resulting ids back to
+    ``documents``.
+
+    Args:
+        query: The user's question as a string.
+        top_k: Maximum number of documents to return (default 3).
+
+    Returns:
+        A list of at most ``top_k`` document strings, in the order FAISS
+        returned them. Empty when the index holds no vectors or ``top_k``
+        is not positive.
+    """
+    # Never ask for more neighbours than the index holds.
+    k = min(top_k, index.ntotal)
+    if k <= 0:
+        return []
+    query_vec = np.ascontiguousarray(embed_model.encode([query]), dtype=np.float32)
+    _, neighbor_ids = index.search(query_vec, k)
+    # FAISS pads missing results with -1; without this filter documents[-1]
+    # would silently return the last document as a bogus hit.
+    return [documents[i] for i in neighbor_ids[0] if i >= 0]
+# ==============================
+# CHAT HISTORY
+# ==============================
+# Start each session with an empty transcript the first time the script runs.
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+# Display history
+# Replay every stored turn so earlier messages stay visible after a rerun.
+for past_turn in st.session_state.messages:
+    bubble = st.chat_message(past_turn["role"])
+    bubble.write(past_turn["content"])
+# ==============================
+# USER INPUT
+# ==============================
+# Read one chat submission; st.chat_input returns a falsy value until the
+# user sends something, so the block below only runs on a new question.
+query = st.chat_input("Ask about company...")
+if query:
+    # Record and echo the user's turn before doing any slow work.
+    st.session_state.messages.append({"role": "user", "content": query})
+    st.chat_message("user").write(query)
+    # 🔍 Retrieve relevant docs
+    context_docs = retrieve(query)
+    context = "\n".join(context_docs)
+    # 🧠 Build prompt
+    prompt = f"""
+You are a company assistant. Answer ONLY based on the context below.
+Context:
+{context}
+Question:
+{query}
+Answer:
 """
+    # 🤖 LLM Call
+    # The hosted inference call can fail (network, auth, model availability).
+    # Report that in the UI instead of aborting the run with a traceback.
+    try:
+        response = llm.text_generation(
+            prompt,
+            max_new_tokens=200,
+            temperature=0.5
+        )
+    except Exception as exc:  # top-level UI boundary: surface, do not swallow
+        st.error(f"The language model request failed: {exc}")
+    else:
+        # Only store and show an assistant turn when a reply was produced.
+        st.session_state.messages.append({"role": "assistant", "content": response})
+        st.chat_message("assistant").write(response)