Spaces:
Sleeping
Sleeping
Upload 23 files
Browse files
- .gitattributes +41 -35
- README.md +13 -13
- app.py +110 -0
- faiss_index/index.faiss +3 -0
- faiss_index/index.pkl +3 -0
- faiss_indexes/2021_AfD_index/index.faiss +0 -0
- faiss_indexes/2021_AfD_index/index.pkl +3 -0
- faiss_indexes/2021_CDU-CSU_index/index.faiss +3 -0
- faiss_indexes/2021_CDU-CSU_index/index.pkl +3 -0
- faiss_indexes/2021_FDP_index/index.faiss +3 -0
- faiss_indexes/2021_FDP_index/index.pkl +3 -0
- faiss_indexes/2021_Freie wah_index/index.faiss +3 -0
- faiss_indexes/2021_Freie wah_index/index.pkl +3 -0
- faiss_indexes/2021_Greens_index/index.faiss +3 -0
- faiss_indexes/2021_Greens_index/index.pkl +3 -0
- faiss_indexes/2021_SPD_index/index.faiss +0 -0
- faiss_indexes/2021_SPD_index/index.pkl +3 -0
- faiss_indexes/2021_The Left_index/index.faiss +3 -0
- faiss_indexes/2021_The Left_index/index.pkl +3 -0
- llm.py +74 -0
- make_vecdb.py +39 -0
- model.py +0 -0
- requirements.txt +6 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,41 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
faiss_indexes/2021_CDU-CSU_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
faiss_indexes/2021_FDP_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
faiss_indexes/2021_Freie[[:space:]]wah_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
faiss_indexes/2021_Greens_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
faiss_indexes/2021_The[[:space:]]Left_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: German Political Chatbot
|
| 3 |
-
emoji: 📊
|
| 4 |
-
colorFrom: yellow
|
| 5 |
-
colorTo: pink
|
| 6 |
-
sdk: streamlit
|
| 7 |
-
sdk_version: 1.41.1
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
license: mit
|
| 11 |
-
---
|
| 12 |
-
|
| 13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: German Political Chatbot
|
| 3 |
+
emoji: 📊
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: pink
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.41.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from llm import GeminiModel, api_key as SECRET_KEY
|
| 3 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
| 4 |
+
from langchain_community.vectorstores import FAISS
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class RAGEnabledModel:
    """RAG pipeline: FAISS retrieval over party statements + Gemini generation."""

    def __init__(self):
        # System instructions prepended to every augmented prompt.
        self.prompt = """
        You are a helpful chat assistant who provides information about the public statements
        and policy positions of different political parties in Germany for the upcoming 2025 elections.
        When the user asks a question, you should respond in the same language they used
        (e.g., if they ask in German, respond in German; if in English, respond in English).

        Focus on factual information regarding each party’s stance, referencing relevant
        policy areas such as economy, immigration, healthcare, the environment, and so on.
        Stay neutral and objective, providing factual information without bias or personal
        political opinions. Search online to find up-to-date latest information.
        """

        # Load the FAISS vector store; it is pickle-backed, so deserialization
        # must be explicitly allowed.
        self.vector_db = FAISS.load_local(
            "./faiss_index",
            GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=SECRET_KEY),
            allow_dangerous_deserialization=True,
        )

        # Gemini chat model used for the generation step.
        self.model = GeminiModel()

    def retrieve_documents(self, query):
        """Return the 5 chunks most similar to *query* from the vector store."""
        return self.vector_db.similarity_search(query, k=5)

    def predict(self, text, history):
        """RAG prediction: retrieve context, augment the prompt, query Gemini."""
        docs = self.retrieve_documents(text)

        # Fold the retrieved chunks into the system prompt as grounding context.
        context = "\n\n".join(doc.page_content for doc in docs)
        augmented_prompt = f"{self.prompt}\n\nRelevant Context:\n{context}\n\nUser Query: {text}"

        # The cost figure returned by the model is intentionally discarded here.
        answer, _cost = self.model.predict(
            augmented_prompt,
            history=history,
            grounding_threshold=0.15,
        )
        return answer
| 49 |
+
|
| 50 |
+
####################################
|
| 51 |
+
# 2) Streamlit application layout #
|
| 52 |
+
####################################
|
| 53 |
+
def main():
    """Streamlit entry point: chat UI over the RAG-enabled Gemini model."""
    st.set_page_config(page_title="German 2025 Elections - Political Parties", layout="centered")
    st.title("German Political Parties' Statements for the 2025 Elections")

    # Conversation history as (speaker, message) tuples, persisted across reruns.
    if "history" not in st.session_state:
        st.session_state.history = []

    # Cache the model in session state: constructing RAGEnabledModel reloads
    # the FAISS index from disk, which is far too slow to repeat on every
    # Streamlit rerun (the previous code rebuilt it on each interaction).
    if "model" not in st.session_state:
        st.session_state.model = RAGEnabledModel()
    model = st.session_state.model

    # Chat-style input form.
    with st.form(key="user_form"):
        user_input = st.text_input(
            "You:",
            placeholder="Ask about a political party's stance on any policy in Germany (2025 elections)..."
        )
        submitted = st.form_submit_button("Send")

    # Handle a submitted question.
    if submitted and user_input:
        st.session_state.history.append(("user", user_input))

        # The model answers in the user's language (instructed via the system
        # prompt), so no explicit language detection is needed here.
        response = model.predict(user_input, st.session_state.history)

        st.session_state.history.append(("bot", response))

    # Render the transcript.
    for speaker, message in st.session_state.history:
        if speaker == "user":
            st.markdown(f"**You**: {message}")
        else:
            st.markdown(f"**Assistant**: {message}")


if __name__ == "__main__":
    main()
|
faiss_index/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aaa51ba1ade1dd3bc411db82620130ef35562859ca0eed117b55b336db73a605
|
| 3 |
+
size 7154733
|
faiss_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3a05204ad5b9b86d755572f37a2c26d9cbd8c687c7c53f4ce50723687d1465f
|
| 3 |
+
size 1921037
|
faiss_indexes/2021_AfD_index/index.faiss
ADDED
|
Binary file (786 kB). View file
|
|
|
faiss_indexes/2021_AfD_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c943d033af8314989ee741837cc227585149fb3958814364b2c5f29ad50e6fed
|
| 3 |
+
size 235017
|
faiss_indexes/2021_CDU-CSU_index/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad8c886f5d11a244df07c3b7dab89786beb3c721dd2eedee7bb0d3537f07b9b4
|
| 3 |
+
size 1376301
|
faiss_indexes/2021_CDU-CSU_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73152129ed656981ba080fbeea3f206cb95d8821a582333255277204154cfade
|
| 3 |
+
size 425199
|
faiss_indexes/2021_FDP_index/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7143eb738429d45918dde851b06109be81924f8c38c1664416c270f564522de
|
| 3 |
+
size 1342509
|
faiss_indexes/2021_FDP_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d2bd7c2b4ee34fafc9dc31346ee6008a726dbcd7722bfdd2b565642d431a0d4
|
| 3 |
+
size 362556
|
faiss_indexes/2021_Freie wah_index/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0187492eef61b71459e92b256e78971536ae31428ee5c08e0d771325795f8738
|
| 3 |
+
size 1324077
|
faiss_indexes/2021_Freie wah_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dbdb92ab9516ed45e851e7644b9f19ee65b583ab7285abff9ef0d43c74a41cd
|
| 3 |
+
size 327166
|
faiss_indexes/2021_Greens_index/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f4153203b45222ec5e7b76bcd5be285e6483d5bc8d388ddaa8c85edffcdfbf1
|
| 3 |
+
size 2734125
|
faiss_indexes/2021_Greens_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6913c5fd21fa78a6267d37bc1e931ea4dfa46081a8fe2c352c75c80509dfb26e
|
| 3 |
+
size 700354
|
faiss_indexes/2021_SPD_index/index.faiss
ADDED
|
Binary file (817 kB). View file
|
|
|
faiss_indexes/2021_SPD_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3563521322671ca3c9e2fc0902e14f43946f7187dec1688e34240632490d59e
|
| 3 |
+
size 221543
|
faiss_indexes/2021_The Left_index/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1995822bec2972c5cdbbf4087a5386e61cd5281a8aa3df8c954ba9687e2bd68
|
| 3 |
+
size 2279469
|
faiss_indexes/2021_The Left_index/index.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3708e81986319babbf10c0affb07c71b485360cffcd2d21242f5020161631d22
|
| 3 |
+
size 667322
|
llm.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
| 3 |
+
import google.generativeai as genai
|
| 4 |
+
import numpy as np
|
| 5 |
+
import os
|
| 6 |
+
from langchain.vectorstores import FAISS
|
| 7 |
+
# Gemini API key; raises KeyError at import time if GOOGLE_GEMINI_API is unset.
api_key = os.environ["GOOGLE_GEMINI_API"]
|
| 8 |
+
|
| 9 |
+
# Map our internal speaker labels to the roles the Gemini API expects.
_ROLE_MAP = {"user": "user", "bot": "model"}


def format_chat_history(chat_history):
    """Convert (speaker, message) tuples into the Gemini chat-history format.

    Args:
        chat_history: iterable of (speaker, message) pairs where speaker is
            "user" or "bot". Any other label is passed through unchanged,
            matching the previous behavior.

    Returns:
        A list of {"role": ..., "parts": ...} dicts accepted by
        genai.GenerativeModel.start_chat(history=...).
    """
    # Dict lookup replaces the old if/elif chain, which contained a dead
    # branch (reassigning "user" to "user").
    return [
        {"role": _ROLE_MAP.get(speaker, speaker), "parts": message}
        for speaker, message in chat_history
    ]
|
| 22 |
+
|
| 23 |
+
# Configure the google-generativeai SDK once at import time.
genai.configure(api_key=api_key)
|
| 24 |
+
class GeminiModel:
    """Thin wrapper around a Gemini chat model with Google-search grounding."""

    def __init__(self) -> None:
        self.model = genai.GenerativeModel('gemini-1.5-pro-latest')

    def predict(self, inp, history, grounding_threshold=1.0):
        """Send *inp* with prior *history*; return (cleaned_text, cost_estimate).

        grounding_threshold feeds Gemini's dynamic retrieval config, which
        decides when to ground the answer via Google search.
        """
        search_tool = {
            "google_search_retrieval": {
                "dynamic_retrieval_config": {
                    "mode": "unspecified",
                    "dynamic_threshold": grounding_threshold,
                },
            },
        }
        session = self.model.start_chat(history=format_chat_history(history))
        response = session.send_message(inp, tools=search_tool)

        # Rough cost estimate: $10 per million total tokens.
        cost = (response.usage_metadata.total_token_count / 1_000_000) * 10

        # Strip code fences and newlines, plus a leading "json" tag if present.
        cleaned = response.text.replace('`', '').replace("\n", "")
        if cleaned.startswith("json"):
            cleaned = cleaned[4:]
        return cleaned, cost

    def generate_title(self, initial_message):
        """Ask the model for a short descriptive title for *initial_message*."""
        prompt = f"Generate a concise and descriptive title for the following conversation:\n\n{initial_message}\n\nTitle:"
        reply = self.model.generate_content(prompt)
        return reply.text.strip()
|
| 46 |
+
|
| 47 |
+
class GeminiEmbeddings:
    """Wraps Google's embedding-001 model; returns FAISS-ready float32 rows."""

    def __init__(self) -> None:
        self.model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)

    def predict(self, input):
        """Embed *input* text and return a (1, dim) float32 numpy array."""
        raw = self.model.embed_query(input)
        return np.array(raw).reshape(1, -1).astype('float32')
|
| 55 |
+
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)  # Google Gemini key from GOOGLE_GEMINI_API (not an OpenAI key)
|
| 56 |
+
|
| 57 |
+
def query_all_indexes(query):
    """Search every per-party FAISS index under ./faiss_indexes.

    Returns a list of (page_content, index_dir_name) tuples, with up to two
    hits contributed by each index.
    """
    root = "faiss_indexes"
    hits = []

    for entry in os.listdir(root):
        store_dir = os.path.join(root, entry)
        if not os.path.isdir(store_dir):
            continue
        # FAISS metadata is pickled, so deserialization must be opted into.
        store = FAISS.load_local(store_dir, embeddings, allow_dangerous_deserialization=True)
        for doc in store.similarity_search(query, k=2):
            hits.append((doc.page_content, entry))

    return hits
|
| 72 |
+
|
| 73 |
+
# CLI smoke test: print results from querying every party index.
if __name__ == "__main__":
    print(query_all_indexes("Was sagen Parteien zum Klimawandel"))
|
make_vecdb.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Build the combined FAISS vector database from the text files in ./data."""
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
import os
import random
from llm import api_key as SECRET_KEY

# Folder containing the source text files.
folder_path = "./data"

# Load every .txt file found in the folder.
documents = []
for filename in os.listdir(folder_path):
    if not filename.endswith(".txt"):
        continue
    loader = TextLoader(os.path.join(folder_path, filename), encoding="utf-8")
    documents.extend(loader.load())

# Chunk the documents so each embedding captures local context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,   # size of each chunk
    chunk_overlap=200  # overlap between consecutive chunks
)
# Shuffle before splitting, preserving the original pipeline's ordering.
random.shuffle(documents)
split_docs = text_splitter.split_documents(documents)

# Embed with Google's embedding-001 model (key comes from GOOGLE_GEMINI_API).
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=SECRET_KEY)

# Build the FAISS vector store and persist it to disk.
faiss_vectorstore = FAISS.from_documents(split_docs, embeddings)
output_path = "faiss_index"
faiss_vectorstore.save_local(output_path)

print(f"FAISS vector database created and saved to: {output_path}")
|
model.py
ADDED
|
File without changes
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
google-generativeai
|
| 3 |
+
langchain-community
|
| 4 |
+
langchain
|
| 5 |
+
langchain-core
|
| 6 |
+
langchain-google-genai==2.0.7
|