Sadique5 commited on
Commit
d2224c7
·
verified ·
1 Parent(s): 5353f49

Upload 23 files

Browse files
.gitattributes CHANGED
@@ -1,35 +1,41 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ faiss_indexes/2021_CDU-CSU_index/index.faiss filter=lfs diff=lfs merge=lfs -text
38
+ faiss_indexes/2021_FDP_index/index.faiss filter=lfs diff=lfs merge=lfs -text
39
+ faiss_indexes/2021_Freie[[:space:]]wah_index/index.faiss filter=lfs diff=lfs merge=lfs -text
40
+ faiss_indexes/2021_Greens_index/index.faiss filter=lfs diff=lfs merge=lfs -text
41
+ faiss_indexes/2021_The[[:space:]]Left_index/index.faiss filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: German Political Chatbot
3
- emoji: 📊
4
- colorFrom: yellow
5
- colorTo: pink
6
- sdk: streamlit
7
- sdk_version: 1.41.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: German Political Chatbot
3
+ emoji: 📊
4
+ colorFrom: yellow
5
+ colorTo: pink
6
+ sdk: streamlit
7
+ sdk_version: 1.41.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from llm import GeminiModel, api_key as SECRET_KEY
3
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
4
+ from langchain_community.vectorstores import FAISS
5
+
6
+
7
+
class RAGEnabledModel:
    """Retrieval-augmented chat model over the party-statement FAISS index."""

    def __init__(self):
        # System prompt prepended to every user query.
        self.prompt = """
        You are a helpful chat assistant who provides information about the public statements
        and policy positions of different political parties in Germany for the upcoming 2025 elections.
        When the user asks a question, you should respond in the same language they used
        (e.g., if they ask in German, respond in German; if in English, respond in English).

        Focus on factual information regarding each party’s stance, referencing relevant
        policy areas such as economy, immigration, healthcare, the environment, and so on.
        Stay neutral and objective, providing factual information without bias or personal
        political opinions. Search online to find up-to-date latest information.
        """

        # Load the persisted FAISS vector store with Google embeddings.
        # NOTE: allow_dangerous_deserialization trusts the pickled index on disk.
        embedder = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001", google_api_key=SECRET_KEY
        )
        self.vector_db = FAISS.load_local(
            "./faiss_index", embedder, allow_dangerous_deserialization=True
        )

        # Underlying Gemini chat model (defined in llm.py).
        self.model = GeminiModel()

    def retrieve_documents(self, query):
        """Return the 5 documents most similar to `query` from the vector DB."""
        return self.vector_db.similarity_search(query, k=5)

    def predict(self, text, history):
        """Answer `text` using retrieved context plus the chat `history`."""
        # 1) Retrieve supporting documents for the query.
        docs = self.retrieve_documents(text)

        # 2) Fold their contents into the system prompt.
        context = "\n\n".join(doc.page_content for doc in docs)
        augmented_prompt = f"{self.prompt}\n\nRelevant Context:\n{context}\n\nUser Query: {text}"

        # 3) Generate the reply; the per-call cost estimate is discarded.
        reply, _cost = self.model.predict(
            augmented_prompt,
            history=history,
            grounding_threshold=0.15,
        )
        return reply
49
+
####################################
# 2) Streamlit application layout #
####################################
def main():
    """Render the chat page and handle one user turn per Streamlit rerun."""
    st.set_page_config(page_title="German 2025 Elections - Political Parties", layout="centered")
    st.title("German Political Parties' Statements for the 2025 Elections")

    # Initialize the conversation history: a list of (speaker, message) tuples.
    if "history" not in st.session_state:
        st.session_state.history = []

    # Build the RAG model once per session. Constructing it on every rerun
    # (the previous behavior) reloaded the FAISS index from disk and
    # re-created the Gemini client on each interaction — slow and wasteful.
    if "model" not in st.session_state:
        st.session_state.model = RAGEnabledModel()
    model = st.session_state.model

    #############################
    # 3) Chat-style input form #
    #############################
    with st.form(key="user_form"):
        user_input = st.text_input(
            "You:",
            placeholder="Ask about a political party's stance on any policy in Germany (2025 elections)..."
        )
        submitted = st.form_submit_button("Send")

    ##########################
    # 4) Handle user submit #
    ##########################
    if submitted and user_input:
        # Save the user message first so it appears in the transcript.
        st.session_state.history.append(("user", user_input))

        # The model's system prompt instructs it to reply in the user's own
        # language, so the raw input is passed through without detection.
        response = model.predict(user_input, st.session_state.history)

        # Save the model's response.
        st.session_state.history.append(("bot", response))

    ##################################
    # 5) Display the chat messages #
    ##################################
    for speaker, message in st.session_state.history:
        if speaker == "user":
            st.markdown(f"**You**: {message}")
        else:
            st.markdown(f"**Assistant**: {message}")
105
+
###################################
# 6) Entry point for the app #
###################################
if __name__ == "__main__":
    main()
faiss_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa51ba1ade1dd3bc411db82620130ef35562859ca0eed117b55b336db73a605
3
+ size 7154733
faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a05204ad5b9b86d755572f37a2c26d9cbd8c687c7c53f4ce50723687d1465f
3
+ size 1921037
faiss_indexes/2021_AfD_index/index.faiss ADDED
Binary file (786 kB). View file
 
faiss_indexes/2021_AfD_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c943d033af8314989ee741837cc227585149fb3958814364b2c5f29ad50e6fed
3
+ size 235017
faiss_indexes/2021_CDU-CSU_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad8c886f5d11a244df07c3b7dab89786beb3c721dd2eedee7bb0d3537f07b9b4
3
+ size 1376301
faiss_indexes/2021_CDU-CSU_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73152129ed656981ba080fbeea3f206cb95d8821a582333255277204154cfade
3
+ size 425199
faiss_indexes/2021_FDP_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7143eb738429d45918dde851b06109be81924f8c38c1664416c270f564522de
3
+ size 1342509
faiss_indexes/2021_FDP_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d2bd7c2b4ee34fafc9dc31346ee6008a726dbcd7722bfdd2b565642d431a0d4
3
+ size 362556
faiss_indexes/2021_Freie wah_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0187492eef61b71459e92b256e78971536ae31428ee5c08e0d771325795f8738
3
+ size 1324077
faiss_indexes/2021_Freie wah_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dbdb92ab9516ed45e851e7644b9f19ee65b583ab7285abff9ef0d43c74a41cd
3
+ size 327166
faiss_indexes/2021_Greens_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f4153203b45222ec5e7b76bcd5be285e6483d5bc8d388ddaa8c85edffcdfbf1
3
+ size 2734125
faiss_indexes/2021_Greens_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6913c5fd21fa78a6267d37bc1e931ea4dfa46081a8fe2c352c75c80509dfb26e
3
+ size 700354
faiss_indexes/2021_SPD_index/index.faiss ADDED
Binary file (817 kB). View file
 
faiss_indexes/2021_SPD_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3563521322671ca3c9e2fc0902e14f43946f7187dec1688e34240632490d59e
3
+ size 221543
faiss_indexes/2021_The Left_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1995822bec2972c5cdbbf4087a5386e61cd5281a8aa3df8c954ba9687e2bd68
3
+ size 2279469
faiss_indexes/2021_The Left_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3708e81986319babbf10c0affb07c71b485360cffcd2d21242f5020161631d22
3
+ size 667322
llm.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

# Third-party imports: LangChain wrappers for Google embeddings / FAISS,
# plus the Gemini SDK itself.
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
import numpy as np
import os
from langchain.vectorstores import FAISS

# Gemini API key read at import time; raises KeyError immediately if the
# GOOGLE_GEMINI_API environment variable is unset (fail fast by design).
api_key = os.environ["GOOGLE_GEMINI_API"]
8
+
def format_chat_history(chat_history):
    """Convert app chat history to the Gemini message format.

    Args:
        chat_history: sequence of (role, message) pairs where role is
            "user" or "bot"; any other role is passed through unchanged.

    Returns:
        list of {"role": ..., "parts": ...} dicts, with the app's "bot"
        role mapped to Gemini's "model" role.
    """
    # Gemini's chat API names the assistant side "model", not "bot";
    # "user" maps to itself (the original if/elif had a no-op branch).
    role_map = {"user": "user", "bot": "model"}
    return [
        {"role": role_map.get(message[0], message[0]), "parts": message[1]}
        for message in chat_history
    ]
22
+
# Configure the shared Gemini SDK once, with the key read from the environment.
genai.configure(api_key=api_key)
class GeminiModel:
    """Chat wrapper around Gemini 1.5 Pro with Google Search grounding."""
    def __init__(self) -> None:
        # One generative-model handle reused for every chat turn.
        self.model = genai.GenerativeModel('gemini-1.5-pro-latest')

    def predict(self, inp, history, grounding_threshold = 1.0):
        """Send `inp` with prior `history` to Gemini and return (text, cost).

        `history` is a list of (speaker, message) tuples as stored by the app;
        `grounding_threshold` is the dynamic-retrieval threshold for Google
        Search grounding (lower values ground more often).
        """
        chat = self.model.start_chat(history=format_chat_history(history))
        response = chat.send_message(inp, tools ={"google_search_retrieval": {
            "dynamic_retrieval_config": {
                "mode": "unspecified",
                "dynamic_threshold": grounding_threshold}}})

        # Rough cost estimate: $10 per million tokens (input + output combined).
        cost = (response.usage_metadata.total_token_count / 1_000_000) * 10
        # NOTE(review): stripping ALL backticks and newlines flattens any
        # markdown or code formatting in the reply — confirm this is intended.
        txt = response.text.replace('`', '').replace("\n","")
        # Drop a leading "json" tag left behind by a stripped ```json fence.
        if "json" in txt[:4]:
            txt = txt[4:]
        return txt, cost

    def generate_title(self, initial_message):
        """Ask the model for a short descriptive title for `initial_message`."""
        prompt = f"Generate a concise and descriptive title for the following conversation:\n\n{initial_message}\n\nTitle:"
        response = self.model.generate_content(prompt)
        title = response.text.strip()
        return title
46
+
class GeminiEmbeddings:
    """Wraps Google's embedding model and returns float32 row vectors."""

    def __init__(self) -> None:
        # LangChain wrapper over the "embedding-001" Google embedding model.
        self.model = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001", google_api_key=api_key
        )

    def predict(self, input):
        """Embed a single query string as a (1, dim) float32 numpy array."""
        raw = self.model.embed_query(input)
        return np.array(raw).reshape(1, -1).astype('float32')

# Module-level embeddings instance shared by query_all_indexes below.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
56
+
def query_all_indexes(query):
    """Search every per-party FAISS index under faiss_indexes/ for `query`.

    Returns a list of (page_content, index_dir_name) tuples, up to two
    hits per index directory.
    """
    indexes_path = "faiss_indexes"
    results = []

    for entry in os.listdir(indexes_path):
        candidate = os.path.join(indexes_path, entry)
        if not os.path.isdir(candidate):
            # Skip stray files sitting next to the index directories.
            continue

        # Load the persisted vector store (trusts the on-disk pickle).
        store = FAISS.load_local(candidate, embeddings, allow_dangerous_deserialization=True)

        # Top-2 matches from this party's index, tagged with its directory name.
        hits = store.similarity_search(query, k=2)
        results.extend((hit.page_content, entry) for hit in hits)

    return results
72
+
if __name__ == "__main__":
    # Manual smoke test: query every party index with a German question.
    print(query_all_indexes("Was sagen Parteien zum Klimawandel"))
make_vecdb.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
import os
import random
from llm import api_key as SECRET_KEY

# Folder holding the raw party-programme text files.
folder_path = "./data"

# Collect every .txt document found in the folder.
documents = []
for filename in os.listdir(folder_path):
    if filename.endswith(".txt"):
        loader = TextLoader(os.path.join(folder_path, filename), encoding="utf-8")
        documents.extend(loader.load())

# Chunk the documents so each embedding covers a manageable span of text,
# with overlap so sentences straddling a boundary stay searchable.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# NOTE(review): document order is shuffled before splitting — presumably
# deliberate, but confirm (random is unseeded, so output is nondeterministic).
random.shuffle(documents)
split_docs = text_splitter.split_documents(documents)

# Embed with Google's embedding model; GOOGLE_GEMINI_API must be set (llm.py).
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=SECRET_KEY)

# Build the FAISS vector store from the chunks and persist it to disk.
faiss_vectorstore = FAISS.from_documents(split_docs, embeddings)
output_path = "faiss_index"
faiss_vectorstore.save_local(output_path)

print(f"FAISS vector database created and saved to: {output_path}")
model.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ google-generativeai
3
+ langchain-community
4
+ langchain
5
+ langchain-core
6
+ langchain-google-genai==2.0.7