samiha123 commited on
Commit
dc22afe
·
1 Parent(s): 1571b50

first commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.sqlite3 filter=lfs diff=lfs merge=lfs -text
37
+ src/chroma_db/*.sqlite3 filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1,21 +1,36 @@
1
  FROM python:3.9-slim
2
 
3
- WORKDIR /app
4
-
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
8
- software-properties-common \
9
  git \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
- COPY requirements.txt ./
13
- COPY src/ ./src/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- RUN pip3 install -r requirements.txt
 
16
 
17
  EXPOSE 8501
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
FROM python:3.9-slim

# --- System setup ---
# build-essential: compile native wheels; curl: used by HEALTHCHECK; git: VCS pip installs.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user with user ID 1000
RUN useradd -m -u 1000 user

# Set ENV vars for common cache locations to avoid permission issues
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers releases
# in favour of HF_HOME (also set here) — confirm before removing either.
ENV HOME=/home/user \
    XDG_CACHE_HOME=/home/user/.cache \
    HF_HOME=/home/user/.cache \
    TRANSFORMERS_CACHE=/home/user/.cache \
    PATH=/home/user/.local/bin:$PATH

# Set working directory
WORKDIR $HOME/app

# Copy files with correct ownership
COPY --chown=user:user requirements.txt ./requirements.txt
# Still root here (USER comes later), so packages install system-wide.
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user:user ./src ./src

# Use non-root user from now on
USER user

# Streamlit's default port.
EXPOSE 8501

# Container is healthy when Streamlit's health endpoint answers.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "src/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md DELETED
@@ -1,19 +0,0 @@
1
- ---
2
- title: Chatbot
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- ---
13
-
14
- # Welcome to Streamlit!
15
-
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
-
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
src/Reranker.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from typing import List
from langchain_groq import ChatGroq

# Module-level default LLM. Previously this object was created but never used;
# it now backs Reranker instances constructed without an explicit model
# (app.py calls `Reranker()` with no argument).
llm = ChatGroq(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    temperature=0,  # deterministic output for reranking
    max_tokens=None,
    timeout=None,
    max_retries=2,
)


class RerankResult(BaseModel):
    """Structured output of the rerank step."""

    top_indexes: List[int] = Field(..., description="A releváns dokumentumok indexei")
    message: str = Field(..., description="A felhasználónak megjelenítendő üzenet")
    # Defect fix: app.py reads `rerank_result.feedback_required`, which did not
    # exist on this model and would raise AttributeError. Defaulting to False
    # keeps payloads that omit the key valid (backward-compatible).
    feedback_required: bool = Field(
        default=False,
        description="True when the user should be asked to clarify the query",
    )


class Reranker:
    """LLM-based reranker that filters retrieved passages by relevance."""

    def __init__(self, model=None):
        # Defect fix: `model` was a required parameter, but app.py instantiates
        # Reranker() with no argument. Fall back to the module-level default.
        self.llm = model if model is not None else llm
        # Ask the LLM to emit a RerankResult directly.
        self.parser = self.llm.with_structured_output(RerankResult)

    def rerank(self, query: str, docs: List) -> RerankResult:
        """Return the indexes of the passages relevant to `query`.

        `docs` is a list of (text, score) tuples (see app.py's `passages`).
        """
        # Number each passage so the model can refer to it by index.
        context = "\n\n".join(
            [f"[{i}] {doc[0]} (score={doc[1]:.3f})" for i, doc in enumerate(docs)]
        )

        # Hungarian prompt: select only the relevant documents and answer as
        # strict JSON (kept byte-identical — it is runtime behavior).
        prompt = ChatPromptTemplate.from_template("""
        Egy intelligens asszisztens vagy, akinek feladata a dokumentumok szűrése azok relevanciája alapján egy kérdés megválaszolásához.
        Az alábbi a felhasználó kérdése: "{query}"
        És itt vannak a lekért dokumentumok:
        {context}
        Feladatod:
        1. Csak azokat a dokumentumokat válaszd ki, amelyek valóban relevánsak (a többit hagyd figyelmen kívül).
        2. Csak a releváns dokumentumok indexeit add vissza (ugyanabban a sorrendben, ahogy a listában szerepelnek).
        3. Ha egyik dokumentum sem releváns, akkor a "top_indexes" értéke legyen egy üres lista, és az üzenet: "Kérjük, pontosítsa kérdését."
        ⚠️ Nagyon fontos: A válaszod **szigorúan érvényes JSON formátumú legyen** (se előtte, se utána ne legyen szöveg), az alábbi szabályokkal:
        - A kulcsok legyenek dupla idézőjelek között (`"`)
        - A logikai értékek kisbetűsek legyenek (`true`/`false`)
        - Ne használj sortörést a JSON-on kívül
        Íme a várt kimeneti formátum egy példája:
        {{
            "top_indexes": [0, 2],
            "message": ""
        }}
        """)

        formatted_prompt = prompt.format_prompt(query=query, context=context)
        result = self.parser.invoke(formatted_prompt.to_messages())

        return result
src/__pycache__/Reranker.cpython-310.pyc ADDED
Binary file (3.01 kB). View file
 
src/__pycache__/chroma_storage.cpython-310.pyc ADDED
Binary file (1.8 kB). View file
 
src/__pycache__/constant.cpython-310.pyc ADDED
Binary file (380 Bytes). View file
 
src/__pycache__/generation.cpython-310.pyc ADDED
Binary file (3.51 kB). View file
 
src/__pycache__/hybrid_retrieval.cpython-310.pyc ADDED
Binary file (1.7 kB). View file
 
src/__pycache__/retrieval.cpython-310.pyc ADDED
Binary file (1.34 kB). View file
 
src/app.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Streamlit chat front-end for the 4D support assistant: category selection,
# document retrieval + LLM reranking, clarification questions, final answer.
import streamlit as st
import os
from chromadb import Client as ChromaClient
from chroma_storage import ChromaStorage
from retrieval import Retriever

from generation import RAGGenerator
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from constant import categorie
from dotenv import load_dotenv

import chromadb
from Reranker import Reranker

# Drop any Chroma client cached from a previous Streamlit rerun.
chromadb.api.client.SharedSystemClient.clear_system_cache()
load_dotenv()
import os  # NOTE(review): duplicate import (os already imported above)




WANDB_PROJECT = "rag"
CHROMA_DIR = "src/chroma_db"
COLLECTION_NAME = "my_collection"

# --- Global CSS for the chat UI ---
st.markdown("""
<style>
body {
    font-family: 'Segoe UI', Roboto, sans-serif;
}
.header {
    background-color: #4A90E2;
    color: white;
    padding: 10px;
    display: flex;
    align-items: center;
    box-shadow: 0 2px 4px rgba(0,0,0,0.2);
}
.header img {
    height: 40px;
    margin-right: 10px;
}
.header h1 {
    margin: 0;
    font-size: 24px;
}
.chat-container {
    padding: 10px;
}
.bot-message, .user-message {
    display: flex;
    align-items: flex-start;
    margin: 8px 0;
}
.bot-message .bubble {
    margin-left: 10px;
    background-color: #f0f0f0;
    color: #000;
}
.user-message {
    justify-content: flex-end;
}
.user-message .bubble {
    margin-right: 10px;
    background-color: #4A90E2;
    color: #fff;
}
.bubble {
    border-radius: 8px;
    padding: 10px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    max-width: 70%;
}
.avatar {
    width: 40px;
    height: 40px;
    border-radius: 50%;
}
@media (max-width: 600px) {
    .header h1 {
        font-size: 18px;
    }
    .bubble {
        max-width: 90%;
    }
}
</style>
""", unsafe_allow_html=True)

# --- Blue header with logo and title ---
st.markdown(f'''
<div class="header">
    <img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fwww.aplicasoft.com%2Fimages%2Flogos%2F4d_logo.gif&f=1" alt="Logo 4D">
    <h1>4D Assistant - En ligne</h1>
</div>
''', unsafe_allow_html=True)

# --- Session-state defaults (survive Streamlit reruns) ---
if "categories" not in st.session_state:
    st.session_state.categories = categorie
if "messages" not in st.session_state:
    st.session_state.messages = []
if "step" not in st.session_state:
    st.session_state.step = 0
if "selected_category" not in st.session_state:
    st.session_state.selected_category = None


# --- Centered category selection (shown until the user validates one) ---
if st.session_state.selected_category is None:
    st.markdown(
        """
        <div style="display: flex; justify-content: center; align-items: center; height: 60vh;">
            <div>
                <h2 style="text-align:center;">Choisissez une catégorie</h2>
            </div>
        </div>
        """,
        unsafe_allow_html=True,
    )
    selected = st.selectbox(
        "", st.session_state.categories, key="category_select", index=0
    )
    if st.button("Valider la catégorie"):
        st.session_state.selected_category = selected
        st.rerun()
    # Do not render the chat until a category is chosen.
    st.stop()

# --- Chat is displayed only after a category has been selected ---
sel = st.session_state.selected_category


# Seed the conversation with the bot greeting exactly once (step 0 -> 1).
if st.session_state.step == 0:
    st.session_state.messages.append({
        "role": "assistant",
        "content": "je suis votre assistant 4D"
    })
    # save_conversation_to_mongo(st.session_state.messages)
    st.session_state.step = 1


# ─────────────────────────────────────────────────────────────────────────────
# 2) ONE-TIME INITIALISATION (cached via session_state)
# ─────────────────────────────────────────────────────────────────────────────
if "initialized" not in st.session_state:
    st.session_state.initialized = True
    retriever = Retriever(
        chroma_dir=CHROMA_DIR,
        collection_name=COLLECTION_NAME
    )

    generator = RAGGenerator()
    # NOTE(review): Reranker.__init__ in src/Reranker.py requires a `model`
    # argument, so this no-arg call looks like it raises TypeError — confirm.
    reranker = Reranker()
    st.session_state.retriever = retriever
    st.session_state.generator = generator
    st.session_state.reranker = reranker
    # st.session_state.categories = categorie


# ─────────────────────────────────────────────────────────────────────────────
# 3) UI & chat state
# ─────────────────────────────────────────────────────────────────────────────
query = st.chat_input("chat_query")
#if "last_rerank" in st.session_state:
#    st.code(f"Rerank result: {st.session_state.last_rerank}")

# If the user submitted a query
if query:
    # Clarification phase: this input answers the current clarification question.
    if "qa_pairs" in st.session_state and "clarif_idx" in st.session_state:
        idx = st.session_state.clarif_idx
        st.session_state.messages.append({"role": "user", "content": query})
        # save_conversation_to_mongo(st.session_state.messages)
        st.session_state.qa_pairs[idx]["response"] = query
        st.session_state.clarif_idx += 1
    else:
        # Normal initial query
        st.session_state.messages.append({"role": "user", "content": query})
        # save_conversation_to_mongo(st.session_state.messages)
        try:
            # 1. Retrieve the initial documents
            # with st.spinner():
            docs = st.session_state.retriever.retrieve(query)

            # (page_content, score) pairs fed to the reranker.
            passages = [(doc_tuple[0].page_content, doc_tuple[1]) for doc_tuple in docs]

            # st.code(passages)

            # 2. Rerank the documents to improve relevance
            if docs:
                # with st.spinner():
                rerank_result = st.session_state.reranker.rerank(query, passages)
                st.session_state.last_rerank = rerank_result.model_dump()

                #st.code(rerank_result)

                # Check whether user feedback is required.
                # NOTE(review): RerankResult in src/Reranker.py declares only
                # `top_indexes` and `message`; `feedback_required` looks like
                # it raises AttributeError — confirm.
                if rerank_result.feedback_required:
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": rerank_result.message
                    })
                    #save_conversation_to_mongo(st.session_state.messages)
                    st.rerun()
                    # st.rerun() is used here instead of `continue`

                # Keep only the documents selected by the reranker, in order.
                if rerank_result.top_indexes:
                    reranked_docs = [docs[i] for i in rerank_result.top_indexes if i < len(docs)]

                    docs = reranked_docs

            # 3. Generate the answer with the reranked documents
            if docs:  # make sure we still have documents before continuing
                #with st.spinner():
                # NOTE(review): RAGGenerator.retrieve_qa in src/generation.py
                # is defined without `self` and without a `category` parameter,
                # so this call looks like it raises TypeError — confirm.
                qa_pairs,solution, lang = st.session_state.generator.retrieve_qa(
                    query, category=sel, docs_scores=docs
                )

                st.session_state.qa_pairs = qa_pairs
                st.session_state.solution = solution
                st.session_state.lang = lang
            else:
                # No document available
                st.session_state.messages.append({
                    "role": "assistant",
                    "content": "Aucun document pertinent trouvé pour votre requête. Pouvez-vous la reformuler ?"
                })
                #save_conversation_to_mongo(st.session_state.messages)

        except ValueError as e:
            st.session_state.messages.append({
                "role": "assistant",
                "content": str(e)
            })
            # save_conversation_to_mongo(st.session_state.messages)
        except Exception as e:
            st.error(f"❌ Erreur : {e}")

# ───────────────────────────────────────────────
# Clarification handling
# ───────────────────────────────────────────────
if "qa_pairs" in st.session_state:
    # Show all clarification questions once, as one numbered assistant message.
    if "clarif_shown" not in st.session_state:
        questions_text = "\n".join([
            f"{i+1}. {pair['question']}" for i, pair in enumerate(st.session_state.qa_pairs)
        ])
        st.session_state.messages.append({
            "role": "assistant",
            "content": questions_text
        })
        # save_conversation_to_mongo(st.session_state.messages)
        st.session_state.clarif_shown = True

    if "clarif_idx" not in st.session_state:
        st.session_state.clarif_idx = 0

    # Once every clarification has been answered, generate the final answer.
    if st.session_state.clarif_idx >= len(st.session_state.qa_pairs):
        final = st.session_state.generator.generate_answer(
            st.session_state.messages,
            st.session_state.solution,
            st.session_state.lang
        )
        st.session_state.messages.append({"role": "assistant", "content": final})
        # save_conversation_to_mongo(st.session_state.messages)
        # Reset the clarification state for the next query.
        for key in ["qa_pairs", "solution", "lang", "clarif_idx", "clarif_shown"]:
            st.session_state.pop(key, None)

# --- Render the full message history ---
st.markdown('<div class="chat-container">', unsafe_allow_html=True)
for msg in st.session_state.messages:
    if not msg.get("content"):
        continue
    if msg["role"] == "assistant":
        st.markdown(f'''
        <div class="bot-message">
            <img src="https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fwww.aplicasoft.com%2Fimages%2Flogos%2F4d_logo.gif&f=1" class="avatar">
            <div class="bubble">{msg["content"]}</div>
        </div>
        ''', unsafe_allow_html=True)
    else:
        st.markdown(f'''
        <div class="user-message">
            <div class="bubble">{msg["content"]}</div>
            <img src="https://img.icons8.com/ios-filled/50/4A90E2/user-male-circle.png" class="avatar">
        </div>
        ''', unsafe_allow_html=True)
st.markdown('</div>', unsafe_allow_html=True)
src/chroma_db/7fb02c85-346a-45e4-92fd-7b7e16fdc75d-20250627T191057Z-1-001.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c1b16f6503fdcc91139b8f561ea5edb0f25f78792eef4a4494bbbda268491e
3
+ size 18001
src/chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dafcd27d44df3c09c89f40192f451c2eb38919e401aeaf0d9a2a055a165bb520
3
+ size 4546560
src/chroma_storage.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import chromadb
from chromadb.utils import embedding_functions


class ChromaStorage:
    """Thin wrapper around a persistent Chroma collection."""

    def __init__(self, db_path: str = './src/chroma_db', collection_name: str = 'my_collection'):
        # Persist everything under db_path so data survives restarts.
        self.client = chromadb.PersistentClient(path=db_path)
        self.collection = self._get_or_create_collection(collection_name)

    def _get_or_create_collection(self, name: str):
        """Return the named collection, creating it on first use.

        Defect fix: the original create path passed
        `embedding_function=self.openai_ef`, but `openai_ef` is never defined
        anywhere in this class, so creating a new collection raised
        AttributeError. Use the client's atomic get-or-create instead (the
        collection then uses Chroma's default embedding function).
        """
        return self.client.get_or_create_collection(name=name)

    def add_batch(self, documents: list, metadatas: list, ids: list, batch_size: int = 200):
        """Add documents in slices of `batch_size` to avoid oversized requests."""
        for i in range(0, len(documents), batch_size):
            docs = documents[i:i + batch_size]
            metas = metadatas[i:i + batch_size]
            batch_ids = ids[i:i + batch_size]
            self.collection.add(documents=docs, metadatas=metas, ids=batch_ids)

    def query(self, query_text: str, k: int = 1) -> list:
        """Return the k nearest documents for `query_text`."""
        return self.collection.query(query_texts=[query_text], n_results=k)

    def delete_all(self):
        """Wipe the whole database.

        NOTE(review): `client.reset()` requires the client setting
        `allow_reset=True`, otherwise Chroma raises — confirm configuration.
        """
        self.client.reset()
src/constant.py ADDED
@@ -0,0 +1 @@
 
 
1
# Ticket categories offered in the Streamlit category selector (see app.py).
categorie = [
    "Activation",
    "Certification",
    "Connectivity",
    "Corruption",
    "Deploiement",
    "Developement",
    "Dump",
    "Error message",
    "Instability",
    "Installation",
    "Mauvais fonctionnement",
    "Network",
    "Performances",
    "Qodly",
    "Qodly Studio",
    "Sauvegarde",
    "TAOW Issue",
    "Usability",
]
src/generation.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain_groq import ChatGroq
from langchain.schema import AIMessage, HumanMessage, SystemMessage
import json


class RAGGenerator:
    """Generates clarification questions and final answers from retrieved docs."""

    def generate_answer(self, messages: list, solution: str, lang: str) -> str:
        """Rephrase `solution` into a final answer in language `lang`.

        Defect fix: the original definition was missing `self`, so the
        instance call `st.session_state.generator.generate_answer(...)` in
        app.py raised TypeError.
        """
        # System instruction (Hungarian): extract only the relevant parts of
        # the final solution and answer factually, with no personalisation or
        # requests for file transfers.
        consigne = (
            "Te egy műszaki asszisztens vagy. A megadott végleges megoldás alapján "
            "csak azokat a releváns elemeket emeld ki, amelyek válaszolnak a felhasználó kérdésére. "
            "Ezután fogalmazz meg egy tiszta, tömör, szigorúan tényszerű, pontos és részletes választ. "
            "NE tartalmazzon SEMMILYEN személyesítést (sem nevet, sem keresztnév, sem köszönést, sem köszönetet, sem aláírást). "
            "NE tartalmazzon SEMMILYEN kérést fájlok, külső objektumok, képernyőképek küldésére "
            "vagy bármilyen más, külső átvitelhez szükséges segítségnyújtást."
        )

        # Per-language answer instruction; unknown codes add no instruction.
        langue_instruction = {
            'fr': 'Réponds en français.',
            'en': 'Respond in English.',
            'es': 'Responde en español.',
            'de': 'Antworte auf Deutsch.',
            'zh': '请用中文回答。'
        }

        sys_msg = consigne + ' ' + langue_instruction.get(lang, '')

        content = [SystemMessage(content=f"A következő megoldás felhasználásával készítsd el az átfogalmazást: Végleges megoldás:\n{solution}")]

        # Replay the conversation with proper roles, then append the solution
        # to be rephrased as the last system message.
        chat_history = [SystemMessage(content=sys_msg)] + [
            AIMessage(content=msg["content"]) if msg["role"] == "assistant" else HumanMessage(content=msg["content"])
            for msg in messages
        ] + content

        chat = ChatGroq(
            model_name="meta-llama/llama-4-scout-17b-16e-instruct",
            temperature=0.2
        )

        reply = chat.invoke(chat_history)

        return reply.content

    def retrieve_qa(self, query: str, category: str = None, docs_scores: list = None) -> tuple:
        """Extract clarification questions and the merged solution from docs.

        Defect fixes versus the original: added the missing `self`; added the
        `category` keyword that app.py passes (`category=sel`) — accepted for
        API compatibility, currently unused by the logic; replaced the mutable
        default `docs_scores=[]` with None.

        Returns (qa_pairs, full_solution, lang).
        Raises LookupError when no documents were retrieved, and ValueError
        when the 'qa_pairs' metadata is not valid JSON.
        """
        if not docs_scores:
            raise LookupError("Nem található dokumentum a lekérdezéshez.")

        chat = ChatGroq(
            model_name="LLaMA3-8b-8192",
            temperature=0
        )
        # Detect the query language (ISO 639-1); fall back to English on failure.
        try:
            lang_detect_messages = [
                SystemMessage(content="Határozd meg a következő mondat nyelvét. Csak az ISO 639-1 nyelvkódot válaszold meg (pl. 'fr', 'en', 'es' stb.):"),
                HumanMessage(content=query)
            ]
            lang_response = chat.invoke(lang_detect_messages)
            lang = lang_response.content.strip().lower()
        except Exception:
            lang = "en"

        # Clarification questions come from the top document's metadata only.
        first_doc, _ = docs_scores[0]
        try:
            qa_pairs = json.loads(first_doc.metadata.get('qa_pairs', '[]'))
            qa_pairs = [q for q in qa_pairs if q.get('question')]
        except json.JSONDecodeError:
            raise ValueError("Érvénytelen a 'qa_pairs' metaadat.")

        # Translate each clarification question into the detected language;
        # best-effort: keep the original question text on any failure.
        if lang and qa_pairs:
            for pair in qa_pairs:
                original_question = pair.get("question", "")
                if original_question:
                    try:
                        messages = [
                            SystemMessage(content=f"Fordítsd le a következő kérdést {lang} nyelvre, csak a lefordított mondatot add meg:"),
                            HumanMessage(content=original_question)
                        ]
                        response = chat.invoke(messages)
                        pair["question"] = response.content.strip()
                    except Exception:
                        pass

        # Concatenate the final solutions of every retrieved document.
        solutions = []
        for doc, _ in docs_scores:
            sol = doc.metadata.get('solution_finale', '')
            if sol:
                solutions.append(sol)
        full_solution = "\n\n".join(solutions)

        return qa_pairs, full_solution, lang
src/retrieval.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# hybrid_retrieval.py
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
import os


class Retriever:
    """Embedding-based retriever over a persisted Chroma collection.

    Note: the debug `print(os.listdir())` that ran on every import was removed
    as leftover noise.
    """

    def __init__(self,
                 chroma_dir: str = 'src/chroma_db',
                 collection_name: str = 'my_collection'
                 ):
        # NOTE(review): no embedding_function is supplied, so langchain's
        # default is used — it must match whatever was used when the
        # collection was indexed; confirm against the ingestion pipeline.
        self.vectorstore = Chroma(
            collection_name=collection_name,
            persist_directory=chroma_dir
        )

    def retrieve(self, query: str):
        """Return the top-3 (document, score) pairs for `query`."""
        return self.vectorstore.similarity_search_with_score(query, k=3)

    def add_document(self, doc: str, metadata: dict, id: str):
        """Add a single document to the store.

        Defect fix: the original accessed `self.vectorstore.collection`, an
        attribute langchain's Chroma wrapper does not expose (the underlying
        collection is private), so this always raised AttributeError. Use the
        public `add_texts` API instead.
        """
        self.vectorstore.add_texts(texts=[doc], metadatas=[metadata], ids=[id])
src/style.css ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Chat widget styles.
   NOTE(review): app.py's inline CSS uses different class names
   (.bot-message/.user-message/.bubble) — confirm whether this file is
   actually loaded anywhere. */

/* Main chat container */
.chat-container {
    position: fixed;
    bottom: 20px;
    right: 20px;
    width: 360px;
    max-height: 600px;
    display: flex;
    flex-direction: column;
    box-shadow: 0 4px 20px rgba(0,0,0,0.2);
    border-radius: 1rem;
    overflow: hidden;
    background: #ffffff;
    font-family: sans-serif;
    z-index: 1000;
}

/* Wave-shaped header */
.chat-header {
    position: relative;
    height: 120px;
    background: #333835;
}
.chat-header svg {
    position: absolute;
    bottom: 0;
    width: 100%;
    height: auto;
    display: block;
}

/* Chat body */
.chat-body {
    flex: 1;
    padding: 0.5rem;
    overflow-y: auto;
    display: flex;
    flex-direction: column;
}

/* Message bubbles */
.user-bubble,
.assistant-bubble {
    padding: 0.5rem 1rem;
    border-radius: 0.75rem;
    margin: 0.25rem 0;
    max-width: 80%;
    word-wrap: break-word;
    line-height: 1.4;
}
.user-bubble {
    background: #f1f0f0;
    align-self: flex-end;
}
.assistant-bubble {
    background: #e0f7fa;
    align-self: flex-start;
}

/* Input area */
.chat-input-container {
    padding: 0.5rem;
    border-top: 1px solid #ececec;
}
.chat-input {
    width: 100%;
    padding: 0.5rem;
    border: 1px solid #d1d1d1;
    border-radius: 0.5rem;
    outline: none;
    font-size: 1rem;
}

/* Floating toggle button */
.chat-toggle {
    position: fixed;
    bottom: 20px;
    right: 20px;
    background: linear-gradient(135deg, #6e8efb, #a777e3);
    color: #fff;
    padding: 0.75rem 1rem;
    border: none;
    border-radius: 999px;
    cursor: pointer;
    box-shadow: 0 4px 12px rgba(0,0,0,0.3);
    z-index: 1001;
}