Spaces:
Sleeping
Sleeping
DialogueRobust
committed on
Commit
·
e490ab5
1
Parent(s):
292920a
First commit
Browse files- .gitattributes +1 -0
- README.md +6 -6
- backend.py +82 -0
- build_index.py +27 -0
- config.ini +16 -0
- data/demo_prompt.json +3 -0
- data/docs.pkl +3 -0
- data/documents.json +3 -0
- data/index.faiss +3 -0
- embeddings.py +12 -0
- requirements.txt +5 -0
- semantic.py +20 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
data/* filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.49.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
license:
|
| 11 |
-
short_description:
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Robust Dialogue Demo
|
| 3 |
+
emoji: ๐
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.49.1
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: cc
|
| 11 |
+
short_description: 'Domain-Specific Retrieval-Augmented Generation demo '
|
| 12 |
---
|
| 13 |
|
| 14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
backend.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from semantic import SemanticSearch
|
| 2 |
+
import json
|
| 3 |
+
import argparse
|
| 4 |
+
import os
|
| 5 |
+
from embeddings import get_embedding
|
| 6 |
+
from openai import OpenAI
|
| 7 |
+
import configparser
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class BackEnd:
    """Retrieval-augmented generation backend.

    Embeds the user query, retrieves supporting documents via semantic
    search, assembles a prompt from a language-specific template, and asks
    an OpenAI model for a JSON-formatted answer.
    """

    def __init__(self, config):
        """Initialise the backend.

        Args:
            config: parsed ``configparser`` object. NOTE(review): most
                settings are currently hard-coded for the demo and the
                config values are ignored — the TODOs mark each spot.

        Raises:
            SystemExit: if the prompt file cannot be read or parsed.
        """
        self.model = "gpt-4.1"  # TODO: read from config['ChatBot']['model']

        self.client = OpenAI()

        self.semantic_search = SemanticSearch()  # TODO: pass config['Semantic Search']

        prompt_file = 'data/demo_prompt.json'  # TODO: config['ChatBot']['prompt file']
        try:
            with open(prompt_file) as json_file:
                prompts = json.load(json_file)
        except (OSError, json.JSONDecodeError) as err:
            # Catch only load/parse failures (the old bare `except:` hid
            # everything) and report the path actually opened — the previous
            # message printed the config value, which is not what was read.
            raise SystemExit(
                f"ERROR. Couldn't load prompt file {prompt_file} or wrong json format"
            ) from err

        lang = 'fr'  # TODO: config['General']['language'][:2].lower()
        if lang == 'fr':
            self.prompt_template = prompts["French"]
        else:
            # Default to English so self.prompt_template is always defined
            # (the original left it unset for unknown languages, causing a
            # later AttributeError in process_query).
            self.prompt_template = prompts["English"]

    def process_query(self, query):
        """Answer *query* using retrieved context documents.

        Args:
            query: the user's question as a plain string.

        Returns:
            tuple: (parsed JSON answer from the model, list of context
            strings, each prefixed with its "Document N" header).
        """
        query_embeddings = get_embedding(query)
        context = self.semantic_search.search(query_embeddings)
        # Label each retrieved passage so the model can cite "Document N".
        for index, document in enumerate(context):
            context[index] = 'Document ' + str(index + 1) + '\n\n' + document
        print('context = ', context)

        documents = '\n\n'.join(context)

        # Prompt = system prompt + prefix (query/context) + docs + postfix.
        prompt = self.prompt_template['system_prompt']
        demo_prefix = self.prompt_template['demo_prefix'].format(query=query, context=context)
        prompt += demo_prefix + '\n' + documents + '\n\n'
        demo_postfix = self.prompt_template['demo_postfix']
        prompt += demo_postfix

        if 'gpt' in self.model:
            response = self.client.responses.create(
                model=self.model,
                input=prompt)

        # Model is instructed (via the template) to reply in JSON.
        return json.loads(response.output_text), context
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# def main():
|
| 66 |
+
# parser = argparse.ArgumentParser()
|
| 67 |
+
# parser.add_argument('--config_file', type=str, required=True, help='File containing the configuration for the backend (in .ini format)')
|
| 68 |
+
# parser.add_argument('--query', type=str, required=False, help='Test query for testing the system')
|
| 69 |
+
|
| 70 |
+
# args = parser.parse_args()
|
| 71 |
+
|
| 72 |
+
# config = configparser.ConfigParser()
|
| 73 |
+
# config.read(args.config_file)
|
| 74 |
+
# backend = BackEnd(config)
|
| 75 |
+
|
| 76 |
+
# if args.query:
|
| 77 |
+
# response = backend.process_query(args.query)
|
| 78 |
+
# print(response)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# if __name__ == '__main__':
|
| 82 |
+
# main()
|
build_index.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# build_index.py
#
# Offline step: embed every document section, build a flat FAISS L2 index,
# and persist both the index and the raw documents for the Space runtime.
import os
import json
import pickle
import faiss
import numpy as np
from embeddings import get_embedding

DOCS_FILE = "data/documents.json"

# Load the corpus: a list of dicts, each with (at least) a "section" key.
with open(DOCS_FILE, "r", encoding="utf-8") as f:
    docs = json.load(f)

# Compute embeddings (one embedding-API call per section).
embs = [get_embedding(d["section"]) for d in docs]

# Create FAISS index (exact L2 search; fine at this corpus size).
dim = len(embs[0])
index = faiss.IndexFlatL2(dim)
index.add(np.array(embs).astype("float32"))

# Save outputs for the Space runtime
faiss.write_index(index, "data/index.faiss")
with open("data/docs.pkl", "wb") as f:
    pickle.dump(docs, f)

print("✅ Index and docs saved to data/")
|
config.ini
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[General]
|
| 2 |
+
language: French
|
| 3 |
+
|
| 4 |
+
[ChatBot]
|
| 5 |
+
model: gpt-4.1
|
| 6 |
+
temperature: 0.1
|
| 7 |
+
prompt file: demo_prompt.json
|
| 8 |
+
|
| 9 |
+
[Semantic Search]
|
| 10 |
+
embedding models: dangvantuan/sentence-camembert-large Lajavaness/sentence-flaubert-base
|
| 11 |
+
database file: Livre_blanc_IA_CRIM.shorter.sections.json
|
| 12 |
+
metric: cosine distance
|
| 13 |
+
text key: section
|
| 14 |
+
topk: 5
|
| 15 |
+
|
| 16 |
+
|
data/demo_prompt.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d11c98d850aa30740ac3a39757e0e6216da3d66adda7e8e7f588f2e6dbbaa04a
|
| 3 |
+
size 3593
|
data/docs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb5f2382efad046adb66db5012e7f9e46e53a047f08ac9a2cbbbfb7b0ce99691
|
| 3 |
+
size 224530
|
data/documents.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81b954096e0387165987cbc0acac8570eda33513a95108da04d481fd228b4af2
|
| 3 |
+
size 229378
|
data/index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78b0060c9e9c4d574a03d588c2e843a271f82206950ff2c6c00e3610f30ad891
|
| 3 |
+
size 823341
|
embeddings.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# embeddings.py
#
# Thin wrapper around the OpenAI embeddings endpoint.
import os
from openai import OpenAI

# Module-level client shared by all callers; requires OPENAI_API_KEY in the
# environment (raises KeyError at import time if it is missing).
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def get_embedding(text: str, model="text-embedding-3-small"):
    """Return the embedding vector for *text*.

    Args:
        text: string to embed (a single input, not a batch).
        model: OpenAI embedding model name.

    Returns:
        The embedding as a list of floats — the first (and, for a single
        input, only) item of the API response.
    """
    response = client.embeddings.create(
        model=model,
        input=text
    )
    return response.data[0].embedding
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
openai>=1.0.0
|
| 3 |
+
faiss-cpu
|
| 4 |
+
numpy
|
| 5 |
+
pickle5
|
semantic.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import faiss
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pickle
|
| 4 |
+
|
| 5 |
+
INDEX_PATH = "data/index.faiss"
DOCS_PATH = "data/docs.pkl"

class SemanticSearch:
    """Nearest-neighbour lookup over a prebuilt FAISS index of documents."""

    def __init__(self):
        # Load the FAISS index and the pickled document list built offline
        # by build_index.py.
        self.index = faiss.read_index(INDEX_PATH)
        with open(DOCS_PATH, "rb") as fh:
            self.docs = pickle.load(fh)

    def search(self, query_emb, k=3):
        """Return the 'section' text of the top-k documents for *query_emb*."""
        query_matrix = np.array([query_emb]).astype("float32")
        _distances, neighbour_ids = self.index.search(query_matrix, k)
        # FAISS pads the id row with -1 when fewer than k neighbours exist;
        # skip those padding entries.
        return [self.docs[doc_id]['section']
                for doc_id in neighbour_ids[0]
                if doc_id >= 0]
|