ionosphere committed on
Commit
60a5284
·
1 Parent(s): 0bad806
Files changed (5) hide show
  1. .gitignore +0 -1
  2. .gradio/certificate.pem +31 -0
  3. README.md +2 -2
  4. app.py +28 -23
  5. requirements.txt +9 -7
.gitignore CHANGED
@@ -1,4 +1,3 @@
1
- chroma_db/*
2
  __pycache__/*
3
  .venv
4
  .env
 
 
1
  __pycache__/*
2
  .venv
3
  .env
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: PDF Chatbot
3
  emoji: 👁
4
  colorFrom: red
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.6.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
1
  ---
2
+ title: GAIA Chatbot - level 3
3
  emoji: 👁
4
  colorFrom: red
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.13.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -1,7 +1,12 @@
1
  import os
2
- __import__('pysqlite3')
3
- import sys
4
- sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
 
 
 
 
 
5
  from dotenv import load_dotenv
6
  import json
7
  import gradio as gr
@@ -20,11 +25,12 @@ from llama_index.vector_stores.chroma import ChromaVectorStore
20
 
21
  load_dotenv()
22
 
23
- title = "AgreenDefi Gaia 8x22b PDF Demo"
24
  description = "Example of an assistant with Gradio, RAG from PDF documents and Mistral AI via its API"
25
  placeholder = (
26
  "Vous pouvez me posez une question sur ce contexte, appuyer sur Entrée pour valider"
27
  )
 
28
  llm_model = "open-mixtral-8x22b"
29
 
30
  env_api_key = os.environ.get("MISTRAL_API_KEY")
@@ -82,36 +88,37 @@ def load_file(files):
82
  for doc in documents:
83
  index.insert(doc)
84
 
85
- return (
86
  gr.Textbox(visible=False),
87
  gr.Textbox(value=f"Document encoded ! You can ask questions", visible=True),
88
  get_documents_in_db(),
89
  )
90
 
91
 
92
- def load_document(input_file):
93
- file_name = input_file.name.split("/")[-1]
94
- return gr.Textbox(value=f"Document loaded: {file_name}", visible=True)
 
95
 
96
 
97
  with gr.Blocks() as demo:
98
  gr.Markdown(
99
- """ # Bienvenue sur la démo AgreenDefi PDF
100
 
101
- Ajouter un fichier avant de poser une question sur le tchat.
102
- Cette démo vous permet d'interagir entre des fichiers PDF et Mistral AI via son API.
103
- Mistral va répondre à vos questions par rapport au document.
104
 
105
  *The files will stay in the database unless there is 48h of inactivity or you re-build the space.*
106
  """
107
  )
108
 
109
- gr.Markdown(""" ### 1 / Préparer les données """)
110
 
111
  with gr.Row():
112
  with gr.Column():
113
  input_file = gr.File(
114
- label="Charger des fichiers pdf",
115
  file_types=[".pdf"],
116
  file_count="multiple",
117
  type="filepath",
@@ -123,33 +130,31 @@ with gr.Blocks() as demo:
123
 
124
  input_file.upload(
125
  fn=load_document,
126
- inputs=[
127
- input_file,
128
- ],
129
  outputs=[file_msg],
130
  concurrency_limit=20,
131
  )
132
 
133
  help_msg = gr.Markdown(
134
- value="Quan le document est chargé, Appuyer sur Encode pour l'ajouter dans la base de données."
135
  )
136
 
137
- file_btn = gr.Button(value="Encoder les fichiers ✅", interactive=True)
138
  btn_msg = gr.Textbox(container=False, visible=False)
139
 
140
  with gr.Row():
141
  db_list = gr.Markdown(value=get_documents_in_db)
142
- delete_btn = gr.Button(value="Vider la base 🗑️", interactive=True, scale=0)
143
 
144
  file_btn.click(
145
  load_file,
146
- inputs=[input_file],
147
  outputs=[file_msg, btn_msg, db_list],
148
  show_progress="full",
149
  )
150
  delete_btn.click(empty_db, outputs=[db_list], show_progress="minimal")
151
 
152
- gr.Markdown(""" ### 2 / Poser une question selon le contexte """)
153
 
154
  chatbot = gr.Chatbot()
155
  msg = gr.Textbox(placeholder=placeholder)
@@ -164,4 +169,4 @@ with gr.Blocks() as demo:
164
 
165
  demo.title = title
166
 
167
- demo.launch()
 
1
  import os
2
+ # Try to import pysqlite3 for environments like Hugging Face Spaces
3
+ try:
4
+ __import__('pysqlite3')
5
+ import sys
6
+ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
7
+ except ImportError:
8
+ # pysqlite3 not available, use standard sqlite3 (works on most environments)
9
+ pass
10
  from dotenv import load_dotenv
11
  import json
12
  import gradio as gr
 
25
 
26
  load_dotenv()
27
 
28
+ title = "Gaia Mistral 8x22b Chat RAG PDF Demo"
29
  description = "Example of an assistant with Gradio, RAG from PDF documents and Mistral AI via its API"
30
  placeholder = (
31
  "Vous pouvez me posez une question sur ce contexte, appuyer sur Entrée pour valider"
32
  )
33
+ placeholder_url = "Extract text from this url"
34
  llm_model = "open-mixtral-8x22b"
35
 
36
  env_api_key = os.environ.get("MISTRAL_API_KEY")
 
88
  for doc in documents:
89
  index.insert(doc)
90
 
91
+ return (
92
  gr.Textbox(visible=False),
93
  gr.Textbox(value=f"Document encoded ! You can ask questions", visible=True),
94
  get_documents_in_db(),
95
  )
96
 
97
 
98
+ def load_document(input_files):
99
+ for input_file in input_files:
100
+ file_name = input_file.name.split("/")[-1]
101
+ return gr.Textbox(value=f"Document loaded: {file_name}", visible=True)
102
 
103
 
104
  with gr.Blocks() as demo:
105
  gr.Markdown(
106
+ """ # Welcome to Gaia Level 3 Demo
107
 
108
+ Add a file before interacting with the Chat.
109
+ This demo allows you to interact with a pdf file and then ask questions to Mistral APIs.
110
+ Mistral will answer with the context extracted from your uploaded file.
111
 
112
  *The files will stay in the database unless there is 48h of inactivity or you re-build the space.*
113
  """
114
  )
115
 
116
+ gr.Markdown(""" ### 1 / Extract data from PDF """)
117
 
118
  with gr.Row():
119
  with gr.Column():
120
  input_file = gr.File(
121
+ label="Load a pdf",
122
  file_types=[".pdf"],
123
  file_count="multiple",
124
  type="filepath",
 
130
 
131
  input_file.upload(
132
  fn=load_document,
133
+ inputs=input_file,
 
 
134
  outputs=[file_msg],
135
  concurrency_limit=20,
136
  )
137
 
138
  help_msg = gr.Markdown(
139
+ value="Once the document is loaded, press the Encode button below to add it to the db."
140
  )
141
 
142
+ file_btn = gr.Button(value="Encode file ✅", interactive=True)
143
  btn_msg = gr.Textbox(container=False, visible=False)
144
 
145
  with gr.Row():
146
  db_list = gr.Markdown(value=get_documents_in_db)
147
+ delete_btn = gr.Button(value="Empty db 🗑️", interactive=True, scale=0)
148
 
149
  file_btn.click(
150
  load_file,
151
+ inputs=input_file,
152
  outputs=[file_msg, btn_msg, db_list],
153
  show_progress="full",
154
  )
155
  delete_btn.click(empty_db, outputs=[db_list], show_progress="minimal")
156
 
157
+ gr.Markdown(""" ### 2 / Ask a question about this context """)
158
 
159
  chatbot = gr.Chatbot()
160
  msg = gr.Textbox(placeholder=placeholder)
 
169
 
170
  demo.title = title
171
 
172
+ demo.launch(share=True)
requirements.txt CHANGED
@@ -1,10 +1,12 @@
1
- pypdf
2
- mistralai
3
- gradio
4
- pysqlite3-binary
5
- chromadb
6
- llama-index
7
- llama-index-readers-web
 
 
8
  llama-index-readers-file
9
  llama-index-llms-mistralai
10
  llama-index-embeddings-mistralai
 
1
+ pypdf==4.3.1
2
+ mistralai==1.0.1
3
+ gradio==4.44.0
4
+ python-dotenv==1.0.1
5
+ huggingface-hub==0.36.0
6
+ # sqlite3
7
+ # pysqlite3-binary
8
+ chromadb==0.5.5
9
+ llama-index==0.10.68
10
  llama-index-readers-file
11
  llama-index-llms-mistralai
12
  llama-index-embeddings-mistralai