jdmorzan committed
Commit 9edcf94 · verified · 1 Parent(s): c40cecd

Upload 11 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ia_pediatrica_pdf/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,13 @@
 ---
-title: Ia Pediatrica Demo
-emoji: 🦀
+title: Explorador de carreras
+emoji: 💬
 colorFrom: yellow
 colorTo: purple
 sdk: gradio
-sdk_version: 5.33.1
+sdk_version: 5.23.3
 app_file: app.py
 pinned: false
+license: apache-2.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
app.py ADDED
@@ -0,0 +1,260 @@
+import threading
+import queue
+import time
+
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+from langchain.chat_models import ChatOpenAI
+from langsmith import traceable
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain.prompts import ChatPromptTemplate
+from langchain.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate
+from langchain.callbacks.base import BaseCallbackHandler
+import gradio as gr
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+# --------------------------
+# Custom streaming callback handler that uses a queue.
+# --------------------------
+class CustomStreamingCallbackHandler(BaseCallbackHandler):
+    def __init__(self):
+        self.token_queue = queue.Queue()
+
+    def on_llm_new_token(self, token: str, **kwargs):
+        # Simply put the new token in the queue.
+        self.token_queue.put(token)
+
+# Instantiate the custom streaming callback (do not pass the chatbot here).
+stream_handler = CustomStreamingCallbackHandler()
+
+# --------------------------
+# Setup vector database and embeddings
+# --------------------------
+embeddings = OpenAIEmbeddings()
+vectordb = Chroma(
+    persist_directory="./ai_vocacional_2506",
+    embedding_function=embeddings
+)
+
+# --------------------------
+# Define prompt templates
+# --------------------------
+general_system_template = r"""
+Eres el explorador de carreras. Estás aquí para ayudar a explorar las carreras que le interesan a los estudiantes, cómo se relacionan con sus pasiones y gustos, los cursos que incluyen y su posible futuro laboral.
+Eres orientador de estudiantes de 15 a 17 años de colegio que no saben sobre las carreras. Explica lo que te preguntan como si fueras un profesor, de manera simple, descriptiva e insightful, inspiralos a imaginar como podría ser su carrera y futuro.
+Proporciona mensajes breves y concisos con bullets y negritas.
+Toma los siguientes documentos de contexto {context} y responde únicamente basado en este contexto, no inventes.
+No menciones ningún código de curso al mencionar sus nombres (ejemplo: [PS1223])
+Si te preguntan por universidades, responde elegantemente que deben de haber muy buenas opciones pero la que conoces a profundidad y destaca es la UPC (Universidad Peruana de Ciencias Aplicadas), continua respondiendo a la pregunta que te hagan sobre universidades, respondiendo a la pregunta que te hagan y detallando por qué UPC destaca.
+Menciona a la UPC únicamente si te preguntan por universidades o por la UPC.
+"""
+
+text_init = '''¡Hola! Soy el Explorador de Carreras.
+
+Sé que elegir una carrera puede ser un gran desafío, pero estoy aquí para ayudarte a descubrir el camino que mejor se ajuste a ti. Juntos podemos explorar todas las opciones que te interesan.
+
+¿Te gustaría saber...?
+¿Cómo son los cursos de las carreras que te interesan?
+¿Cómo es la vida diaria de un estudiante universitario?
+
+No dudes en preguntarme lo que quieras. ¡Exploremos juntos todas las posibilidades que el mundo profesional tiene para ti!
+'''
+
+
+general_user_template = "Pregunta:```{question}```"
+messages = [
+    SystemMessagePromptTemplate.from_template(general_system_template),
+    HumanMessagePromptTemplate.from_template(general_user_template)
+]
+qa_prompt = ChatPromptTemplate.from_messages(messages)
+
+# --------------------------
+# Create conversation memory
+# --------------------------
+def create_memory():
+    return ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+
+# --------------------------
+# Define the chain function that uses the LLM to answer queries
+# --------------------------
+def pdf_qa(query, memory, llm):
+    chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=vectordb.as_retriever(search_kwargs={'k': 28}),
+        combine_docs_chain_kwargs={'prompt': qa_prompt},
+        memory=memory
+    )
+    return chain({"question": query})
+
+def print_like_dislike(x: gr.LikeData):
+    print(x.index, x.value, x.liked)
+
+css = '''
+@import url('https://fonts.googleapis.com/css2?family=Public+Sans&display=swap');
+#enviar_button {
+    width: 48px;
+    height: 48px;
+    background-image: url('https://gestorportal-fe-dev2-cib.stage01.link/directus/c65117c0-3e48-4adb-a6a1-21c9387f480b.png'); /* Example image */
+    background-size: contain; /* Make the image fit the button size */
+    background-repeat: no-repeat; /* Do not repeat the image */
+    background-position: center;
+    background-color: #E50A17; /* #FFFFFF; Red background */
+    transition: background-color 0.3s ease; /* Smooth transition when the color changes */
+    border-radius: 50%;
+    min-width: min(48px, 100%);
+}
+#markdown_text {
+    color: #191919; /* Text color (change to any color you like) */
+    font-family: 'Public Sans', sans-serif;
+    font-size: 16px; /* Font size */
+    font-weight: bold; /* Bold text */
+}
+#chatbot_intput {
+    border-radius: 50px; /* Rounded corners */
+    border: 2px solid #D9D9D9; /* Light grey border */
+    padding: 0px; /* Inner padding */
+    min-height: 48px !important;
+    box-shadow: 0px 6px 28px -6px rgba(89, 89, 89, 0.20);
+}
+#chatbot_container {
+    max-width: 100% !important;
+}
+.gradio-container .fillable {
+    max-width: 100% !important;
+    padding: 0 !important;
+}
+.gradio-container #chatbot_output {
+    border-top: 1px solid #d9d9d9;
+    border: 0;
+    border-radius: 0;
+    border-width: 0 !important;
+    border-top: 1px solid #d9d9d9;
+}
+.gradio-container .icon-button-wrapper {
+    display: none;
+}
+.gradio-container #component-1 {
+    gap: 0 !important;
+}
+.gradio-container #component-4 {
+    padding: 0 24px;
+    margin-top: 24px;
+}
+.gradio-container #component-2 {
+    padding-top: 12px;
+    padding-bottom: 12px;
+    font-weight: bold;
+    padding-left: 24px;
+    border-bottom: 1px solid #D9D9D9;
+    border-width: 1px !important;
+    border-top: 0 !important;
+    border-left: 0 !important;
+    border-right: 0 !important;
+    border-radius: 0 !important;
+}
+.gradio-container #component-2 > span p {
+    font-weight: bold;
+}
+.gradio-container footer {
+    display: none !important;
+}
+.message-row {
+    margin: 20px 24px;
+}
+.message-row .bot {
+    padding: 24px;
+    margin-top: 0;
+    background: #fafafa;
+    border: 0;
+    border-radius: 4px;
+}
+.message-row .message {
+    margin-top: 0;
+}
+.input-container textarea {
+    padding: 12px 24px
+}
+'''
+
+theme = gr.themes.Default(primary_hue="blue").set(
+    loader_color="#8CD7FF",
+    slider_color="#8CD7FF",
+)
+
+with gr.Blocks(css=css, elem_id="chatbot_container", theme=theme) as demo:
+    with gr.Column():
+        gr.Markdown("<div id='markdown_text'><strong class='explore_text'>Explorador de carreras</strong></div>")
+        chatbot = gr.Chatbot(value=[[None, text_init]], elem_id="chatbot_output", show_label=False)
+        # chatbot.like(print_like_dislike, None, None, like_user_message=True)
+        with gr.Row():
+            msg = gr.Textbox(placeholder="Escribe tu mensaje aquí", label='', elem_id="chatbot_intput", scale=15, container=False)
+            submit = gr.Button(" ", elem_id="enviar_button", scale=0.1)
+
+    memory_state = gr.State(create_memory)
+
+    # Create the ChatOpenAI model with streaming enabled and our custom callback.
+    llm = ChatOpenAI(
+        temperature=0,
+        model_name='gpt-4o',
+        streaming=True,
+        callbacks=[stream_handler]
+    )
+
+    # --------------------------
+    # Generator function that runs the chain in a separate thread and polls the token queue.
+    # --------------------------
+    def user(query, chat_history, memory):
+        # Append the user's message with an empty bot response.
+        chat_history.append((query, ""))
+        # Immediately yield an update so the user's message appears.
+        yield "", chat_history, memory
+
+        # Container for the final chain result.
+        final_result = [None]
+
+        # Define a helper function to run the chain.
+        def run_chain():
+            result = pdf_qa(query, memory, llm)
+            final_result[0] = result
+            # Signal end-of-stream by putting a sentinel value.
+            stream_handler.token_queue.put(None)
+
+        # Run the chain in a separate thread.
+        thread = threading.Thread(target=run_chain)
+        thread.start()
+
+        # Poll the token queue for new tokens and yield updated chat history.
+        current_response = ""
+        while True:
+            try:
+                token = stream_handler.token_queue.get(timeout=0.1)
+            except queue.Empty:
+                token = None
+
+            # A None token is our signal for end-of-stream.
+            if token is None:
+                if not thread.is_alive():
+                    break
+                else:
+                    continue
+            current_response += token
+            chat_history[-1] = (query, current_response)
+            yield "", chat_history, memory
+
+        thread.join()
+        # Optionally, update the final answer if it differs from the streaming tokens.
+        if final_result[0] and "answer" in final_result[0]:
+            chat_history[-1] = (query, final_result[0]["answer"])
+            yield "", chat_history, memory
+
+    # Wire up the generator function to Gradio components with queue enabled.
+    submit.click(user, [msg, chatbot, memory_state], [msg, chatbot, memory_state], queue=True)
+    msg.submit(user, [msg, chatbot, memory_state], [msg, chatbot, memory_state], queue=True)
+
+if __name__ == "__main__":
+    demo.queue().launch()
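
For reference, below is a minimal, self-contained sketch of the streaming pattern app.py relies on: the callback pushes each new token onto a queue from a worker thread, and a generator drains the queue until it sees the None sentinel. Here fake_llm is a hypothetical stand-in for the LangChain chain call, so the sketch runs without any API key or vector store.

import queue
import threading
import time

token_queue = queue.Queue()

def fake_llm(prompt: str):
    # Stand-in for the chain: emit a few tokens, like on_llm_new_token would.
    for token in ["Hola", ", ", "soy ", "el ", "explorador."]:
        token_queue.put(token)
        time.sleep(0.05)
    token_queue.put(None)  # sentinel: end of stream

def stream_answer(prompt: str):
    # Run the producer in a background thread so partial output can be yielded early.
    thread = threading.Thread(target=fake_llm, args=(prompt,))
    thread.start()
    answer = ""
    while True:
        token = token_queue.get()
        if token is None:  # end-of-stream sentinel
            break
        answer += token
        yield answer  # partial answer, as the Gradio generator does
    thread.join()

if __name__ == "__main__":
    for partial in stream_answer("¿Qué carreras hay?"):
        print(partial)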
gitattributes ADDED
@@ -0,0 +1,38 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+ai_vocacional_v2/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+ai_vocacional_v3/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+ai_vocacional_v2_2025/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
gitignore ADDED
@@ -0,0 +1 @@
+.env
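
app.py calls load_dotenv() at import time, so the ignored .env file is where the OpenAI credentials are expected to live; OPENAI_API_KEY is the variable LangChain's OpenAI wrappers read. A minimal sketch of how the key gets picked up, assuming a .env file containing a single OPENAI_API_KEY=<your key> line sits next to app.py and the python-dotenv package is installed:

import os
from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from ./.env into the process environment
print(bool(os.getenv("OPENAI_API_KEY")))  # True once the key is present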
ia_pediatrica_pdf/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbde24cf106b84ba5300765083d983bda58bcca1334dde81ebccde5c4a07becd
+size 50880512
ia_pediatrica_pdf/d7b57a2f-a8ba-4f67-9889-ce9055689c2c/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fd46d5bac99eba179eb855784b0fd28c18430353b1cf654a9d130a4ffd6ddc6
+size 25136000
ia_pediatrica_pdf/d7b57a2f-a8ba-4f67-9889-ce9055689c2c/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:481140099539d5069fab7df8a2399df9c29f8eb01cde1d25850b00cc3308c8e7
+size 100
ia_pediatrica_pdf/d7b57a2f-a8ba-4f67-9889-ce9055689c2c/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e6be5468e3bfd177ddc113dc92d6d9130455b84def551877fc066a30c296030
+size 230019
ia_pediatrica_pdf/d7b57a2f-a8ba-4f67-9889-ce9055689c2c/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc2cd1ab9b923188071d875e8688f87c2a10d409f638d9a23fd579da2dba8607
+size 16000
ia_pediatrica_pdf/d7b57a2f-a8ba-4f67-9889-ce9055689c2c/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ba9669be146fd29d6264b6c98da568ab87fdf60054bbdceadccc5e9c12b9cf9
+size 34156
requirements.txt ADDED
@@ -0,0 +1,13 @@
+langchain
+langchain-cli
+langchain-community
+langchain-core
+langchain-experimental
+langchain-fireworks
+openai
+gradio
+gradio-client
+chromadb
+langsmith
+tiktoken
+fastapi