Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,21 +16,38 @@ os.getenv("GROQ_API_KEY")
|
|
| 16 |
|
| 17 |
css_style = """
|
| 18 |
<style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
button {
|
| 20 |
-
height:
|
| 21 |
-
width:
|
| 22 |
-
font-size:
|
| 23 |
-
background-color: #
|
| 24 |
-
color:
|
| 25 |
-
border: none;
|
| 26 |
-
border-radius: 5px;
|
| 27 |
-
cursor: pointer;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
}
|
| 29 |
</style>
|
| 30 |
"""
|
| 31 |
|
| 32 |
def get_pdf_text(pdf_docs):
|
| 33 |
-
# Extraemos texto de los archivos cargados
|
| 34 |
text = ""
|
| 35 |
for pdf in pdf_docs:
|
| 36 |
pdf_reader = PdfReader(pdf)
|
|
@@ -39,19 +56,16 @@ def get_pdf_text(pdf_docs):
|
|
| 39 |
return text
|
| 40 |
|
| 41 |
def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    return splitter.split_text(text)
|
| 46 |
|
| 47 |
def get_vector_store(text_chunks):
    """Embed *text_chunks* and persist a FAISS index under ./faiss_index."""
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    index = FAISS.from_texts(text_chunks, embedding=embedder)
    index.save_local("faiss_index")
|
| 52 |
|
| 53 |
def get_conversational_chain():
|
| 54 |
-
# Especificamos un prompt inicial al modelo
|
| 55 |
prompt_template = """
|
| 56 |
Responde la pregunta en español de la manera más detallada posible a partir del contexto proporcionado. Si la respuesta no está en
|
| 57 |
el contexto proporcionado, simplemente di, "la respuesta no está disponible en el contexto." No proporciones respuestas incorrectas.
|
|
@@ -61,7 +75,6 @@ def get_conversational_chain():
|
|
| 61 |
{question}
|
| 62 |
Respuesta:
|
| 63 |
"""
|
| 64 |
-
# Implementamos el modelo
|
| 65 |
model = ChatGroq(
|
| 66 |
temperature=0.3,
|
| 67 |
model_name="deepseek-r1-distill-llama-70b",
|
|
@@ -71,87 +84,70 @@ def get_conversational_chain():
|
|
| 71 |
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
|
| 72 |
return chain
|
| 73 |
|
| 74 |
-
# Tratamiento para recoger el pensamiento del modelo
|
| 75 |
def eliminar_texto_entre_tags(texto):
    """Return *texto* with every <think>...</think> section (tags included) removed."""
    return re.sub(r'<think>.*?</think>', '', texto, flags=re.DOTALL)
|
| 79 |
|
| 80 |
def user_input(user_question):
    """Answer *user_question* from the saved FAISS index and render the result in Streamlit."""
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    index = FAISS.load_local("faiss_index", embedder, allow_dangerous_deserialization=True)
    matches = index.similarity_search(user_question)

    qa_chain = get_conversational_chain()
    result = qa_chain(
        {"input_documents": matches, "question": user_question},
        return_only_outputs=True
    )

    # Debug: log the raw model output before any post-processing.
    raw_answer = result['output_text']
    print("Original Response:", raw_answer)

    # Capture the model's <think>...</think> reasoning, if present.
    reasoning = ""
    thought_match = re.search(r"<think>(.*?)</think>", raw_answer, re.DOTALL)
    if thought_match:
        reasoning = thought_match.group(1).strip()

    # Strip the reasoning tags from the answer shown to the user.
    visible_answer = eliminar_texto_entre_tags(raw_answer)
    print("Cleaned Response:", visible_answer)

    # Reasoning goes in a collapsible expander; the answer below it.
    with st.expander("💭 Pensamiento del Modelo"):
        st.write(reasoning)
    st.markdown(f"### Respuesta:\n{visible_answer}")
|
| 116 |
|
| 117 |
def main():
|
| 118 |
-
"""Función principal para ejecutar la aplicación Streamlit."""
|
| 119 |
st.set_page_config(page_title="PDF Consultor 🔍", page_icon="🔍", layout="wide")
|
| 120 |
-
|
| 121 |
st.title("PDF Consultor 🔍")
|
| 122 |
-
|
| 123 |
st.markdown(css_style, unsafe_allow_html=True)
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
|
| 140 |
col1, col2, col3 = st.columns(3)
|
| 141 |
-
|
| 142 |
with col1:
|
| 143 |
if st.button("Resumen", key="resumen_button"):
|
| 144 |
user_input("Realiza un resumen sobre los aspectos más relevantes comentados en el documento")
|
| 145 |
-
|
| 146 |
with col2:
|
| 147 |
if st.button("Entidad", key="entidad_button"):
|
| 148 |
user_input("A qué entidad pertenece el contenido del documento?")
|
| 149 |
-
|
| 150 |
with col3:
|
| 151 |
if st.button("Fecha implantación", key="fecha_button"):
|
| 152 |
user_input("En qué fecha se implantará el contenido del documento?")
|
| 153 |
-
|
| 154 |
-
user_question = st.text_input("Introduce tu pregunta", placeholder="¿Qué quieres saber?")
|
| 155 |
|
| 156 |
if user_question:
|
| 157 |
with st.spinner("Obteniendo tu respuesta..."):
|
|
@@ -159,3 +155,4 @@ def main():
|
|
| 159 |
|
| 160 |
# Script entry point.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
| 16 |
|
| 17 |
css_style = """
|
| 18 |
<style>
|
| 19 |
+
.step-number {
|
| 20 |
+
font-size: 24px;
|
| 21 |
+
font-weight: bold;
|
| 22 |
+
color: #4CAF50;
|
| 23 |
+
}
|
| 24 |
+
.step-text {
|
| 25 |
+
font-size: 18px;
|
| 26 |
+
color: #555;
|
| 27 |
+
}
|
| 28 |
button {
|
| 29 |
+
height: 35px;
|
| 30 |
+
width: 120px;
|
| 31 |
+
font-size: 14px;
|
| 32 |
+
background-color: #4CAF50;
|
| 33 |
+
color: white;
|
| 34 |
+
border: none;
|
| 35 |
+
border-radius: 5px;
|
| 36 |
+
cursor: pointer;
|
| 37 |
+
}
|
| 38 |
+
button:hover {
|
| 39 |
+
background-color: #45a049;
|
| 40 |
+
}
|
| 41 |
+
.custom-input {
|
| 42 |
+
font-size: 16px;
|
| 43 |
+
padding: 10px;
|
| 44 |
+
border-radius: 5px;
|
| 45 |
+
border: 1px solid #ccc;
|
| 46 |
}
|
| 47 |
</style>
|
| 48 |
"""
|
| 49 |
|
| 50 |
def get_pdf_text(pdf_docs):
|
|
|
|
| 51 |
text = ""
|
| 52 |
for pdf in pdf_docs:
|
| 53 |
pdf_reader = PdfReader(pdf)
|
|
|
|
| 56 |
return text
|
| 57 |
|
| 58 |
def get_text_chunks(text):
    """Break *text* into overlapping fragments for vector indexing."""
    return RecursiveCharacterTextSplitter(
        chunk_size=5000, chunk_overlap=500
    ).split_text(text)
|
| 62 |
|
| 63 |
def get_vector_store(text_chunks):
    """Build a FAISS vector store from *text_chunks* and save it locally."""
    model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    store = FAISS.from_texts(text_chunks, embedding=model)
    store.save_local("faiss_index")
|
| 67 |
|
| 68 |
def get_conversational_chain():
|
|
|
|
| 69 |
prompt_template = """
|
| 70 |
Responde la pregunta en español de la manera más detallada posible a partir del contexto proporcionado. Si la respuesta no está en
|
| 71 |
el contexto proporcionado, simplemente di, "la respuesta no está disponible en el contexto." No proporciones respuestas incorrectas.
|
|
|
|
| 75 |
{question}
|
| 76 |
Respuesta:
|
| 77 |
"""
|
|
|
|
| 78 |
model = ChatGroq(
|
| 79 |
temperature=0.3,
|
| 80 |
model_name="deepseek-r1-distill-llama-70b",
|
|
|
|
| 84 |
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
|
| 85 |
return chain
|
| 86 |
|
|
|
|
| 87 |
def eliminar_texto_entre_tags(texto):
    """Drop all <think>...</think> spans (tags and contents) from *texto*."""
    pensamiento = re.compile(r'<think>.*?</think>', re.DOTALL)
    return pensamiento.sub('', texto)
|
| 91 |
|
| 92 |
def user_input(user_question):
    """Run *user_question* against the persisted FAISS index and show the answer."""
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    store = FAISS.load_local("faiss_index", embedder, allow_dangerous_deserialization=True)
    relevant_docs = store.similarity_search(user_question)

    answer = get_conversational_chain()(
        {"input_documents": relevant_docs, "question": user_question},
        return_only_outputs=True
    )

    # Debug trace of the unprocessed model output.
    raw_text = answer['output_text']
    print("Original Response:", raw_text)

    # Extract the model's chain-of-thought, when wrapped in <think> tags.
    thinking = ""
    found = re.search(r"<think>(.*?)</think>", raw_text, re.DOTALL)
    if found:
        thinking = found.group(1).strip()

    # The user-facing answer has the <think> block stripped out.
    final_text = eliminar_texto_entre_tags(raw_text)
    print("Cleaned Response:", final_text)

    with st.expander("💭 Pensamiento del Modelo"):
        st.write(thinking)
    st.markdown(f"### Respuesta:\n{final_text}")
|
| 113 |
|
| 114 |
def main():
|
|
|
|
| 115 |
st.set_page_config(page_title="PDF Consultor 🔍", page_icon="🔍", layout="wide")
|
|
|
|
| 116 |
st.title("PDF Consultor 🔍")
|
|
|
|
| 117 |
st.markdown(css_style, unsafe_allow_html=True)
|
| 118 |
+
|
| 119 |
+
st.sidebar.markdown('<p class="step-number">1️⃣</p> <p class="step-text">Subir archivo PDF</p>', unsafe_allow_html=True)
|
| 120 |
+
pdf_docs = st.sidebar.file_uploader(
|
| 121 |
+
"Subir archivo PDF",
|
| 122 |
+
accept_multiple_files=True,
|
| 123 |
+
type=["pdf"]
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
st.sidebar.markdown('<p class="step-number">2️⃣</p> <p class="step-text">Procesar el archivo</p>', unsafe_allow_html=True)
|
| 127 |
+
if st.sidebar.button("Procesar"):
|
| 128 |
+
with st.spinner("Procesando el archivo..."):
|
| 129 |
+
raw_text = get_pdf_text(pdf_docs)
|
| 130 |
+
text_chunks = get_text_chunks(raw_text)
|
| 131 |
+
get_vector_store(text_chunks)
|
| 132 |
+
st.sidebar.success("¡PDF procesado exitosamente!")
|
| 133 |
+
|
| 134 |
+
st.sidebar.markdown('<p class="step-number">3️⃣</p> <p class="step-text">Hacer una pregunta</p>', unsafe_allow_html=True)
|
| 135 |
|
| 136 |
col1, col2, col3 = st.columns(3)
|
| 137 |
+
|
| 138 |
with col1:
|
| 139 |
if st.button("Resumen", key="resumen_button"):
|
| 140 |
user_input("Realiza un resumen sobre los aspectos más relevantes comentados en el documento")
|
| 141 |
+
|
| 142 |
with col2:
|
| 143 |
if st.button("Entidad", key="entidad_button"):
|
| 144 |
user_input("A qué entidad pertenece el contenido del documento?")
|
| 145 |
+
|
| 146 |
with col3:
|
| 147 |
if st.button("Fecha implantación", key="fecha_button"):
|
| 148 |
user_input("En qué fecha se implantará el contenido del documento?")
|
| 149 |
+
|
| 150 |
+
user_question = st.text_input("Introduce tu pregunta", placeholder="¿Qué quieres saber?", key="custom-input")
|
| 151 |
|
| 152 |
if user_question:
|
| 153 |
with st.spinner("Obteniendo tu respuesta..."):
|
|
|
|
| 155 |
|
| 156 |
# Script entry point.
if __name__ == "__main__":
    main()
|
| 158 |
+
|