Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,21 +16,38 @@ os.getenv("GROQ_API_KEY")
|
|
| 16 |
|
| 17 |
css_style = """
|
| 18 |
<style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
button {
|
| 20 |
-
height:
|
| 21 |
-
width:
|
| 22 |
-
font-size:
|
| 23 |
-
background-color: #
|
| 24 |
-
color:
|
| 25 |
-
border: none;
|
| 26 |
-
border-radius: 5px;
|
| 27 |
-
cursor: pointer;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
}
|
| 29 |
</style>
|
| 30 |
"""
|
| 31 |
|
| 32 |
def get_pdf_text(pdf_docs):
|
| 33 |
-
# Extraemos texto de los archivos cargados
|
| 34 |
text = ""
|
| 35 |
for pdf in pdf_docs:
|
| 36 |
pdf_reader = PdfReader(pdf)
|
|
@@ -39,19 +56,16 @@ def get_pdf_text(pdf_docs):
|
|
| 39 |
return text
|
| 40 |
|
| 41 |
def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
    return splitter.split_text(text)
|
| 46 |
|
| 47 |
def get_vector_store(text_chunks):
    """Embed *text_chunks* and persist a FAISS index under ./faiss_index."""
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    index = FAISS.from_texts(text_chunks, embedding=embedder)
    index.save_local("faiss_index")
|
| 52 |
|
| 53 |
def get_conversational_chain():
|
| 54 |
-
# Especificamos un prompt inicial al modelo
|
| 55 |
prompt_template = """
|
| 56 |
Responde la pregunta en español de la manera más detallada posible a partir del contexto proporcionado. Si la respuesta no está en
|
| 57 |
el contexto proporcionado, simplemente di, "la respuesta no está disponible en el contexto." No proporciones respuestas incorrectas.
|
|
@@ -61,7 +75,6 @@ def get_conversational_chain():
|
|
| 61 |
{question}
|
| 62 |
Respuesta:
|
| 63 |
"""
|
| 64 |
-
# Implementamos el modelo
|
| 65 |
model = ChatGroq(
|
| 66 |
temperature=0.3,
|
| 67 |
model_name="deepseek-r1-distill-llama-70b",
|
|
@@ -71,87 +84,70 @@ def get_conversational_chain():
|
|
| 71 |
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
|
| 72 |
return chain
|
| 73 |
|
| 74 |
-
# Tratamiento para recoger el pensamiento del modelo
|
| 75 |
def eliminar_texto_entre_tags(texto):
    """Return *texto* with every <think>...</think> section (tags included) removed."""
    return re.sub(r'<think>.*?</think>', '', texto, flags=re.DOTALL)
|
| 79 |
|
| 80 |
def user_input(user_question):
    """Answer *user_question* from the saved FAISS index and render the result in Streamlit."""
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    index = FAISS.load_local("faiss_index", embedder, allow_dangerous_deserialization=True)
    matches = index.similarity_search(user_question)

    qa_chain = get_conversational_chain()
    result = qa_chain(
        {"input_documents": matches, "question": user_question},
        return_only_outputs=True
    )

    # Debug: log the raw model output before any post-processing.
    raw_answer = result['output_text']
    print("Original Response:", raw_answer)

    # Capture the model's <think>...</think> reasoning, if present.
    reasoning = ""
    thought_match = re.search(r"<think>(.*?)</think>", raw_answer, re.DOTALL)
    if thought_match:
        reasoning = thought_match.group(1).strip()

    # Strip the reasoning tags from the answer shown to the user.
    visible_answer = eliminar_texto_entre_tags(raw_answer)
    print("Cleaned Response:", visible_answer)

    # Reasoning goes in a collapsible expander; the answer below it.
    with st.expander("💭 Pensamiento del Modelo"):
        st.write(reasoning)
    st.markdown(f"### Respuesta:\n{visible_answer}")
|
| 116 |
|
| 117 |
def main():
|
| 118 |
-
"""Función principal para ejecutar la aplicación Streamlit."""
|
| 119 |
st.set_page_config(page_title="PDF Consultor 🔍", page_icon="🔍", layout="wide")
|
| 120 |
-
|
| 121 |
st.title("PDF Consultor 🔍")
|
| 122 |
-
|
| 123 |
st.markdown(css_style, unsafe_allow_html=True)
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
|
| 140 |
col1, col2, col3 = st.columns(3)
|
| 141 |
-
|
| 142 |
with col1:
|
| 143 |
if st.button("Resumen", key="resumen_button"):
|
| 144 |
user_input("Realiza un resumen sobre los aspectos más relevantes comentados en el documento")
|
| 145 |
-
|
| 146 |
with col2:
|
| 147 |
if st.button("Entidad", key="entidad_button"):
|
| 148 |
user_input("A qué entidad pertenece el contenido del documento?")
|
| 149 |
-
|
| 150 |
with col3:
|
| 151 |
if st.button("Fecha implantación", key="fecha_button"):
|
| 152 |
user_input("En qué fecha se implantará el contenido del documento?")
|
| 153 |
-
|
| 154 |
-
user_question = st.text_input("Introduce tu pregunta", placeholder="¿Qué quieres saber?")
|
| 155 |
|
| 156 |
if user_question:
|
| 157 |
with st.spinner("Obteniendo tu respuesta..."):
|
|
@@ -159,3 +155,4 @@ def main():
|
|
| 159 |
|
| 160 |
# Script entry point.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
| 16 |
|
| 17 |
css_style = """
|
| 18 |
<style>
|
| 19 |
+
.step-number {
|
| 20 |
+
font-size: 24px;
|
| 21 |
+
font-weight: bold;
|
| 22 |
+
color: #4CAF50;
|
| 23 |
+
}
|
| 24 |
+
.step-text {
|
| 25 |
+
font-size: 18px;
|
| 26 |
+
color: #555;
|
| 27 |
+
}
|
| 28 |
button {
|
| 29 |
+
height: 35px;
|
| 30 |
+
width: 120px;
|
| 31 |
+
font-size: 14px;
|
| 32 |
+
background-color: #4CAF50;
|
| 33 |
+
color: white;
|
| 34 |
+
border: none;
|
| 35 |
+
border-radius: 5px;
|
| 36 |
+
cursor: pointer;
|
| 37 |
+
}
|
| 38 |
+
button:hover {
|
| 39 |
+
background-color: #45a049;
|
| 40 |
+
}
|
| 41 |
+
.custom-input {
|
| 42 |
+
font-size: 16px;
|
| 43 |
+
padding: 10px;
|
| 44 |
+
border-radius: 5px;
|
| 45 |
+
border: 1px solid #ccc;
|
| 46 |
}
|
| 47 |
</style>
|
| 48 |
"""
|
| 49 |
|
| 50 |
def get_pdf_text(pdf_docs):
|
|
|
|
| 51 |
text = ""
|
| 52 |
for pdf in pdf_docs:
|
| 53 |
pdf_reader = PdfReader(pdf)
|
|
|
|
| 56 |
return text
|
| 57 |
|
| 58 |
def get_text_chunks(text):
    """Break *text* into overlapping fragments for vector indexing."""
    return RecursiveCharacterTextSplitter(
        chunk_size=5000, chunk_overlap=500
    ).split_text(text)
|
| 62 |
|
| 63 |
def get_vector_store(text_chunks):
    """Build a FAISS vector store from *text_chunks* and save it locally."""
    model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    store = FAISS.from_texts(text_chunks, embedding=model)
    store.save_local("faiss_index")
|
| 67 |
|
| 68 |
def get_conversational_chain():
|
|
|
|
| 69 |
prompt_template = """
|
| 70 |
Responde la pregunta en español de la manera más detallada posible a partir del contexto proporcionado. Si la respuesta no está en
|
| 71 |
el contexto proporcionado, simplemente di, "la respuesta no está disponible en el contexto." No proporciones respuestas incorrectas.
|
|
|
|
| 75 |
{question}
|
| 76 |
Respuesta:
|
| 77 |
"""
|
|
|
|
| 78 |
model = ChatGroq(
|
| 79 |
temperature=0.3,
|
| 80 |
model_name="deepseek-r1-distill-llama-70b",
|
|
|
|
| 84 |
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
|
| 85 |
return chain
|
| 86 |
|
|
|
|
| 87 |
def eliminar_texto_entre_tags(texto):
    """Drop all <think>...</think> spans (tags and contents) from *texto*."""
    pensamiento = re.compile(r'<think>.*?</think>', re.DOTALL)
    return pensamiento.sub('', texto)
|
| 91 |
|
| 92 |
def user_input(user_question):
    """Run *user_question* against the persisted FAISS index and show the answer."""
    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    store = FAISS.load_local("faiss_index", embedder, allow_dangerous_deserialization=True)
    relevant_docs = store.similarity_search(user_question)

    answer = get_conversational_chain()(
        {"input_documents": relevant_docs, "question": user_question},
        return_only_outputs=True
    )

    # Debug trace of the unprocessed model output.
    raw_text = answer['output_text']
    print("Original Response:", raw_text)

    # Extract the model's chain-of-thought, when wrapped in <think> tags.
    thinking = ""
    found = re.search(r"<think>(.*?)</think>", raw_text, re.DOTALL)
    if found:
        thinking = found.group(1).strip()

    # The user-facing answer has the <think> block stripped out.
    final_text = eliminar_texto_entre_tags(raw_text)
    print("Cleaned Response:", final_text)

    with st.expander("💭 Pensamiento del Modelo"):
        st.write(thinking)
    st.markdown(f"### Respuesta:\n{final_text}")
|
| 113 |
|
| 114 |
def main():
|
|
|
|
| 115 |
st.set_page_config(page_title="PDF Consultor 🔍", page_icon="🔍", layout="wide")
|
|
|
|
| 116 |
st.title("PDF Consultor 🔍")
|
|
|
|
| 117 |
st.markdown(css_style, unsafe_allow_html=True)
|
| 118 |
+
|
| 119 |
+
st.sidebar.markdown('<p class="step-number">1️⃣</p> <p class="step-text">Subir archivo PDF</p>', unsafe_allow_html=True)
|
| 120 |
+
pdf_docs = st.sidebar.file_uploader(
|
| 121 |
+
"Subir archivo PDF",
|
| 122 |
+
accept_multiple_files=True,
|
| 123 |
+
type=["pdf"]
|
| 124 |
+
)
|
| 125 |
+
|
| 126 |
+
st.sidebar.markdown('<p class="step-number">2️⃣</p> <p class="step-text">Procesar el archivo</p>', unsafe_allow_html=True)
|
| 127 |
+
if st.sidebar.button("Procesar"):
|
| 128 |
+
with st.spinner("Procesando el archivo..."):
|
| 129 |
+
raw_text = get_pdf_text(pdf_docs)
|
| 130 |
+
text_chunks = get_text_chunks(raw_text)
|
| 131 |
+
get_vector_store(text_chunks)
|
| 132 |
+
st.sidebar.success("¡PDF procesado exitosamente!")
|
| 133 |
+
|
| 134 |
+
st.sidebar.markdown('<p class="step-number">3️⃣</p> <p class="step-text">Hacer una pregunta</p>', unsafe_allow_html=True)
|
| 135 |
|
| 136 |
col1, col2, col3 = st.columns(3)
|
| 137 |
+
|
| 138 |
with col1:
|
| 139 |
if st.button("Resumen", key="resumen_button"):
|
| 140 |
user_input("Realiza un resumen sobre los aspectos más relevantes comentados en el documento")
|
| 141 |
+
|
| 142 |
with col2:
|
| 143 |
if st.button("Entidad", key="entidad_button"):
|
| 144 |
user_input("A qué entidad pertenece el contenido del documento?")
|
| 145 |
+
|
| 146 |
with col3:
|
| 147 |
if st.button("Fecha implantación", key="fecha_button"):
|
| 148 |
user_input("En qué fecha se implantará el contenido del documento?")
|
| 149 |
+
|
| 150 |
+
user_question = st.text_input("Introduce tu pregunta", placeholder="¿Qué quieres saber?", key="custom-input")
|
| 151 |
|
| 152 |
if user_question:
|
| 153 |
with st.spinner("Obteniendo tu respuesta..."):
|
|
|
|
| 155 |
|
| 156 |
# Script entry point.
if __name__ == "__main__":
    main()
|
| 158 |
+
|