Waflon committed on
Commit
9e063a9
·
verified ·
1 Parent(s): 904b827

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -8
app.py CHANGED
@@ -1,15 +1,71 @@
1
- import time
2
  import streamlit as st
3
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  with st.status("Cargando app...", expanded=True) as status:
6
- pipe = pipeline(model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
7
- text = st.text_area('Ingresa algun texto:')
8
- status.update(label="App cargada con exito!", state="complete")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- def main():
11
- if text:
12
- out = pipe(text)
 
13
  st.json(out)
14
 
15
  if __name__ == "__main__":
 
 
1
  import streamlit as st
2
+ import getpass
3
+ import os
4
+
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_openai import ChatOpenAI
10
+ from langchain_core.prompts import ChatPromptTemplate
11
+ from langchain_core.runnables import RunnablePassthrough
12
+ from langchain.schema import StrOutputParser
13
+
14
+ os.environ["OPENAI_API_KEY"] = st.secrets['OPENAI_API_KEY'] # agregada en la config de Hugging Face
15
+ os.environ["LANGCHAIN_TRACING_V2"] = "true"
16
+ os.environ["LANGCHAIN_API_KEY"] = st.secrets['OPENAI_API_KEY']
17
 
18
  with st.status("Cargando app...", expanded=True) as status:
19
+ loader = PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu3.pdf")
20
+ data = loader.load()
21
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
22
+ #Transformado a tipo de dato especifico para esto
23
+ docs = text_splitter.split_documents(data) # 'data' holds the loaded PDF documents; split them into smaller chunked documents using the text splitter.
24
+
25
+ #Modelo QA sentence similarity
26
+ modelPath = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' #español
27
+ #model_kwargs = {'device':'cuda'} #or CPU
28
+ model_kwargs = {'device':'cuda'} #or CPU
29
+ encode_kwargs = {'normalize_embeddings': False}
30
+
31
+ #Embeddings que transforman a vectores densos multidimensionales las preguntas del SII
32
+ embeddings = HuggingFaceEmbeddings(
33
+ model_name=modelPath, # Ruta a modelo Pre entrenado
34
+ model_kwargs=model_kwargs, # Opciones de configuracion del modelo
35
+ encode_kwargs=encode_kwargs # Opciones de Encoding
36
+ )
37
+
38
+ #DB y retriever
39
+ db = FAISS.from_documents(docs, embeddings) # Build the FAISS vector index from the split documents; the retriever created from it below returns up to 6 relevant splits/documents.
40
+ retriever = db.as_retriever(search_kwargs={"k": 6})
41
+
42
+ template = """Responde la pregunta basado unicamente en el siguiente contexto
43
+
44
+ {contexto}
45
+
46
+ Pregunta: {pregunta}
47
+
48
+ """
49
+
50
+ #LLM
51
+ prompt = ChatPromptTemplate.from_template(template)
52
+ model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)
53
+
54
+ def format_docs(docs):
55
+ return "\n\n".join([d.page_content for d in docs])
56
+
57
+ chain = (
58
+ {"contexto": retriever | format_docs, "pregunta": RunnablePassthrough()}
59
+ | prompt
60
+ | model
61
+ | StrOutputParser()
62
+ )
63
+ status.update(label="App cargada con exito!", state="complete")
64
 
65
+ def main():
66
+ pregunta = st.text_area('Ingresa algun texto:')
67
+ if pregunta:
68
+ out = chain.invoke(pregunta)
69
  st.json(out)
70
 
71
  if __name__ == "__main__":