Waflon committed on
Commit
19563eb
·
verified ·
1 Parent(s): c629702

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -45
app.py CHANGED
@@ -15,51 +15,50 @@ os.environ["OPENAI_API_KEY"] = st.secrets['OPENAI'] # agregada en la config de
15
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
16
  os.environ["LANGCHAIN_API_KEY"] = st.secrets['OPENAI']
17
 
18
- with st.status("Cargando app...", expanded=True) as status:
19
- loader = PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu3.pdf")
20
- data = loader.load()
21
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
22
- #Transformado a tipo de dato especifico para esto
23
- docs = text_splitter.split_documents(data) # 'data' holds the text you want to split, split the text into documents using the text splitter.
24
-
25
- #Modelo QA sentence similarity
26
- modelPath = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' #español
27
- model_kwargs = {'device':'cpu'} #or CPUmodel_kwargs = {'device':'cuda'} #or CPU
28
- encode_kwargs = {'normalize_embeddings': False}
29
-
30
- #Embeddings que transforman a vectores densos multidimensionales las preguntas del SII
31
- embeddings = HuggingFaceEmbeddings(
32
- model_name=modelPath, # Ruta a modelo Pre entrenado
33
- model_kwargs=model_kwargs, # Opciones de configuracion del modelo
34
- encode_kwargs=encode_kwargs # Opciones de Encoding
35
- )
36
-
37
- #DB y retriever
38
- db = FAISS.from_documents(docs, embeddings) # Create a retriever object from the 'db' with a search configuration where it retrieves up to 4 relevant splits/documents.
39
- retriever = db.as_retriever(search_kwargs={"k": 6})
40
-
41
- template = """Responde la pregunta basado unicamente en el siguiente contexto
42
-
43
- {contexto}
44
-
45
- Pregunta: {pregunta}
46
-
47
- """
48
-
49
- #LLM
50
- prompt = ChatPromptTemplate.from_template(template)
51
- model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)
52
-
53
- def format_docs(docs):
54
- return "\n\n".join([d.page_content for d in docs])
55
-
56
- chain = (
57
- {"contexto": retriever | format_docs, "pregunta": RunnablePassthrough()}
58
- | prompt
59
- | model
60
- | StrOutputParser()
61
- )
62
- status.update(label="App cargada con exito!", state="complete")
63
 
64
  def main():
65
  pregunta = st.text_area('Ingresa algun texto:')
 
15
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
16
  os.environ["LANGCHAIN_API_KEY"] = st.secrets['OPENAI']
17
 
18
+
19
+ loader = PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu3.pdf")
20
+ data = loader.load()
21
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
22
+ #Transformado a tipo de dato especifico para esto
23
+ docs = text_splitter.split_documents(data) # 'data' holds the text you want to split, split the text into documents using the text splitter.
24
+
25
+ #Modelo QA sentence similarity
26
+ modelPath = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' #español
27
+ model_kwargs = {'device':'cpu'} #or CPUmodel_kwargs = {'device':'cuda'} #or CPU
28
+ encode_kwargs = {'normalize_embeddings': False}
29
+
30
+ #Embeddings que transforman a vectores densos multidimensionales las preguntas del SII
31
+ embeddings = HuggingFaceEmbeddings(
32
+ model_name=modelPath, # Ruta a modelo Pre entrenado
33
+ model_kwargs=model_kwargs, # Opciones de configuracion del modelo
34
+ encode_kwargs=encode_kwargs # Opciones de Encoding
35
+ )
36
+
37
+ #DB y retriever
38
+ db = FAISS.from_documents(docs, embeddings) # Create a retriever object from the 'db' with a search configuration where it retrieves up to 4 relevant splits/documents.
39
+ retriever = db.as_retriever(search_kwargs={"k": 6})
40
+
41
+ template = """Responde la pregunta basado unicamente en el siguiente contexto
42
+
43
+ {contexto}
44
+
45
+ Pregunta: {pregunta}
46
+
47
+ """
48
+
49
+ #LLM
50
+ prompt = ChatPromptTemplate.from_template(template)
51
+ model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)
52
+
53
+ def format_docs(docs):
54
+ return "\n\n".join([d.page_content for d in docs])
55
+
56
+ chain = (
57
+ {"contexto": retriever | format_docs, "pregunta": RunnablePassthrough()}
58
+ | prompt
59
+ | model
60
+ | StrOutputParser()
61
+ )
 
62
 
63
  def main():
64
  pregunta = st.text_area('Ingresa algun texto:')