Waflon committed on
Commit
fc9959e
·
verified ·
1 Parent(s): a9c3493

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -49
app.py CHANGED
@@ -15,64 +15,72 @@ os.environ["OPENAI_API_KEY"] = st.secrets['OPENAI'] # agregada en la config de
15
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
16
  os.environ["LANGCHAIN_API_KEY"] = st.secrets['OPENAI']
17
 
 
 
 
18
 
19
  def get_data():
20
- return st.session_state["BD"].get(None)
21
 
22
 
23
  def add_data(chain):
24
- st.session_state["BD"]= chain
25
-
26
 
27
  try:
28
  chain = get_data()
 
 
 
 
29
  except:
30
- print("No hay datos previos")
31
- loader = PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu3.pdf")
32
- data = loader.load()
33
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
34
- #Transformado a tipo de dato especifico para esto
35
- docs = text_splitter.split_documents(data) # 'data' holds the text you want to split, split the text into documents using the text splitter.
36
-
37
- #Modelo QA sentence similarity
38
- modelPath = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' #español
39
- model_kwargs = {'device':'cpu'} # o cuda
40
- encode_kwargs = {'normalize_embeddings': False}
41
-
42
- #Embeddings que transforman a vectores densos multidimensionales las preguntas del SII
43
- embeddings = HuggingFaceEmbeddings(
44
- model_name=modelPath, # Ruta a modelo Pre entrenado
45
- model_kwargs=model_kwargs, # Opciones de configuracion del modelo
46
- encode_kwargs=encode_kwargs # Opciones de Encoding
47
- )
48
-
49
- #DB y retriever
50
- db = FAISS.from_documents(docs, embeddings) # Create a retriever object from the 'db' with a search configuration where it retrieves up to 4 relevant splits/documents.
51
- retriever = db.as_retriever(search_kwargs={"k": 3})
52
-
53
- template = """Responde la pregunta basado unicamente en el siguiente contexto
54
-
55
- {contexto}
56
-
57
- Pregunta: {pregunta}
58
-
59
- """
60
-
61
- #LLM
62
- prompt = ChatPromptTemplate.from_template(template)
63
- model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)
64
-
65
- def format_docs(docs):
66
- return "\n\n".join([d.page_content for d in docs])
67
-
68
- chain = (
69
- {"contexto": retriever | format_docs, "pregunta": RunnablePassthrough()}
70
- | prompt
71
- | model
72
- | StrOutputParser()
73
- )
74
- print(chain)
75
- add_data(chain)
 
 
76
 
77
  def main():
78
  pregunta = st.text_area('Ingresa algun texto:')
 
15
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
16
  os.environ["LANGCHAIN_API_KEY"] = st.secrets['OPENAI']
17
 
18
# Initialization: seed the session-state cache with a placeholder value so
# later lookups of "chain" never raise before the real chain is built.
st.session_state.setdefault('chain', 'dummy')
21
 
22
def get_data():
    """Return the cached RAG chain (or the 'dummy' placeholder) from session state."""
    cached = st.session_state["chain"]
    return cached
24
 
25
 
26
def add_data(chain):
    """Store *chain* in Streamlit session state so reruns can reuse it."""
    st.session_state["chain"] = chain
 
28
 
29
# Fetch the cached RAG chain from Streamlit session state; report whether a
# real chain or only the placeholder was found, then (re)build the pipeline.
try:
    chain = get_data()
    if chain != 'dummy':
        print("dato nuevo")
    else:  # BUG FIX: original `else` was missing its colon (SyntaxError)
        print("dato default")
except KeyError:
    # Narrowed from a bare `except:` — the session-state lookup in get_data()
    # is the only statement expected to raise, and it raises KeyError.
    print("Pass")

print("No hay datos previos")
# Load the SII circular PDF directly from the web.
loader = PyPDFLoader("https://www.sii.cl/normativa_legislacion/circulares/2024/circu3.pdf")
data = loader.load()

# Split the document into overlapping chunks so each embedding covers a
# focused span of text.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.split_documents(data)

# Multilingual sentence-similarity model (handles Spanish); CPU inference.
model_path = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
embeddings = HuggingFaceEmbeddings(
    model_name=model_path,                          # pre-trained model id
    model_kwargs={'device': 'cpu'},                 # or 'cuda'
    encode_kwargs={'normalize_embeddings': False},  # raw (unnormalized) vectors
)

# Vector store plus a retriever that returns the 3 most relevant chunks.
db = FAISS.from_documents(docs, embeddings)
retriever = db.as_retriever(search_kwargs={"k": 3})

template = """Responde la pregunta basado unicamente en el siguiente contexto

{contexto}

Pregunta: {pregunta}

"""

prompt = ChatPromptTemplate.from_template(template)
model = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

def format_docs(docs):
    """Join retrieved documents into a single context string for the prompt."""
    return "\n\n".join(d.page_content for d in docs)

# LCEL pipeline: retrieve context, fill the prompt, query the LLM, parse text.
chain = (
    {"contexto": retriever | format_docs, "pregunta": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)
print(chain)
add_data(chain)  # cache the built chain for subsequent Streamlit reruns
84
 
85
  def main():
86
  pregunta = st.text_area('Ingresa algun texto:')