nesanchezo committed on
Commit
5bf66bd
·
1 Parent(s): e0e0179

primer commit

Browse files
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Set
2
+
3
+ from backend.core import run_llm
4
+ import streamlit as st
5
+ from streamlit_chat import message
6
+ from langchain.output_parsers import ResponseSchema
7
+ #from langchain.document_loaders import PyPDFLoader
8
+
9
def create_sources_string(source_urls: Set[str]) -> str:
    """Format a set of sources as a numbered, sorted list.

    NOTE(review): annotated as ``Set[str]``, but the caller in this file
    passes a set of ``(page, source)`` tuples — any sortable, printable
    elements work, since each item is only sorted and f-string formatted.

    Args:
        source_urls: Unique source identifiers to list.

    Returns:
        "" for an empty set; otherwise a string of the form
        "sources:\n1. <src>\n2. <src>\n" with items in sorted order and
        a trailing newline after every item.
    """
    if not source_urls:
        return ""
    # sorted() gives a deterministic order; enumerate(start=1) replaces the
    # manual i+1, and join avoids quadratic string concatenation.
    numbered = [
        f"{i}. {source}\n" for i, source in enumerate(sorted(source_urls), start=1)
    ]
    return "sources:\n" + "".join(numbered)
18
+
19
+
20
st.header("Chatbot Documentos Nico")

# Initialize conversation state. BUG FIX: the original joined the three
# membership tests with `and`, so a session missing only *some* keys was
# never repaired; `or` re-initializes whenever any key is absent (the three
# keys are only ever written together, so this resets no live data).
if (
    "chat_answers_history" not in st.session_state
    or "user_prompt_history" not in st.session_state
    or "chat_history" not in st.session_state
):
    st.session_state["chat_answers_history"] = []
    st.session_state["user_prompt_history"] = []
    st.session_state["chat_history"] = []

# BUG FIX: the original did `st.text_input(...) or st.button("Submit")`,
# so clicking Submit with an empty box bound `prompt = True` (a bool) and
# sent it to the LLM as the query. Keep `prompt` as the entered text; the
# button merely triggers a rerun of the script.
prompt = st.text_input(
    "Chatbot", placeholder="Quieres saber algo? pregunta aquí ..."
)
st.button("Submit")

if prompt:
    with st.spinner("Generating response..."):
        generated_response = run_llm(
            query=prompt, chat_history=st.session_state["chat_history"]
        )

        # Deduplicate (page, source) pairs across retrieved documents.
        sources = {
            (doc.metadata["page"], doc.metadata["source"])
            for doc in generated_response["source_documents"]
        }
        formatted_response = (
            f"{generated_response['answer']} \n\n {create_sources_string(sources)}"
        )

        st.session_state["chat_history"].append(
            (prompt, generated_response["answer"])
        )
        st.session_state["user_prompt_history"].append(prompt)
        st.session_state["chat_answers_history"].append(formatted_response)

# Replay the conversation, oldest first: each user prompt followed by the
# formatted bot answer.
if st.session_state["chat_answers_history"]:
    for generated_response, user_query in zip(
        st.session_state["chat_answers_history"],
        st.session_state["user_prompt_history"],
    ):
        message(user_query, is_user=True)
        message(generated_response)
backend/__init__.py ADDED
File without changes
backend/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (222 Bytes). View file
 
backend/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (187 Bytes). View file
 
backend/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (214 Bytes). View file
 
backend/__pycache__/core.cpython-311.pyc ADDED
Binary file (2.23 kB). View file
 
backend/__pycache__/core.cpython-37.pyc ADDED
Binary file (2.22 kB). View file
 
backend/__pycache__/core.cpython-39.pyc ADDED
Binary file (2.23 kB). View file
 
backend/core.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any, Dict, List
3
+
4
+ from langchain.embeddings.openai import OpenAIEmbeddings
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.chains import ConversationalRetrievalChain
7
+ from langchain.chains import RetrievalQA
8
+ from langchain.vectorstores import Chroma
9
+ from langchain.output_parsers import ResponseSchema
10
+ from langchain.output_parsers import StructuredOutputParser
11
+ from langchain.document_loaders import PyPDFLoader
12
+ from langchain.prompts import ChatPromptTemplate
13
+
14
+ from dotenv import load_dotenv
15
+
16
# Load environment variables (e.g. OPENAI_API_KEY, read below in run_llm)
# from a local .env file, if present.
load_dotenv()

# Directory holding the persisted Chroma vector index.
persist_directory = 'chroma/'
19
+
20
def run_llm(query: str, chat_history: "List[Dict[str, Any]] | None" = None):
    """Answer ``query`` against the persisted Chroma index.

    Builds a ConversationalRetrievalChain over the on-disk Chroma store
    (``persist_directory``) using OpenAI embeddings and gpt-3.5-turbo-16k,
    with MMR retrieval (k=6 out of fetch_k=20 candidates).

    Args:
        query: The user's question.
        chat_history: Prior turns to condition the chain on. Defaults to an
            empty history. NOTE(review): annotated as dicts, but the caller
            in app.py passes (prompt, answer) tuples — the chain accepts
            both; confirm the intended shape.

    Returns:
        The chain's result mapping, including "answer" and
        "source_documents" (enabled via return_source_documents=True).

    Raises:
        KeyError: if OPENAI_API_KEY is not set in the environment.
    """
    # BUG FIX: the original used a mutable default (`= []`), which is shared
    # across calls; a None sentinel is the safe, backward-compatible form.
    if chat_history is None:
        chat_history = []

    embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

    new_vectorstore = Chroma(
        persist_directory=persist_directory, embedding_function=embeddings
    )

    chat = ChatOpenAI(
        model="gpt-3.5-turbo-16k",
        verbose=True,
        temperature=0,
    )

    qa = ConversationalRetrievalChain.from_llm(
        llm=chat,
        retriever=new_vectorstore.as_retriever(
            search_type="mmr", search_kwargs={"k": 6, "fetch_k": 20}
        ),
        return_source_documents=True,
    )

    return qa({"question": query, "chat_history": chat_history})
37
+
38
# Quick manual smoke test: run a single query against the index.
if __name__ == "__main__":
    result = run_llm(query="What is Venu 2?")
    print(result)
requirements.txt ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.5
2
+ aiosignal==1.3.1
3
+ altair==5.0.1
4
+ anyio==3.7.1
5
+ arabic-reshaper==3.0.0
6
+ asgiref==3.7.2
7
+ asn1crypto==1.5.1
8
+ async-timeout==4.0.3
9
+ attrs==23.1.0
10
+ backoff==2.2.1
11
+ blinker==1.6.2
12
+ cachetools==5.3.1
13
+ certifi==2023.7.22
14
+ cffi==1.15.1
15
+ charset-normalizer==3.2.0
16
+ chroma-hnswlib==0.7.2
17
+ chromadb==0.4.5
18
+ click==8.1.6
19
+ coloredlogs==15.0.1
20
+ cryptography==41.0.3
21
+ cssselect2==0.7.0
22
+ dataclasses-json==0.5.14
23
+ decorator==5.1.1
24
+ Django==4.2.4
25
+ fastapi==0.99.1
26
+ flatbuffers==23.5.26
27
+ frozenlist==1.4.0
28
+ gitdb==4.0.10
29
+ GitPython==3.1.32
30
+ greenlet==2.0.2
31
+ h11==0.14.0
32
+ html5lib==1.1
33
+ httptools==0.6.0
34
+ humanfriendly==10.0
35
+ idna==3.4
36
+ importlib-metadata==6.8.0
37
+ importlib-resources==6.0.1
38
+ Jinja2==3.1.2
39
+ jsonschema==4.19.0
40
+ jsonschema-specifications==2023.7.1
41
+ langchain==0.0.273
42
+ langsmith==0.0.26
43
+ lxml==4.9.3
44
+ markdown-it-py==3.0.0
45
+ MarkupSafe==2.1.3
46
+ marshmallow==3.20.1
47
+ mdurl==0.1.2
48
+ monotonic==1.6
49
+ mpmath==1.3.0
50
+ multidict==6.0.4
51
+ mypy-extensions==1.0.0
52
+ numexpr==2.8.5
53
+ numpy==1.25.2
54
+ onnxruntime==1.15.1
55
+ openai==0.27.9
56
+ openapi-schema-pydantic==1.2.4
57
+ oscrypto==1.3.0
58
+ overrides==7.4.0
59
+ packaging==23.1
60
+ pandas==2.0.3
61
+ pdf2image==1.16.3
62
+ Pillow==9.5.0
63
+ posthog==3.0.1
64
+ protobuf==4.24.0
65
+ pulsar-client==3.2.0
66
+ pyarrow==12.0.1
67
+ pycparser==2.21
68
+ pydantic==1.10.12
69
+ pydeck==0.8.0
70
+ Pygments==2.16.1
71
+ pyHanko==0.20.0
72
+ pyhanko-certvalidator==0.23.0
73
+ Pympler==1.0.1
74
+ pypdf==3.15.0
75
+ PyPDF2==3.0.1
76
+ PyPika==0.48.9
77
+ pypng==0.20220715.0
78
+ pytesseract==0.3.10
79
+ python-bidi==0.4.2
80
+ python-dateutil==2.8.2
81
+ python-dotenv==1.0.0
82
+ pytz==2023.3
83
+ pytz-deprecation-shim==0.1.0.post0
84
+ PyYAML==6.0.1
85
+ qrcode==7.4.2
86
+ referencing==0.30.2
87
+ regex==2023.8.8
88
+ reportlab==3.6.13
89
+ requests==2.31.0
90
+ rich==13.5.2
91
+ rpds-py==0.9.2
92
+ six==1.16.0
93
+ smmap==5.0.0
94
+ sniffio==1.3.0
95
+ SQLAlchemy==2.0.19
96
+ sqlparse==0.4.4
97
+ starlette==0.27.0
98
+ streamlit==1.25.0
99
+ streamlit-chat==0.1.1
100
+ svglib==1.5.1
101
+ sympy==1.12
102
+ tenacity==8.2.2
103
+ tiktoken==0.4.0
104
+ tinycss2==1.2.1
105
+ tokenizers==0.13.3
106
+ toml==0.10.2
107
+ toolz==0.12.0
108
+ tornado==6.3.2
109
+ tqdm==4.66.1
110
+ typing-inspect==0.9.0
111
+ typing_extensions==4.7.1
112
+ tzdata==2023.3
113
+ tzlocal==4.3.1
114
+ uritools==4.0.1
115
+ urllib3==2.0.4
116
+ uvicorn==0.23.2
117
+ uvloop==0.17.0
118
+ validators==0.20.0
119
+ watchdog==3.0.0
120
+ watchfiles==0.19.0
121
+ webencodings==0.5.1
122
+ websockets==11.0.3
123
+ xhtml2pdf==0.2.11
124
+ yarl==1.9.2
125
+ zipp==3.16.2