Spaces:
Configuration error
Configuration error
Commit ·
5977b46
1
Parent(s): 288607e
Upload 7 files
Browse files- README.md +36 -12
- app.py +37 -0
- data.csv +0 -0
- get-pip.py +0 -0
- insightly_wbg.png +0 -0
- requirements.txt +136 -0
- tempfile +0 -0
README.md
CHANGED
|
@@ -1,12 +1,36 @@
|
|
| 1 |
-
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Langchain Chat-CSV with OpenAI (Tutorial)
|
| 2 |
+
> You can find the step-by-step video tutorial to build this application [on YouTube](https://youtu.be/tjeti5vXWOU).
|
| 3 |
+
|
| 4 |
+
This is a Python application that enables you to load a CSV file and ask questions about its contents using natural language. The application leverages Language Models (LLMs) to generate responses based on the CSV data. The LLM will only provide answers related to the information present in the CSV.
|
| 5 |
+
|
| 6 |
+
## How it works
|
| 7 |
+
|
| 8 |
+
The application reads the CSV file and processes the data. It uses OpenAI LLMs together with Langchain Agents to answer your questions. The CSV agent then uses tools to find solutions to your questions and generates an appropriate response with the help of an LLM.
|
| 9 |
+
|
| 10 |
+
The application employs Streamlit to create the graphical user interface (GUI) and utilizes Langchain to interact with the LLM.
|
| 11 |
+
|
| 12 |
+
## Installation
|
| 13 |
+
|
| 14 |
+
To install the repository, follow these steps:
|
| 15 |
+
|
| 16 |
+
1. Clone this repository to your local machine.
|
| 17 |
+
2. Install the necessary dependencies by running the following command:
|
| 18 |
+
|
| 19 |
+
```
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
3. Additionally, you need to obtain an OpenAI API key and add it to a `.env` file in the project directory as `OPENAI_API_KEY=<your key>`.
|
| 24 |
+
|
| 25 |
+
## Usage
|
| 26 |
+
|
| 27 |
+
To use the application, run the `app.py` file with the Streamlit CLI. Make sure you have Streamlit installed before running the application. Run the following command in your terminal:
|
| 28 |
+
|
| 29 |
+
```
|
| 30 |
+
streamlit run app.py
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## Contributing
|
| 34 |
+
This repository is intended for educational purposes only and is not designed to accept external contributions. It serves as supplemental material for the YouTube tutorial, demonstrating how to build the project.
|
| 35 |
+
|
| 36 |
+
For any suggestions or improvements related to the tutorial content, please feel free to reach out through the YouTube channel's comment section.
|
app.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from tempfile import NamedTemporaryFile
|
| 2 |
+
from langchain.agents import create_csv_agent
|
| 3 |
+
from langchain.llms import OpenAI
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
import os
|
| 6 |
+
import streamlit as st
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def main():
    """Run the Insightly Streamlit app: upload a CSV and ask questions about it.

    Flow:
      1. Load environment variables from a ``.env`` file and verify that
         ``OPENAI_API_KEY`` is set; show an error and stop otherwise.
      2. Render the page chrome (title, sidebar logo, header).
      3. Once a CSV has been uploaded and a question entered, write the
         upload to a temporary file, build a LangChain CSV agent on it,
         run the question and display the answer.

    The temporary file is always removed afterwards, so repeated Streamlit
    reruns do not accumulate files in the temp directory.
    """
    load_dotenv()

    # Streamlit expects set_page_config to be the first st.* call of a run,
    # so issue it before anything else (including the error message below).
    st.set_page_config(page_title="Insightly")

    # The key itself is read from the environment by the OpenAI wrapper;
    # here we only verify that it is present and non-empty.
    if not os.getenv("OPENAI_API_KEY"):
        st.error("OPENAI_API_KEY is not set")
        return

    st.sidebar.image("https://i.ibb.co/x807Xkj/insightly-wbg.png", use_column_width=True)
    st.header("Data Analysis 📈")

    csv_file = st.file_uploader("Upload a CSV file", type="csv")
    if not csv_file:
        return

    user_input = st.text_input("Question here:")
    if not user_input:
        # Nothing to answer yet: skip the temp file and agent construction,
        # which would otherwise be repeated on every rerun.
        return

    # delete=False so the file can be reopened by path after the handle is
    # closed; we take responsibility for removing it in the finally below.
    with NamedTemporaryFile(suffix=".csv", delete=False) as f:
        f.write(csv_file.getvalue())
        csv_path = f.name

    try:
        llm = OpenAI(temperature=0)
        agent = create_csv_agent(llm, csv_path, verbose=True)
        response = agent.run(user_input)
        st.write(response)
    finally:
        # NOTE(review): assumes the agent does not need the file after
        # run() returns — confirm against the installed langchain version.
        os.unlink(csv_path)


if __name__ == "__main__":
    main()
|
data.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
get-pip.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
insightly_wbg.png
ADDED
|
requirements.txt
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
aim==3.17.4
|
| 2 |
+
aiodns==3.0.0
|
| 3 |
+
aleph_alpha_client==3.1.0
|
| 4 |
+
apify_client==1.1.1
|
| 5 |
+
arize==7.0.1
|
| 6 |
+
asynctest==0.13.0
|
| 7 |
+
atlassian==0.0.0
|
| 8 |
+
banana_dev==4.0.2
|
| 9 |
+
beautifulsoup4==4.12.2
|
| 10 |
+
bilibili_api==9.1.0
|
| 11 |
+
boto3==1.26.131
|
| 12 |
+
botocore==1.29.131
|
| 13 |
+
Brotli==1.0.9
|
| 14 |
+
brotlicffi==1.0.9.2
|
| 15 |
+
cchardet==2.1.7
|
| 16 |
+
cerebrium==0.4.34
|
| 17 |
+
chromadb==0.3.22
|
| 18 |
+
clearml==1.10.4
|
| 19 |
+
clickhouse_connect==0.5.23
|
| 20 |
+
comet_ml==3.33.1
|
| 21 |
+
ConfigParser==5.3.0
|
| 22 |
+
contextlib2==21.6.0
|
| 23 |
+
cryptography==40.0.2
|
| 24 |
+
cx_Oracle==8.3.0
|
| 25 |
+
Cython==0.29.35
|
| 26 |
+
datasets==2.12.0
|
| 27 |
+
disco==1.40.4
|
| 28 |
+
dl==0.1.0
|
| 29 |
+
docarray==0.31.1
|
| 30 |
+
docutils==0.20
|
| 31 |
+
docx2txt==0.8
|
| 32 |
+
duckdb==0.7.1
|
| 33 |
+
elasticsearch==8.7.0
|
| 34 |
+
email_validator==2.0.0.post2
|
| 35 |
+
extract_msg==0.41.0
|
| 36 |
+
fake_useragent==1.1.3
|
| 37 |
+
firebase_admin==6.1.0
|
| 38 |
+
fitz==0.0.1.dev2
|
| 39 |
+
genapi==0.0.8
|
| 40 |
+
google_api_python_client==2.86.0
|
| 41 |
+
google_auth_oauthlib==1.0.0
|
| 42 |
+
googlemaps==4.10.0
|
| 43 |
+
gptcache==0.1.22
|
| 44 |
+
greenlet==2.0.2
|
| 45 |
+
guardrails==2.0.0
|
| 46 |
+
gunicorn==20.1.0
|
| 47 |
+
HTMLParser==0.0.2
|
| 48 |
+
hypothesis==6.75.2
|
| 49 |
+
idna_ssl==1.1.0
|
| 50 |
+
importlib_metadata==5.0.0
|
| 51 |
+
InstructorEmbedding==1.0.0
|
| 52 |
+
ipython==8.13.2
|
| 53 |
+
ipywidgets==7.7.0
|
| 54 |
+
Jinja2==3.1.2
|
| 55 |
+
jnius==1.1.0
|
| 56 |
+
keyring==23.13.1
|
| 57 |
+
lark==1.1.5
|
| 58 |
+
lockfile==0.12.2
|
| 59 |
+
lxml==4.9.2
|
| 60 |
+
matplotlib==3.6.2
|
| 61 |
+
metal_sdk==1.0.1
|
| 62 |
+
mtrand==0.1
|
| 63 |
+
mwparserfromhell==0.6.4
|
| 64 |
+
mwxml==0.3.3
|
| 65 |
+
networkx==2.8.8
|
| 66 |
+
nltk==3.8.1
|
| 67 |
+
nomic==1.1.6
|
| 68 |
+
nose==1.3.7
|
| 69 |
+
numarray==1.5.1
|
| 70 |
+
Numeric==24.2
|
| 71 |
+
O365==2.0.26
|
| 72 |
+
ordereddict==1.1
|
| 73 |
+
pdf2image==1.16.3
|
| 74 |
+
pdfminer==20191125
|
| 75 |
+
pexpect==4.8.0
|
| 76 |
+
pickle5==0.0.11
|
| 77 |
+
Pillow==9.5.0
|
| 78 |
+
pinecone_text==0.4.2
|
| 79 |
+
pipeline==0.1
|
| 80 |
+
praw==7.7.0
|
| 81 |
+
promptlayer==0.1.80
|
| 82 |
+
protobuf==4.23.0
|
| 83 |
+
psutil==5.9.1
|
| 84 |
+
pygpt4all==1.1.0
|
| 85 |
+
PyInstaller==5.10.1
|
| 86 |
+
pymilvus==2.2.8
|
| 87 |
+
pyOpenSSL==23.1.1
|
| 88 |
+
pyowm==3.3.0
|
| 89 |
+
pypandoc==1.11
|
| 90 |
+
pypdf==3.8.1
|
| 91 |
+
PyPDF2==3.0.1
|
| 92 |
+
pyspark==3.4.0
|
| 93 |
+
pysqlcipher3==1.2.0
|
| 94 |
+
pysrt==1.1.2
|
| 95 |
+
pytesseract==0.3.10
|
| 96 |
+
pytest==7.3.1
|
| 97 |
+
python_magic==0.4.27
|
| 98 |
+
pytube==15.0.0
|
| 99 |
+
pytz==2022.6
|
| 100 |
+
qdrant_client==1.1.7
|
| 101 |
+
railroad==0.5.0
|
| 102 |
+
reportlab==4.0.0
|
| 103 |
+
runhouse==0.0.5
|
| 104 |
+
scikit_learn==1.2.2
|
| 105 |
+
scipy==1.10.1
|
| 106 |
+
selenium==4.9.1
|
| 107 |
+
sentence_transformers==2.2.2
|
| 108 |
+
setuptools_scm==7.1.0
|
| 109 |
+
simplejson==3.19.1
|
| 110 |
+
slack_sdk==3.21.3
|
| 111 |
+
spacy==3.5.2
|
| 112 |
+
Sphinx==7.0.0
|
| 113 |
+
sqlcipher3==0.5.0
|
| 114 |
+
svglib==1.5.1
|
| 115 |
+
tensorflow==2.12.0
|
| 116 |
+
tensorflow_text==2.12.1
|
| 117 |
+
textstat==0.7.3
|
| 118 |
+
threadpoolctl==3.1.0
|
| 119 |
+
tiktoken==0.3.3
|
| 120 |
+
tokenizers==0.13.3
|
| 121 |
+
tokio==0.2.0
|
| 122 |
+
torch==2.0.1
|
| 123 |
+
tornado==6.1
|
| 124 |
+
transformers==4.28.1
|
| 125 |
+
truststore==0.7.0
|
| 126 |
+
tweepy==4.14.0
|
| 127 |
+
urllib3_secure_extra==0.1.0
|
| 128 |
+
uvloop==0.17.0
|
| 129 |
+
vespa==0.6
|
| 130 |
+
wandb==0.15.2
|
| 131 |
+
wincertstore==0.2.1
|
| 132 |
+
wolframalpha==5.0.0
|
| 133 |
+
xlrd==2.0.1
|
| 134 |
+
xmlrpclib==1.0.1
|
| 135 |
+
youtube_transcript_api==0.6.0
|
| 136 |
+
zstandard==0.21.0
|
tempfile
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|