Rajat1203 committed on
Commit
81cce87
·
verified ·
1 Parent(s): 32b7560

Upload 5 files

Browse files
Files changed (5) hide show
  1. .env +1 -0
  2. .env.example +1 -0
  3. app.py +71 -0
  4. myData.csv +12 -0
  5. requirements.txt +7 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ COHERE_API_KEY=""  # real key removed: never commit secrets; revoke the exposed key and keep .env out of version control
.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ COHERE_API_KEY=""
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import logging
4
+ from dotenv import load_dotenv
5
+ from langchain_community.embeddings import CohereEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain.document_loaders.csv_loader import CSVLoader
8
+
9
# Set up logging.
logging.basicConfig(level=logging.DEBUG)

# Load environment variables (from a local .env file when one exists).
load_dotenv()

# Ensure COHERE_API_KEY is set before any embedding call is attempted.
if not os.getenv("COHERE_API_KEY"):
    raise ValueError("COHERE_API_KEY environment variable not set")

st.set_page_config(page_title="Educate Kids", page_icon=":robot:")
st.header("SIMILARITY MATCHING")

model_name = "embed-english-v3.0"

user_agent = "my-app/1.0"  # Replace with your agent if you want
embeddings = CohereEmbeddings(model=model_name, user_agent=user_agent)

# No explicit 'fieldnames' here: when field names are supplied, csv.DictReader
# treats the first row as data, so the "Words" header line of myData.csv would
# be embedded and indexed as if it were a real entry. Letting the reader take
# the column name from the header row keeps that row out of the index.
loader = CSVLoader(
    file_path="myData.csv",
    csv_args={
        "delimiter": ",",
        "quotechar": '"',
    },
)

data = loader.load()

logging.debug(f"loaded data : {data}")

# Extract the text of every loaded document.
texts = [doc.page_content for doc in data]

# Log the exact text that is about to be passed to the embeddings model.
for text in texts:
    logging.debug(f"Document text : {text}")

if not data:
    # An index cannot be built from zero documents; stop with a readable
    # message instead of failing somewhere inside the vector store.
    st.error("myData.csv contains no rows to index.")
    st.stop()


@st.cache_resource(show_spinner=False)
def _build_index(_documents, _embedder):
    """Embed the documents and return a FAISS vector store built from them.

    Streamlit re-executes the whole script on every widget interaction.
    Caching the store means the documents are embedded once per server
    process rather than once per rerun. The leading underscores tell
    Streamlit not to hash the arguments, so the cached store is reused
    until the cache is cleared or the app restarts (restart after editing
    myData.csv).
    """
    return FAISS.from_documents(_documents, _embedder)


# Initialize the FAISS database.
try:
    db = _build_index(data, embeddings)
except ValueError as e:
    logging.error(f"Error occured : {e}")
    # Retry one document at a time so the log pinpoints which text the
    # embeddings model rejects.
    for text in texts:
        try:
            embedding = embeddings.embed_documents([text])
            logging.debug(f"Embeddings : {embedding}")
        except ValueError as ve:
            logging.error(f"Failed to embed document: {text} with error :{ve}")
    # Without an index there is nothing to search; halt this run here rather
    # than let a later lookup fail on an undefined `db`.
    st.error("Could not build the similarity index. See the application log for details.")
    st.stop()
else:
    st.write("FAISS database created successfully")
58
+
59
def get_input():
    """Render the query text box and return its current contents."""
    return st.text_input("you : ", key="input")
62
+
63
user_input = get_input()
submit = st.button("Find similar Things")

if submit:
    if not user_input.strip():
        # A blank query has nothing to match against; ask for input instead
        # of spending an embedding request on it.
        st.warning("Please enter a word or phrase to search for.")
    else:
        docs = db.similarity_search(user_input)
        st.subheader("Top Matches : ")
        if not docs:
            st.info("No matches found.")
        # Show at most the two best matches. Slicing (rather than indexing
        # docs[0] and docs[1]) stays safe when fewer than two results exist.
        for doc in docs[:2]:
            st.text(doc.page_content)
myData.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Words
2
+ Elephant
3
+ Lion
4
+ Tiger
5
+ Dog
6
+ Cricket
7
+ Football
8
+ Tennis
9
+ Basketball
10
+ Apple
11
+ Orange
12
+ Banana
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ langchain
2
+ streamlit
3
+ cohere
4
+ tiktoken
5
+ python-dotenv
6
+ faiss-cpu
7
+ langchain-cohere