aadil732 committed on
Commit
aa4d80e
·
1 Parent(s): 11c12df

Minor Changes

Browse files
Files changed (3) hide show
  1. src/app.py +35 -18
  2. src/extract_single_course.py +1 -1
  3. src/retriever.py +84 -30
src/app.py CHANGED
@@ -2,53 +2,70 @@ import streamlit as st
2
  import requests
3
 
4
  def wide_space_default():
 
 
 
5
  st.set_page_config(
6
  layout="wide",
7
  page_title="Search Free Courses",
8
  )
9
 
 
10
  wide_space_default()
11
 
 
12
  css_for_text = """
13
  <style>
14
  p, li, strong, ul {
15
- font-size: 18px !important;
16
  }
17
-
18
  h1 {
19
  font-size: 28px;
20
  }
21
-
22
  .text {
23
- font-size: 22px !important
24
  }
25
  </style>
26
  """
27
 
28
- # Applying the custom CSS for styling
29
  st.markdown(css_for_text, unsafe_allow_html=True)
30
 
 
31
  st.header("Analytics Vidya Free Courses", anchor=False)
32
- # desc = st.write("This is a search engine to search among free courses of Analytics Vidya")
33
 
34
  def sending_keyword(keyword):
35
- response = requests.get("http://0.0.0.0:8000/get_courses", params={"keyword": keyword}).json()
36
- return response['results']
37
-
38
- response = "This ia a search engine project created for Analytics Vidya Free Courses. The project helps users to type any keyword related to the free courses they are looking for."
39
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  with st.sidebar:
41
-
42
  keyword = st.text_input("Enter course keyword:")
43
-
44
- submit_button = st.button("Press me")
45
 
46
  if keyword or submit_button:
47
- st.write("Your entered keyword is ", keyword)
48
  st.snow()
49
 
 
50
  if keyword or submit_button:
 
51
 
52
- response = sending_keyword(keyword)
53
-
54
- st.markdown(response, unsafe_allow_html=True)
 
2
  import requests
3
 
4
def wide_space_default():
    """Set the Streamlit page to wide layout and assign the app's page title."""
    st.set_page_config(
        page_title="Search Free Courses",
        layout="wide",
    )
12
 
13
# Apply the wide-layout configuration before rendering anything else
wide_space_default()

# Custom CSS injected into the page to enlarge the default text elements
css_for_text = """
<style>
p, li, strong, ul {
font-size: 20px !important;
}
h1 {
font-size: 28px;
}
.text {
font-size: 22px !important;
}
</style>
"""

# Inject the styling (unsafe_allow_html is required for raw <style> tags)
st.markdown(css_for_text, unsafe_allow_html=True)

# Application header
st.header("Analytics Vidya Free Courses", anchor=False)
 
36
 
37
def sending_keyword(keyword):
    """
    Send the keyword to the local FastAPI backend and return the search results.

    Args:
        keyword (str): The keyword to search for courses.

    Returns:
        str: The 'results' payload from the API, or an error message if the
        request fails, times out, or the response is malformed.
    """
    try:
        # timeout prevents the Streamlit UI from hanging forever when the
        # backend is down or unreachable (the original call had no timeout)
        response = requests.get(
            "http://0.0.0.0:8000/get_courses",
            params={"keyword": keyword},
            timeout=30,
        ).json()
        return response['results']
    except Exception as e:
        return f"Error: Unable to connect to the API. Details: {e}"
52
+
53
# Default response shown only once a search actually runs (see below)
response = (
    "This is a search engine project created for Analytics Vidya Free Courses. "
    "The project helps users to type any keyword related to the free courses they are looking for."
)

# Sidebar input controls
with st.sidebar:
    keyword = st.text_input("Enter course keyword:")
    submit_button = st.button("Search Courses")

# Trigger the search when a keyword is typed or the button is pressed;
# the two original back-to-back identical conditions are merged — the
# execution order (write, snow, fetch, render) is unchanged.
if keyword or submit_button:
    st.write(f"Your entered keyword is: {keyword}")
    st.snow()
    response = sending_keyword(keyword)
    st.markdown(response, unsafe_allow_html=True)
 
src/extract_single_course.py CHANGED
@@ -65,7 +65,7 @@ def extract_course_details(course_url):
65
  if curriculum_section:
66
  items = curriculum_section.find_all('h5')
67
  for idx, item in enumerate(items):
68
- if idx == 13: # Only Showing maximum 12 curriculums to the users
69
  text = "...and many more"
70
  curriculum.append(text)
71
  break
 
65
  if curriculum_section:
66
  items = curriculum_section.find_all('h5')
67
  for idx, item in enumerate(items):
68
+ if idx == 10: # Only Showing maximum 10 curriculums to the users
69
  text = "...and many more"
70
  curriculum.append(text)
71
  break
src/retriever.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from fastapi import FastAPI
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.responses import JSONResponse
@@ -10,94 +11,147 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
10
  from dotenv import load_dotenv
11
 
12
  def creating_pinecone_index(embedding, index_name):
 
 
 
 
 
 
 
 
 
 
 
13
  return PineconeVectorStore(embedding=embedding, index_name=index_name)
14
 
 
15
  def retrieve_response_from_pinecone(keyword, k=5):
16
  """
17
- Retrieves the most similar responses from the Pinecone index based on the given query.
18
 
19
  Args:
20
- query (str): The input query used to search the Pinecone index for vectors.
21
- k (int, optional): Indicates top results to choose. Default is 5.
22
 
23
  Returns:
24
- list: A list of results containing the most similar vectors from the Pinecone index.
 
25
  """
26
-
27
  results = pinecone_index.similarity_search(keyword, k=k)
28
  return results
29
 
 
30
  def response_generator(keyword):
31
  """
32
- Generates a response to the given query by retrieving relevant information from the Pinecone index and invoking
33
- a processing chain with llm.
34
 
35
  Args:
36
- query (str): The user's input or question that will be used to retrieve relevant information and generate a response.
37
 
38
  Returns:
39
- str: The generated response to the query, either based on the retrieved information or an error messageif the process fails.
 
40
  """
41
-
42
  try:
 
43
  results = retrieve_response_from_pinecone(keyword, 5)
44
- print("results", results)
45
 
46
- # Generating a response by invoking the chain with retrieved content and the original query
47
  answer = chain.invoke(input={"keyword": keyword, "details": results})
48
  except Exception as e:
49
- # Returning an error message if any exception occurs
50
- answer = f"Sorry, I am unable to find the answer to your query. Please try again later. The error is {e}"
 
 
 
51
 
52
  return answer
53
 
 
 
54
  app = FastAPI()
55
 
 
56
  app.add_middleware(
57
  CORSMiddleware,
58
- allow_origins=["*"],
59
  allow_credentials=True,
60
- allow_methods=["*"],
61
- allow_headers=["*"],
62
  )
63
 
64
- @app.get("/")
65
- def root():
66
-
67
- return "HELLO"
68
-
69
  @app.get("/get_courses")
70
  def get_courses(keyword: str):
 
 
 
 
 
 
 
 
 
 
 
71
  if keyword.strip() == "":
72
- return JSONResponse(content={"results": "Please provide a valid keyword to search for the courses and upscale your knowledge."})
 
 
 
 
73
  else:
74
- print("Keyword to searh: ", keyword)
 
 
 
75
  results = response_generator(keyword)
 
 
76
  return JSONResponse(content={"results": results})
77
 
78
 
79
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  load_dotenv()
81
 
 
82
  embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
83
 
 
84
  llm = GoogleGenerativeAI(model="gemini-1.5-flash")
85
 
 
86
  index_name = "analytics-vidya-free-courses"
87
 
 
88
  pinecone_index = creating_pinecone_index(embedding, index_name)
89
 
 
90
  template = ChatPromptTemplate([
91
- ("system", "You are a search engine for finding free courses from Analytics Vidya. Course details will be provided to you"),
92
- ("system", "You will be provided with course title, course link, description and course curriculum. Show the curriculum in a bullet points format"),
93
- ("system", "Handle the details wisely and give the output in a proper format. Respond only with the course details in a tabular or descriptive markdown format as it suites"),
94
  ("human", "Give me some detail related to this keyword : {keyword}"),
95
  ("human", "These are the details of courses : {details}")
96
  ])
97
 
98
- # Setting up the document processing chain for response generation based on retrieved documents
99
  chain = create_stuff_documents_chain(llm, template, document_variable_name="details")
100
 
101
- # Starting the FastAPI server with Uvicorn, accessible at 0.0.0.0 on port 8000
102
- import uvicorn
103
  uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
1
+ import uvicorn
2
  from fastapi import FastAPI
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from fastapi.responses import JSONResponse
 
11
  from dotenv import load_dotenv
12
 
13
def creating_pinecone_index(embedding, index_name):
    """
    Build a Pinecone-backed vector store handle.

    Args:
        embedding (object): Embedding model used to vectorise documents.
        index_name (str): Name of the Pinecone index to connect to.

    Returns:
        PineconeVectorStore: Vector store bound to the given index and
        embedding model.
    """
    store = PineconeVectorStore(embedding=embedding, index_name=index_name)
    return store
26
 
27
+
28
def retrieve_response_from_pinecone(keyword, k=5):
    """
    Fetch the top `k` most similar documents for a keyword from Pinecone.

    Args:
        keyword (str): Query text to match against the index.
        k (int, optional): Number of top results to return. Defaults to 5.

    Returns:
        list: Matching documents ranked by similarity, most similar first.
    """
    # NOTE(review): relies on the module-level `pinecone_index` created in
    # the __main__ bootstrap — verify it is initialized before serving.
    return pinecone_index.similarity_search(keyword, k=k)
42
 
43
+
44
def response_generator(keyword):
    """
    Produce an answer for a keyword by retrieving related course documents
    from Pinecone and passing them through the LLM chain.

    Args:
        keyword (str): The search query supplied by the user.

    Returns:
        str: The generated answer, or an error message if retrieval or
        generation fails.
    """
    try:
        # Pull the 5 most relevant course documents
        docs = retrieve_response_from_pinecone(keyword, 5)
        print("results:", docs)

        # Stuff the documents and the query into the LLM chain
        return chain.invoke(input={"keyword": keyword, "details": docs})
    except Exception as e:
        # Surface any failure as a user-readable message instead of a 500
        return (
            f"Sorry, I am unable to find the answer to your query. "
            f"Please try again later. The error is: {e}"
        )
71
 
72
+
73
# FastAPI application instance
app = FastAPI()

# Open CORS policy so the separately-hosted Streamlit front-end can call us.
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# permissive configuration — confirm this is acceptable for deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allow all HTTP methods
    allow_headers=["*"],  # Allow all headers
)
84
 
 
 
 
 
 
85
@app.get("/get_courses")
def get_courses(keyword: str):
    """
    Endpoint to fetch course recommendations based on a keyword.

    Args:
        keyword (str): The search keyword provided by the user.

    Returns:
        JSONResponse: A JSON response containing the results, or a prompt to
        supply a keyword when the input is blank.
    """
    # Guard clause: reject blank / whitespace-only keywords early
    # (idiomatic truthiness test instead of `== ""`).
    if not keyword.strip():
        return JSONResponse(
            content={
                "results": "Please provide a valid keyword to search for the courses and upscale your knowledge."
            }
        )

    # Debugging information
    print("Keyword to search:", keyword)

    # Generate and return the response for the keyword
    results = response_generator(keyword)
    return JSONResponse(content={"results": results})
113
 
114
 
115
if __name__ == "__main__":
    # Bootstrap: load secrets, build models / index / chain, then serve.

    # Environment variables (API keys etc.) from .env
    load_dotenv()

    # Embedding model used for similarity search
    embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

    # LLM that formats the final answer
    llm = GoogleGenerativeAI(model="gemini-1.5-flash")

    # Pinecone index holding the course data
    index_name = "analytics-vidya-free-courses"
    pinecone_index = creating_pinecone_index(embedding, index_name)

    # Chat prompt steering the LLM toward markdown-formatted course details
    template = ChatPromptTemplate([
        ("system", "You are a search engine for finding free courses from Analytics Vidya. Course details will be provided to you."),
        ("system", "You will be provided with course title, course link, description, and course curriculum. Show the curriculum in bullet points format."),
        ("system", "Handle the details wisely and give the output in a proper format. Respond only with the course details in a tabular or descriptive markdown format as it suits."),
        ("human", "Give me some detail related to this keyword : {keyword}"),
        ("human", "These are the details of courses : {details}")
    ])

    # Chain that stuffs the retrieved documents into the prompt
    chain = create_stuff_documents_chain(llm, template, document_variable_name="details")

    # Serve the API on all interfaces, port 8000
    uvicorn.run(app, host="0.0.0.0", port=8000)
157
+