Spaces:

aadil732
/

Analytics-Vidya-Free-Courses

Runtime error

App Files Files Community

aadil732 committed on Jan 5, 2025

Commit

aa4d80e

1 Parent(s): 11c12df

Minor Changes

Browse files

Files changed (3) hide show

src/app.py +35 -18
src/extract_single_course.py +1 -1
src/retriever.py +84 -30

src/app.py CHANGED Viewed

@@ -2,53 +2,70 @@ import streamlit as st
 import requests
 def wide_space_default():
     st.set_page_config(
         layout="wide",
         page_title="Search Free Courses",
     )
 wide_space_default()
 css_for_text = """
 <style>
     p, li, strong, ul {
-        font-size: 18px !important;
     }
     h1 {
         font-size: 28px;
     }
     .text {
-        font-size: 22px !important
     }
 </style>
 """
-# Applying the custom CSS for styling
 st.markdown(css_for_text, unsafe_allow_html=True)
 st.header("Analytics Vidya Free Courses", anchor=False)
-# desc = st.write("This is a search engine to search among free courses of Analytics Vidya")
 def sending_keyword(keyword):
-    response =  requests.get("http://0.0.0.0:8000/get_courses", params={"keyword": keyword}).json()
-    return response['results']
-response = "This ia a search engine project created for Analytics Vidya Free Courses. The project helps users to type any keyword related to the free courses they are looking for."
 with st.sidebar:
     keyword = st.text_input("Enter course keyword:")
-    submit_button = st.button("Press me")
     if keyword or submit_button:
-        st.write("Your entered keyword is ", keyword)
         st.snow()
 if keyword or submit_button:
-        response = sending_keyword(keyword)
-st.markdown(response, unsafe_allow_html=True)

 import requests
 def wide_space_default():
     """
     Apply the page-level Streamlit configuration for this app.

     Calls ``st.set_page_config`` with the "wide" layout and the page
     title "Search Free Courses".
     """
     page_settings = {
         "layout": "wide",
         "page_title": "Search Free Courses",
     }
     st.set_page_config(**page_settings)
+# Apply default layout settings
 wide_space_default()
+# Custom CSS styling for Streamlit components
 css_for_text = """
 <style>
     p, li, strong, ul {
+        font-size: 20px !important;
     }
     h1 {
         font-size: 28px;
     }
     .text {
+        font-size: 22px !important;
     }
 </style>
 """
+# Applying custom CSS
 st.markdown(css_for_text, unsafe_allow_html=True)
+# Header for the application
 st.header("Analytics Vidya Free Courses", anchor=False)
 def sending_keyword(keyword, timeout=60):
     """
     Send the keyword to the FastAPI ``/get_courses`` endpoint and return its results.

     Args:
         keyword (str): The keyword to search for courses.
         timeout (float): Seconds to wait for the API before giving up.
             Without a timeout ``requests`` may block indefinitely.

     Returns:
         str: The ``results`` field of the API's JSON reply, or a message
         starting with "Error:" when the request fails, the server answers
         with an HTTP error status, or the reply cannot be decoded.
     """
     # 0.0.0.0 is a "listen on every interface" bind address, not a portable
     # destination for a client; reach the local API over loopback instead.
     api_url = "http://127.0.0.1:8000/get_courses"
     try:
         reply = requests.get(api_url, params={"keyword": keyword}, timeout=timeout)
         # Treat HTTP 4xx/5xx as failures rather than parsing their bodies.
         reply.raise_for_status()
         return reply.json()["results"]
     except (requests.RequestException, ValueError, KeyError, TypeError) as e:
         # RequestException: connection/timeout/HTTP errors; the others cover
         # a reply that is not JSON or lacks the expected "results" key.
         return f"Error: Unable to connect to the API. Details: {e}"
+# Default response message
+response = "This is a search engine project created for Analytics Vidya Free Courses. " \
+           "The project helps users to type any keyword related to the free courses they are looking for."
+# Sidebar components
 with st.sidebar:
     keyword = st.text_input("Enter course keyword:")
     submit_button = st.button("Search Courses")
     # A search is triggered by a non-empty keyword or by pressing the button.
     search_requested = keyword or submit_button
     if search_requested:
         st.write(f"Your entered keyword is: {keyword}")
         st.snow()
 # Swap the default message for the API's answer when a search was triggered.
 if search_requested:
     response = sending_keyword(keyword)
 # Render whichever text `response` now holds, outside the sidebar.
 st.markdown(response, unsafe_allow_html=True)

src/extract_single_course.py CHANGED Viewed

@@ -65,7 +65,7 @@ def extract_course_details(course_url):
     if curriculum_section:
         items = curriculum_section.find_all('h5')
         for idx, item in enumerate(items):
-            if idx == 13: # Only Showing maximum 12 curriculums to the users
                 text = "...and many more"
                 curriculum.append(text)
                 break

     if curriculum_section:
         items = curriculum_section.find_all('h5')
         for idx, item in enumerate(items):
+            if idx == 10: # Only Showing maximum 10 curriculums to the users
                 text = "...and many more"
                 curriculum.append(text)
                 break

src/retriever.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
@@ -10,94 +11,147 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
 from dotenv import load_dotenv
 def creating_pinecone_index(embedding, index_name):
     return PineconeVectorStore(embedding=embedding, index_name=index_name)
 def retrieve_response_from_pinecone(keyword, k=5):
     """
-    Retrieves the most similar responses from the Pinecone index based on the given query.
     Args:
-        query (str): The input query used to search the Pinecone index for vectors.
-        k (int, optional): Indicates top results to choose. Default is 5.
     Returns:
-        list: A list of results containing the most similar vectors from the Pinecone index.
     """
     results = pinecone_index.similarity_search(keyword, k=k)
     return results
 def response_generator(keyword):
     """
-    Generates a response to the given query by retrieving relevant information from the Pinecone index and invoking
-    a processing chain with llm.
     Args:
-        query (str): The user's input or question that will be used to retrieve relevant information and generate a response.
     Returns:
-        str: The generated response to the query, either based on the retrieved information or an error messageif the process fails.
     """
     try:
         results = retrieve_response_from_pinecone(keyword, 5)
-        print("results", results)
-        # Generating a response by invoking the chain with retrieved content and the original query
         answer = chain.invoke(input={"keyword": keyword, "details": results})
     except Exception as e:
-        # Returning an error message if any exception occurs
-        answer = f"Sorry, I am unable to find the answer to your query. Please try again later. The error is {e}"
     return answer
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
     allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
-@app.get("/")
-def root():
-    return "HELLO"
 @app.get("/get_courses")
 def get_courses(keyword: str):
     if keyword.strip() == "":
-        return JSONResponse(content={"results": "Please provide a valid keyword to search for the courses and upscale your knowledge."})
     else:
-        print("Keyword to searh: ", keyword)
         results = response_generator(keyword)
         return JSONResponse(content={"results": results})
 if __name__ == "__main__":
     load_dotenv()
     embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
     llm = GoogleGenerativeAI(model="gemini-1.5-flash")
     index_name = "analytics-vidya-free-courses"
     pinecone_index = creating_pinecone_index(embedding, index_name)
     template = ChatPromptTemplate([
-        ("system", "You are a search engine for finding free courses from Analytics Vidya. Course details will be provided to you"),
-        ("system", "You will be provided with course title, course link, description and course curriculum. Show the curriculum in a bullet points format"),
-        ("system", "Handle the details wisely and give the output in a proper format. Respond only with the course details in a tabular or descriptive markdown format as it suites"),
         ("human", "Give me some detail related to this keyword : {keyword}"),
         ("human", "These are the details of courses : {details}")
     ])
-    # Setting up the document processing chain for response generation based on retrieved documents
     chain = create_stuff_documents_chain(llm, template, document_variable_name="details")
-    # Starting the FastAPI server with Uvicorn, accessible at 0.0.0.0 on port 8000
-    import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

+import uvicorn
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from dotenv import load_dotenv
 def creating_pinecone_index(embedding, index_name):
     """
     Construct a ``PineconeVectorStore`` for the given embedding model and index name.

     Args:
         embedding (object): The embedding model handed to the vector store.
         index_name (str): The name of the Pinecone index.

     Returns:
         PineconeVectorStore: The store created from the two arguments.
     """
     vector_store = PineconeVectorStore(index_name=index_name, embedding=embedding)
     return vector_store
 def retrieve_response_from_pinecone(keyword, k=5):
     """
     Run a similarity search for ``keyword`` against the module-level ``pinecone_index``.

     Args:
         keyword (str): The keyword or query passed to ``similarity_search``.
         k (int, optional): The number of results requested. Defaults to 5.

     Returns:
         Whatever ``pinecone_index.similarity_search`` returns for this query.
     """
     # `pinecone_index` is a global assigned in the `__main__` block of this file.
     return pinecone_index.similarity_search(keyword, k=k)
 def response_generator(keyword):
     """
     Build an answer for ``keyword`` from the retrieved documents and the LLM chain.

     Args:
         keyword (str): The keyword or query to search in the Pinecone index.

     Returns:
         The value returned by ``chain.invoke``, or an apology string that
         includes the exception text when retrieval or generation raises.
     """
     try:
         # Fetch the 5 closest matches, then hand them to the chain as "details".
         matches = retrieve_response_from_pinecone(keyword, 5)
         print("results:", matches)
         return chain.invoke(input={"keyword": keyword, "details": matches})
     except Exception as err:
         # Any failure is turned into a message rather than propagated.
         return (
             "Sorry, I am unable to find the answer to your query. "
             f"Please try again later. The error is: {err}"
         )
+# Initialize FastAPI application
 app = FastAPI()
 # Permissive CORS so any origin may call the API. Credentials stay disabled:
 # wildcard origins/methods/headers must not be combined with
 # allow_credentials=True, and no endpoint visible in this file reads cookies
 # or authorization headers.
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],  # Allow all origins
     allow_credentials=False,
     allow_methods=["*"],  # Allow all HTTP methods
     allow_headers=["*"],  # Allow all headers
 )
 @app.get("/get_courses")
 def get_courses(keyword: str):
     """
     Handle ``GET /get_courses``: answer a keyword search with a JSON payload.

     Args:
         keyword (str): The search keyword provided by the user.

     Returns:
         JSONResponse: ``{"results": ...}`` holding either a prompt to supply
         a keyword (blank input) or the output of ``response_generator``.
     """
     # Guard clause: a blank or whitespace-only keyword is never searched.
     if not keyword.strip():
         payload = {
             "results": "Please provide a valid keyword to search for the courses and upscale your knowledge."
         }
         return JSONResponse(content=payload)
     # Debugging information
     print("Keyword to search:", keyword)
     payload = {"results": response_generator(keyword)}
     return JSONResponse(content=payload)
 if __name__ == "__main__":
     # Script entry point: build the retrieval components and serve the API.
     # (Kept as comments: a string literal under `if` is not a docstring, it is
     # just an expression that gets evaluated and discarded.)
     #
     # Workflow:
     # 1. Load environment variables from the `.env` file.
     # 2. Initialize the embedding model and LLM for processing.
     # 3. Create the Pinecone vector store used to retrieve course data.
     # 4. Define a chat-based prompt template for the LLM chain.
     # 5. Start the FastAPI application using Uvicorn on host `0.0.0.0`, port `8000`.
     #
     # NOTE: `pinecone_index` and `chain` are read as globals by the request
     # handlers and are only assigned here, i.e. when this file is run as a script.
     load_dotenv()
     embedding = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
     llm = GoogleGenerativeAI(model="gemini-1.5-flash")
     index_name = "analytics-vidya-free-courses"
     pinecone_index = creating_pinecone_index(embedding, index_name)
     # System messages set the role and output format; the human messages carry
     # the keyword and the retrieved documents ({details}).
     template = ChatPromptTemplate([
         ("system", "You are a search engine for finding free courses from Analytics Vidya. Course details will be provided to you."),
         ("system", "You will be provided with course title, course link, description, and course curriculum. Show the curriculum in bullet points format."),
         ("system", "Handle the details wisely and give the output in a proper format. Respond only with the course details in a tabular or descriptive markdown format as it suits."),
         ("human", "Give me some detail related to this keyword : {keyword}"),
         ("human", "These are the details of courses : {details}")
     ])
     # The retrieved documents are injected into the prompt's {details} variable.
     chain = create_stuff_documents_chain(llm, template, document_variable_name="details")
     # Bind on all interfaces so the server is reachable from outside the host.
     uvicorn.run(app, host="0.0.0.0", port=8000)