Upload 4 files
Browse files- __init__.py +1 -0
- app.py +42 -0
- requirements.txt +7 -0
- utils.py +84 -0
__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# This file is intentionally left empty. It indicates that this directory is a package.
|
app.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from flask import Flask, request,jsonify
import os
import google.generativeai as genai
import utils as ut
# Load environment variables from .env file
# TODO(review): nothing here actually loads a .env file — python-dotenv is in
# requirements.txt but load_dotenv() is never called, so GEMINI_API_KEY must
# currently come from the process environment. Confirm intended behavior.

app = Flask(__name__)

# Configure the Gemini client once at import time.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=GEMINI_API_KEY)
# NOTE(review): this `model` appears unused in app.py — the /query endpoint
# delegates generation to utils.generate_answer, which builds its own model.
model = genai.GenerativeModel('gemini-1.5-flash')
|
| 13 |
+
|
| 14 |
+
@app.route('/query', methods=['POST'])
def query():
    """
    Handle POST /query.

    Expects a JSON body of the form {"query": "..."} (sent by the Streamlit
    frontend). Searches articles for the query, concatenates their content,
    and returns {"response": <generated answer>} as JSON.

    Returns:
        200 with {"response": ...} on success;
        400 with {"error": ...} when the body is not JSON or "query" is missing.
    """
    if not request.is_json:
        # Previously a non-JSON request fell through the `if` and the view
        # returned None, which Flask turns into a 500. Reject it explicitly.
        return jsonify({"error": "Request body must be JSON"}), 400

    # get the data/query from streamlit app
    query = request.json.get('query')
    print("Received query: ", query)
    if not query:
        # Guard against a missing/empty "query" field instead of searching
        # for None downstream.
        return jsonify({"error": "Missing 'query' field"}), 400

    # Step 1: Search and scrape articles based on the query
    print("Step 1: searching articles")
    articles = ut.search_articles(query)
    print(articles)

    # Step 2: Concatenate content from the scraped articles
    print("Step 2: concatenating content")
    extracted_content = ut.fetch_article_content(articles)
    print(extracted_content)

    # Step 3: Generate an answer using the LLM
    print("Step 3: generating answer")
    llm_response = ut.generate_answer(extracted_content, query)

    # return the jsonified text back to streamlit
    return jsonify({"response": llm_response})
|
| 40 |
+
|
| 41 |
+
if __name__ == '__main__':
    # Development server only; the Streamlit client posts to this host/port.
    app.run(host='localhost', port=5001)
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
python-dotenv
|
| 2 |
+
flask
|
| 3 |
+
# openai==1.35.15
|
| 4 |
+
requests
|
| 5 |
+
streamlit
|
| 6 |
+
beautifulsoup4
|
| 7 |
+
google-generativeai
|
utils.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import requests
import json
import google.generativeai as genai

# Load API keys from environment variables
# NOTE(review): the Serper key is read from an env var literally named
# 'X-API-KEY' (the HTTP header name, not a conventional variable name) —
# confirm this matches the deployment's .env / environment configuration.
SERPER_API_KEY = os.getenv('X-API-KEY')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=GEMINI_API_KEY)
# Single shared Gemini model instance used by generate_answer().
model = genai.GenerativeModel('gemini-1.5-flash')
|
| 11 |
+
|
| 12 |
+
def search_articles(query: str):
    """
    Search Google via the Serper API for articles matching *query*.

    Parameters:
        query: the user's search string.

    Returns:
        The raw JSON response body as a string (NOT a parsed list — the
        previous docstring was wrong; fetch_article_content() parses it).

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    url = "https://google.serper.dev/search"
    payload = json.dumps({"q": query})
    headers = {
        'X-API-KEY': SERPER_API_KEY,
        'Content-Type': 'application/json',
    }

    # Use requests.post directly (clearer than requests.request("POST", ...))
    # and bound the call so a hung search cannot stall the /query endpoint.
    response = requests.post(url, headers=headers, data=payload, timeout=15)
    return response.text
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def fetch_article_content(articles):
    """
    Extract headings and snippet text from a Serper search response.

    Parameters:
        articles: JSON string as returned by search_articles() (Serper's
            /search response body).

    Returns:
        Titles and snippets from the answer box, organic results, and
        "people also ask" entries, one per line; "" when *articles* is
        empty/None or contains none of those sections.
    """
    if not articles:
        # Guard: an empty/None response would crash json.loads below.
        return ""
    data = json.loads(articles)

    parts = []

    def _collect(item):
        # Every Serper section exposes optional 'title'/'snippet' keys;
        # collect whichever are present, title first (preserves the
        # original concatenation order).
        for key in ('title', 'snippet'):
            if key in item:
                parts.append(item[key])

    # Extract from answerBox if it exists (a single object, not a list).
    if 'answerBox' in data:
        _collect(data['answerBox'])

    # Extract from organic search results and peopleAlsoAsk (both lists).
    for section in ('organic', 'peopleAlsoAsk'):
        for item in data.get(section, []):
            _collect(item)

    # join() is equivalent to the old piece+"\n" accumulation + strip().
    return "\n".join(parts)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def generate_answer(content, query):
    """
    Generate an answer to *query* grounded in *content* using the
    module-level Gemini model.

    (The previous docstring said "GPT-4"; this module configures and calls
    Gemini 'gemini-1.5-flash'.)

    Parameters:
        content: concatenated search-result text used as context.
        query: the user's original question.

    Returns:
        The generated answer text (response.text).
    """
    # Create the prompt based on the content and the query.
    # (The dead `response = None` initialization was removed.)
    system_prompt = f"""You are a helpful assistant. Use the following context to answer the user's query. If the context doesn't contain relevant information, say so.\n
Below is the context : \n
{content}\n
Below is the user query:
{query}\n
Based on the user query above and the context given provide with highly accurate response for the user query . You should be very precise about the answer based on the user query and content.
"""

    response = model.generate_content(system_prompt)
    return response.text
|
| 84 |
+
|