# (extraction residue: Hugging Face "Spaces: Sleeping" status banner — not code)
import json
import logging
import os
from socket import timeout

from firecrawl import FirecrawlApp
from flask import Flask, request, jsonify
from flask_cors import CORS
from google import genai
from serpapi import GoogleSearch
# Firecrawl client shared by get_abstract() and scrape_web().
f_app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
app = Flask(__name__)
CORS(app)  # allow cross-origin requests (frontend is served from another origin)
# Gemini client; chat() creates a fresh chat session from it per request.
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
# Agent system prompt read from the environment. NOTE(review): this is None when
# the variable is unset, and chat() would then send None to the model — confirm
# SYSTEM_PROMPT is always configured in deployment.
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT")
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
def get_google_scholar_results(key_params: dict):
    """Run a Google Scholar search through SerpAPI.

    Mutates *key_params* in place, injecting the API key, engine and UI
    language before issuing the request.

    Returns a ``(profiles, organic_results)`` tuple; either element is
    ``None`` when the corresponding section is absent from the response.
    """
    key_params.update(
        api_key=os.getenv("SERPAPI_API_KEY"),
        engine="google_scholar",
        hl="en",
    )
    response = GoogleSearch(key_params).get_dict()
    # dict.get() yields None for a missing section, matching the old cascade.
    return response.get("profiles"), response.get("organic_results")
def get_results(query: str):
    """Fetch Google Scholar results for *query* and summarise them.

    Returns a ``(profiles, answer, keys)`` tuple:
      * ``profiles`` — the raw "profiles" section (or None),
      * ``answer``  — a list of dicts, one per organic result, keeping only
        the fields of interest below,
      * ``keys``    — the key view of the first organic result ([] if none).

    Typical organic-result keys:
    dict_keys(['position', 'title', 'result_id', 'link', 'snippet',
               'publication_info', 'resources', 'inline_links'])
    """
    params = {"q": query}
    profiles, organic = get_google_scholar_results(params)
    answer = []
    keys = []
    if organic:
        keys = organic[0].keys()
        wanted = ("title", "result_id", "link", "snippet",
                  "publication_info", "resources")
        for item in organic:
            output = {field: item[field] for field in wanted if field in item}
            # annualreviews.org pages hide the abstract behind the link, so
            # scrape it explicitly; .get guards results without a "link".
            if "https://www.annualreviews" in item.get("link", ""):
                output["abstract"] = get_abstract(item["link"])
            answer.append(output)
    return profiles, answer, keys
def get_abstract(url: str):
    """Scrape *url* and extract the paragraph following an "Abstract" heading.

    Returns the HTML fragment starting at the opening ``<p>`` tag (closing
    tag excluded, matching the previous behaviour), or the sentinel string
    "Abstract not found" when the heading or paragraph cannot be located.
    """
    scrape_result = f_app.scrape(url, formats=['markdown', 'html'])
    html = scrape_result.html
    offset = html.find("Abstract")
    if offset == -1:
        return "Abstract not found"
    # Use absolute offsets; the original mixed relative indices and never
    # checked for -1, which produced garbage slices when tags were missing.
    start = html.find("<p>", offset)
    if start == -1:
        return "Abstract not found"
    end = html.find("</p>", start)
    if end == -1:
        return "Abstract not found"
    return html[start:end]
def scrape_web(url: str):
    """Scrape any website given its URL via Firecrawl.

    Returns the HTML code of the web page.
    """
    result = f_app.scrape(url, formats=['markdown', 'html'])
    return result.html
def get_response(chat_client, user):
    """Send *user* to the chat session and return the first candidate's text."""
    reply = chat_client.send_message(user)
    first_candidate = reply.candidates[0]
    return first_candidate.content.parts[0].text
def convert_to_json(text):
    """Extract the first ``{`` .. last ``}`` span of *text* and parse it as JSON.

    Returns the parsed object on success, or an error-description string on
    failure (callers detect failure by the str return type).

    Bug fixed: the original computed the end with ``text[::-1].find("}")`` and
    sliced ``text[start:-end]`` — when ``}`` was the LAST character that gave
    ``-0`` and an empty slice, so a bare object like ``{"a": 1}`` never parsed.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        return "Json Parse Error due to no JSON object found in text"
    try:
        return json.loads(text[start:end + 1])
    except Exception as e:
        return "Json Parse Error due to " + str(e)
def get_observation(function, inp):
    """Execute the tool named *function* with argument *inp*.

    Wraps the tool's result in an OBSERVATION-state dict for the agent loop.
    Unknown tool names produce a retry message instead of raising.
    """
    if function == "get_results":
        profiles, answer, keys = get_results(inp)
        observation = {
            "profiles": profiles,
            "answer": answer,
            "keys": keys,
        }
    elif function == "scrape_web":
        observation = {"html_text": scrape_web(inp)}
    else:
        observation = {"message": "Function Not found, Please Retry"}
    return {"state": "OBSERVATION", "observation": observation}
def get_output(chat_client, inp):
    """Drive the agent loop until the model emits an OUTPUT state.

    Sends *inp* to the model, then repeatedly feeds each parsed response
    back: a CALL state runs the requested tool and sends the observation;
    every other non-OUTPUT state (PLAN, OBSERVATION, anything else) is
    simply echoed back to the model. Returns the final OUTPUT dict.
    """
    output = convert_to_json(get_response(chat_client, str(inp)))
    while output["state"] != "OUTPUT":
        if output["state"] == "CALL":
            tool = output["function_name"]
            # NOTE(review): only the last value in "params" survives this
            # loop — presumably each call carries a single parameter; when
            # "params" is empty, the previous inp is reused. Confirm.
            for key in output["params"]:
                inp = output["params"][key]
            observation = get_observation(tool, inp)
            output = convert_to_json(get_response(chat_client, str(observation)))
        else:
            # PLAN / OBSERVATION / unrecognised: echo the state back as-is.
            output = convert_to_json(get_response(chat_client, str(output)))
    return output
def chat(query: str):
    """Run one full agent conversation for *query* and return the final answer.

    Creates a fresh Gemini chat session, primes it with SYSTEM_PROMPT, then
    lets get_output() drive the PLAN/CALL/OBSERVATION loop to completion.
    """
    session = client.chats.create(model="gemini-2.5-flash")
    # Prime the session with the system prompt; the reply itself is unused.
    get_response(session, SYSTEM_PROMPT)
    start_message = {
        "state": "START",
        "user": query,
    }
    return get_output(session, start_message)["output"]
# Health-check endpoint. NOTE(review): no @app.route decorator is visible
# here — likely lost in extraction; confirm the intended route (probably "/").
def default():
    return jsonify({"message": "Backend Working Successfully"})
# Chat endpoint: reads {"query": ...} from the request body, runs the agent
# loop, and returns {"output": ...}. NOTE(review): no @app.route decorator is
# visible — likely lost in extraction; presumably a POST route, confirm.
def get_chat_results():
    # NOTE(review): request.json is None when the body isn't JSON, which would
    # raise here — assumes callers always send a JSON body with "query".
    query = request.json.get("query")
    app.logger.info(f"Chat Initiated : {query}")
    output = chat(query)
    app.logger.info("Output Parsed")
    return jsonify({"output":output})