Files changed (1) hide show
  1. rm.py +129 -100
rm.py CHANGED
@@ -6,73 +6,105 @@ from flask import Flask, request, jsonify
6
  from flask_cors import CORS
7
  from google import genai
8
  import json
 
 
9
 
10
-
11
-
12
-
13
- f_app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
14
  app = Flask(__name__)
 
 
 
 
 
15
  CORS(app)
16
 
17
- client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
18
-
19
 
20
  SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT")
21
 
22
- def get_google_scholar_results(key_params: dict):
23
- key_params['api_key'] = os.getenv("SERPAPI_API_KEY")
 
 
 
 
 
24
  key_params['engine'] = "google_scholar"
25
  key_params['hl'] = "en"
26
  search = GoogleSearch(key_params)
27
  results = search.get_dict()
28
- if "profiles" in results and "organic_results" in results:
29
- return results["profiles"],results["organic_results"]
30
- elif "profiles" in results:
31
- return results["profiles"],None
32
- elif "organic_results" in results:
33
- return None,results["organic_results"]
34
- else:
35
- return None,None
36
 
37
- def get_results(query: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  '''
39
- This function is used to get the results from the Google Scholar API.
40
- It takes a query as input and returns a list of dictionaries, each containing the information about a paper/author.
41
- The keys of the dictionaries are the fields of the paper.
 
 
42
 
43
- Keys of the dictionary are:
44
- dict_keys(['position', 'title', 'result_id', 'link', 'snippet', 'publication_info', 'resources', 'inline_links'])
 
 
45
  '''
46
- params = {
47
- "q": query,
48
- }
 
 
 
49
 
50
- answer = []
 
51
 
52
- profiles,result = get_google_scholar_results(params)
53
- keys = result[0].keys()
54
- for i in range(len(result)):
55
- output = {}
56
- if "title" in result[i]:
57
- output["title"] = result[i]["title"]
58
- if "result_id" in result[i]:
59
- output["result_id"] = result[i]["result_id"]
60
- if "link" in result[i]:
61
- output["link"] = result[i]["link"]
62
- print(f"\n {output['link']}")
63
- if "https://www.annualreviews" in result[i]["link"]:
64
- output["abstract"] = get_abstract(result[i]["link"])
65
- if "snippet" in result[i]:
66
- output["snippet"] = result[i]["snippet"]
67
- if "publication_info" in result[i]:
68
- output["publication_info"] = result[i]["publication_info"]
69
- if "resources" in result[i]:
70
- output["resources"] = result[i]["resources"]
71
-
72
- answer.append(output)
73
-
74
-
75
- return profiles,answer,keys
 
76
 
77
  def get_abstract(url: str):
78
  scrape_result = f_app.scrape_url(url, formats=['markdown', 'html'])
@@ -80,17 +112,13 @@ def get_abstract(url: str):
80
  offset = scrape_result.html.find("Abstract")
81
  start = scrape_result.html[offset:].find("<p>")
82
  end = scrape_result.html[offset+start:].find("</p>")
83
- print(offset,start,end,sep="\t")
84
  print(f"\n {scrape_result.html[offset+start:offset+start+end]}")
85
  return scrape_result.html[offset+start:offset+start+end]
86
  else:
87
  return "Abstract not found"
88
 
89
  def scrape_web(url:str):
90
- '''
91
- This function is used inorder to scrape any websitye based on its url
92
- Returns the html code of the webpage
93
- '''
94
  scrape_result = f_app.scrape_url(url, formats=['markdown', 'html'])
95
  return scrape_result.html
96
 
@@ -101,22 +129,27 @@ def get_response(chat_client,user):
101
  def convert_to_json(text):
102
  start = text.find("{")
103
  end = text[::-1].find("}")
104
- json_text = text[start : -end]
105
  try:
106
  return json.loads(json_text)
107
  except Exception as e:
108
  return "Json Parse Error due to " + str(e)
109
 
110
- def get_observation(function,inp):
111
  functions = ["get_results","scrape_web"]
112
  if function == functions[0]:
113
- profiles,answer,keys = get_results(inp)
 
 
 
 
 
114
  out_dict = {
115
  "state" : "OBSERVATION",
116
  "observation" : {
117
- "profiles" : profiles,
118
- "answer" : answer,
119
- "keys" : keys
120
  }
121
  }
122
  elif function == functions[1]:
@@ -124,81 +157,77 @@ def get_observation(function,inp):
124
  out_dict = {
125
  "state" : "OBSERVATION",
126
  "observation" : {
127
- "html_text" : html_text
128
  }
129
  }
130
  else:
131
  out_dict = {
132
  "state" : "OBSERVATION",
133
  "observation" : {
134
- "message":"Function Not found, Please Retry"
135
  }
136
  }
137
  return out_dict
138
 
139
- def get_output(chat_client,inp):
140
- response = get_response(chat_client,str(inp))
141
  output = convert_to_json(response)
142
- while output["state"] != "OUTPUT":
143
- if output["state"] == "PLAN":
144
- response = get_response(chat_client,str(output))
145
  output = convert_to_json(response)
146
  print(output)
147
- elif output["state"] == "CALL":
148
- function = output["function_name"]
149
- for i in output["params"].keys():
150
- inp = output["params"][i]
151
- print(inp)
152
- obs = get_observation(function,inp)
153
- response = get_response(chat_client,str(obs))
 
 
 
154
  output = convert_to_json(response)
155
  print(output)
156
- elif output["state"] == "OBSERVATION":
157
- response = get_response(chat_client,str(output))
158
  output = convert_to_json(response)
159
  print(output)
160
  else:
161
- response = get_response(chat_client,str(output))
162
  output = convert_to_json(response)
163
  print(output)
164
  return output
165
-
166
-
167
-
168
 
169
  def chat(query: str):
170
  chat_client = client.chats.create(
171
  model="gemini-2.5-flash"
172
  )
173
- response = get_response(chat_client,SYSTEM_PROMPT)
174
  inp = {
175
- "state" : "START",
176
- "user" : query
177
  }
178
-
179
- output = get_output(chat_client,inp)
180
  return output["output"]
181
 
182
  @app.route("/",methods=["GET"])
183
  def default():
184
  return jsonify({"message": "Backend Working Successfully"})
185
 
186
- @app.route("/chat",methods=["POST","GET"])
187
  def get_chat_results():
188
- query = request.json.get("query")
189
- output = chat(query)
190
- return jsonify({"output":output})
191
-
192
-
193
-
194
-
195
-
196
-
197
-
198
-
199
-
200
-
201
 
 
 
202
 
 
 
203
 
204
 
 
6
  from flask_cors import CORS
7
  from google import genai
8
  import json
9
+ from dotenv import load_dotenv
10
# Load variables from a local .env file into the process environment so the
# os.getenv() lookups below can see them.
load_dotenv()

# Credentials are read from the environment rather than being committed to
# source control. Expected variables: FIRECRAWL_API_KEY, GEMINI_API_KEY.
f_app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))

app = Flask(__name__)
# NOTE(review): newer Flask releases may expect this setting on
# app.json.sort_keys instead of app.config -- confirm against the pinned version.
app.config['JSON_SORT_KEYS'] = False  # To keep the order of keys in JSON response
app.config['CORS_HEADERS'] = 'Content-Type'
# NOTE(review): wildcard origins combined with CORS_SUPPORTS_CREDENTIALS is a
# permissive setup; confirm it is intended before deploying publicly.
app.config['CORS_RESOURCES'] = {r"/*": {"origins": "*"}}
app.config['CORS_SUPPORTS_CREDENTIALS'] = True
app.config['CORS_METHODS'] = ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS']
CORS(app)

client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

# Prompt sent as the first message of every chat session (see chat()).
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT")
24
 
25
def _profile_matches_location(profile, needle: str) -> bool:
    """Return True when *needle* occurs in one of the profile's text fields."""
    if not isinstance(profile, dict):
        # Unknown entry shape: it cannot be matched, so it is filtered out.
        return False
    fields = ("name", "affiliations", "description", "position", "link", "email")
    haystack = " | ".join(str(profile.get(field, "")) for field in fields).lower()
    return needle in haystack


def get_google_scholar_results(key_params: dict, location: str | None = None):
    """
    Call SerpAPI for Google Scholar results.

    Args:
        key_params: search parameters (e.g. {"q": "..."}). The dict is not
            modified; the API key, engine and language are added to a copy so
            the secret never leaks back into the caller's data.
        location: optional location string. When given, author profiles are
            kept only if the lower-cased location occurs in their name,
            affiliations, description, position, link or email.

    Returns:
        (profiles, organic_results) -- either element is None when the
        corresponding key is absent from the SerpAPI response.
    """
    search_params = {
        **key_params,
        # Read the key from the environment instead of hardcoding it.
        "api_key": os.getenv("SERPAPI_API_KEY"),
        "engine": "google_scholar",
        "hl": "en",
    }
    results = GoogleSearch(search_params).get_dict()

    profiles = results.get("profiles")
    organic = results.get("organic_results")

    # Optional location-based filtering on profiles
    if profiles and location:
        needle = location.strip().lower()
        if isinstance(profiles, dict):
            # NOTE(review): SerpAPI appears to nest the author list under
            # profiles["authors"] -- confirm against a live response.
            authors = profiles.get("authors")
            if isinstance(authors, list):
                profiles = {
                    **profiles,
                    "authors": [a for a in authors if _profile_matches_location(a, needle)],
                }
        else:
            profiles = [p for p in profiles if _profile_matches_location(p, needle)]

    return profiles, organic
59
+
60
def get_results(query):
    '''
    Location-aware Google Scholar retrieval.

    `query` is either a plain search string or a dict carrying the search text
    under "query" (or "q") and, optionally, a "location".

    Returns a 3-tuple:
        profiles: author profiles (filtered by location when one is given)
        answer:   one simplified dict per organic result
        keys:     keys of the first organic result, or [] when there is none
    '''
    if isinstance(query, dict):
        search_text = query.get("query") or query.get("q") or ""
        location = query.get("location")
    else:
        search_text, location = str(query), None

    # The location is also appended to the search text itself.
    if location:
        search_text = f"{search_text} {location}".strip()

    profiles, organic = get_google_scholar_results({"q": search_text}, location=location)
    keys = organic[0].keys() if organic else []

    answer = []
    for entry in organic or ():
        simplified = {}
        for field in ("title", "result_id"):
            if field in entry:
                simplified[field] = entry[field]
        if "link" in entry:
            url = entry["link"]
            simplified["link"] = url
            print(f"\n {simplified['link']}")
            # Only Annual Reviews pages are scraped for an abstract.
            if "https://www.annualreviews" in url:
                simplified["abstract"] = get_abstract(url)
        for field in ("snippet", "publication_info", "resources"):
            if field in entry:
                simplified[field] = entry[field]
        answer.append(simplified)

    return profiles, answer, keys
108
 
109
  def get_abstract(url: str):
110
  scrape_result = f_app.scrape_url(url, formats=['markdown', 'html'])
 
112
  offset = scrape_result.html.find("Abstract")
113
  start = scrape_result.html[offset:].find("<p>")
114
  end = scrape_result.html[offset+start:].find("</p>")
115
+ print(offset, start, end, sep="\t")
116
  print(f"\n {scrape_result.html[offset+start:offset+start+end]}")
117
  return scrape_result.html[offset+start:offset+start+end]
118
  else:
119
  return "Abstract not found"
120
 
121
def scrape_web(url:str):
    """Scrape the page at `url` through the Firecrawl client and return its HTML."""
    return f_app.scrape_url(url, formats=['markdown', 'html']).html
124
 
 
129
def convert_to_json(text):
    """
    Extract and parse the JSON object embedded in `text`.

    The object is taken to span from the first "{" to the last "}", so
    surrounding prose or Markdown code fences are ignored.

    Returns:
        The parsed object on success, otherwise a string starting with
        "Json Parse Error due to " that describes the failure.
    """
    start = text.find("{")
    # rfind gives the absolute index of the last "}". The previous
    # reversed-find approach produced the slice text[start:-0] -- an empty
    # string -- whenever the text ended with "}".
    end = text.rfind("}")
    if start == -1 or end < start:
        return "Json Parse Error due to no JSON object found in text"
    try:
        return json.loads(text[start:end + 1])
    except (ValueError, RecursionError) as e:
        return "Json Parse Error due to " + str(e)
137
 
138
+ def get_observation(function, inp):
139
  functions = ["get_results","scrape_web"]
140
  if function == functions[0]:
141
+ if isinstance(inp, dict):
142
+ q = inp.get("query") or inp.get("q") or ""
143
+ location = inp.get("location")
144
+ profiles, answer, keys = get_results({"query": q, "location": location})
145
+ else:
146
+ profiles, answer, keys = get_results(inp)
147
  out_dict = {
148
  "state" : "OBSERVATION",
149
  "observation" : {
150
+ "profiles" : profiles,
151
+ "answer" : answer,
152
+ "keys" : list(keys) if keys else []
153
  }
154
  }
155
  elif function == functions[1]:
 
157
  out_dict = {
158
  "state" : "OBSERVATION",
159
  "observation" : {
160
+ "html_text" : html_text
161
  }
162
  }
163
  else:
164
  out_dict = {
165
  "state" : "OBSERVATION",
166
  "observation" : {
167
+ "message":"Function Not found, Please Retry"
168
  }
169
  }
170
  return out_dict
171
 
172
def _select_call_input(function, params):
    """Pick the single value handed to get_observation for a CALL message."""
    if not isinstance(params, dict):
        # A scalar (or missing) params value is forwarded unchanged.
        return params
    if not params:
        return None
    # get_results understands a {"query"/"q", "location"} dict directly.
    if function == "get_results" and ("query" in params or "q" in params):
        return params
    # Any other tool gets the last parameter value, as before the location
    # change. NOTE(review): scrape_web takes a URL string, so forwarding the
    # whole dict to it looks wrong -- confirm against get_observation.
    return list(params.values())[-1]


def get_output(chat_client, inp, max_steps: int = 25):
    """
    Drive the model's state machine until it produces an OUTPUT message.

    Each reply is parsed with convert_to_json. A CALL reply runs the named
    tool via get_observation and the observation is sent back; any other
    state is echoed back to the model. Unparseable replies are reported to
    the model so that it can retry.

    Args:
        chat_client: chat session passed through to get_response.
        inp: initial message; it is sent as str(inp).
        max_steps: upper bound on follow-up round trips, so a model that
            never reaches OUTPUT cannot loop (and bill) forever.

    Returns:
        The parsed dict whose "state" is "OUTPUT".

    Raises:
        RuntimeError: if no OUTPUT message arrives within max_steps.
    """
    output = convert_to_json(get_response(chat_client, str(inp)))
    for _ in range(max_steps):
        if isinstance(output, dict) and output.get("state") == "OUTPUT":
            return output

        if not isinstance(output, dict):
            # convert_to_json returns an error string on failure.
            feedback = {
                "state": "OBSERVATION",
                "observation": {"message": str(output)},
            }
        elif output.get("state") == "CALL":
            function = output.get("function_name")
            fn_input = _select_call_input(function, output.get("params"))
            print(fn_input)
            feedback = get_observation(function, fn_input)
        else:
            # PLAN, OBSERVATION and unknown states are all echoed back.
            feedback = output

        output = convert_to_json(get_response(chat_client, str(feedback)))
        print(output)

    if isinstance(output, dict) and output.get("state") == "OUTPUT":
        return output
    raise RuntimeError(
        f"Model did not reach the OUTPUT state within {max_steps} steps; "
        f"last reply: {output!r}"
    )
 
 
 
201
 
202
def chat(query: str):
    """
    Answer `query` in a fresh Gemini chat session.

    The session is primed with SYSTEM_PROMPT, then the query is sent as a
    START message and the state machine in get_output is run to completion.

    Returns:
        The value stored under "output" in the model's final message.

    Raises:
        RuntimeError: if the final message is not a dict carrying "output"
            (for example when the model's reply could not be parsed).
    """
    chat_client = client.chats.create(
        model="gemini-2.5-flash"
    )
    # The reply to the system prompt is not used.
    _ = get_response(chat_client, SYSTEM_PROMPT)
    inp = {
        "state" : "START",
        "user" : query
    }
    output = get_output(chat_client, inp)
    # get_output may return a parse-error string or a dict without "output";
    # fail with a clear message instead of an opaque TypeError/KeyError.
    if not isinstance(output, dict) or "output" not in output:
        raise RuntimeError(f"Model did not produce a final output: {output!r}")
    return output["output"]
213
 
214
@app.route("/", methods=["GET"])
def default():
    """Return a fixed JSON message confirming that the backend is reachable."""
    payload = {"message": "Backend Working Successfully"}
    return jsonify(payload)
217
 
218
@app.route("/chat", methods=["POST", "GET"])
def get_chat_results():
    """
    Run a chat query and return the result as JSON.

    POST reads "query" from the JSON body (a missing or invalid body counts
    as empty); GET reads it from the query string. Responds with HTTP 400
    when no query is supplied.
    """
    if request.method == "POST":
        source = request.get_json(silent=True) or {}
    else:  # GET
        source = request.args
    query = source.get("query")

    if not query:
        return jsonify({"error": "No query provided"}), 400

    return jsonify({"output": chat(query)})
232
 
233