| from elasticsearch import Elasticsearch |
| import os |
| import json |
| import requests |
|
|
# Elasticsearch connection settings, read eagerly from the environment.
# Each lookup uses os.environ[...], so a missing variable raises KeyError
# as soon as this module is imported.
ES_URL, ES_USER, ES_PASS, ES_CA_CERT = (
    os.environ[name]
    for name in ("ES_URL", "ES_USER", "ES_PASS", "ES_CA_CERT")
)
|
|
|
|
class ESGPT:
    """Search an Elasticsearch index and summarize the hits with an OpenAI model.

    The Elasticsearch connection uses the module-level ``ES_URL``, ``ES_USER``,
    ``ES_PASS`` and ``ES_CA_CERT`` settings; the OpenAI model name and API key
    are read from the ``OPENAI_GPT_ENGINE`` and ``OPENAI_API_KEY`` environment
    variables when an instance is created.
    """

    def __init__(self, index_name):
        """Create the Elasticsearch client and load the OpenAI settings.

        Args:
            index_name: Name of the index used by ``index`` and ``search``.

        Raises:
            KeyError: If ``OPENAI_GPT_ENGINE`` or ``OPENAI_API_KEY`` is not
                set in the environment.
        """
        self.es = Elasticsearch(ES_URL, http_auth=(ES_USER, ES_PASS),
                                ca_certs=ES_CA_CERT, verify_certs=True)
        self.index_name = index_name
        self.model_engine = os.environ["OPENAI_GPT_ENGINE"]
        self.api_key = os.environ["OPENAI_API_KEY"]

    def index(self, doc_id, doc):
        """Store ``doc`` under ``doc_id`` in this instance's index."""
        self.es.index(index=self.index_name,
                      id=doc_id,
                      document=doc)

    def search(self, query):
        """Run ``query`` as a ``query_string`` search and return the hits.

        Args:
            query: Query text placed verbatim in the ``query_string`` clause.

        Returns:
            The ``['hits']['hits']`` entry of the Elasticsearch response.
        """
        body = {
            "query": {
                "query_string": {"query": query}
            }
        }

        results = self.es.search(index=self.index_name, body=body)
        return results['hits']['hits']

    def _paper_results_to_text(self, results):
        """Render search hits as plain text for use in the summary prompt.

        Each hit contributes ``"<title>:\\n<abstract>\\n\\n"``, where the
        abstract is cut to its first 100 characters. A missing or null
        ``title`` / ``abstract`` is rendered as an empty string.

        Args:
            results: Iterable of hits, each a mapping with a ``"_source"``
                mapping (the shape returned by ``search``).

        Returns:
            The concatenated text, or ``""`` when ``results`` is empty.
        """
        chunks = []
        for paper in results:
            source = paper["_source"]
            title = source.get("title") or ""
            # The key is "abstract"; the previous check looked for the
            # misspelled "abctract", so abstracts were never included.
            abstract = source.get("abstract") or ""
            chunks.append(f"{title}:\n{abstract[:100]}\n\n")
        return "".join(chunks)

    def summarize(self, query, results, timeout=None):
        """Ask the OpenAI completions endpoint to summarize search results.

        Args:
            query: The search query, quoted in the prompt.
            results: Hits as returned by ``search``.
            timeout: Optional timeout passed to ``requests.post``. The default
                ``None`` applies no timeout, which matches the behaviour
                before this parameter existed.

        Returns:
            The ``requests.Response`` of the POST. The request is made with
            ``stream=True`` (both in the JSON body and in the HTTP call), so
            the caller is expected to consume the response body.
        """
        result_json_str = self._paper_results_to_text(results)
        if result_json_str == "":
            result_json_str = "No results found"

        # Echo the first 500 characters of the text that feeds the prompt.
        print(result_json_str[:500])

        body = {
            "model": self.model_engine,
            # Only the first 1000 characters of the results go into the prompt.
            "prompt": f"Please summarize the following search results for query: {query}:\n{result_json_str[:1000]}",
            "max_tokens": 1000,
            "n": 1,
            "stop": None,
            "temperature": 0.5,
            "stream": True,
        }

        headers = {"Content-Type": "application/json",
                   "Authorization": f"Bearer {self.api_key}"}

        resp = requests.post("https://api.openai.com/v1/completions",
                             headers=headers,
                             data=json.dumps(body),
                             stream=True,
                             timeout=timeout)
        return resp
|
|