Spaces:
Paused
Paused
| import logging | |
| logger = logging.getLogger(__name__) | |
| logger.setLevel(logging.INFO) | |
| import json | |
| import streamlit as st | |
| from pinecone import Pinecone | |
| from utils import get_variable | |
# Pinecone credentials, looked up through the project's `get_variable` helper.
# Both values are available from the Pinecone console (app.pinecone.io).
PINECONE_KEY = get_variable("PINECONE_API_KEY")
PINE_CONE_ENVIRONMENT = get_variable("PINE_CONE_ENVIRONMENT")
def init_pinecone():
    """Create a Pinecone client and return a handle to the "semsearch" index.

    The client is configured from the module-level ``PINECONE_KEY`` and
    ``PINE_CONE_ENVIRONMENT`` settings.
    """
    client = Pinecone(
        api_key=PINECONE_KEY,
        environment=PINE_CONE_ENVIRONMENT,
    )
    return client.Index("semsearch")
def _build_metadata_filter(regions, countries):
    """Combine the region/country selections into one Pinecone metadata filter."""
    clauses = []
    if regions is not None and len(regions) > 0:
        clauses.append({'region': {"$in": regions}})
    if countries is not None and len(countries) > 0:
        clauses.append({'country': {"$in": countries}})

    if not clauses:
        return {}
    if len(clauses) == 1:
        return clauses[0]
    return {"$and": clauses}


def index_query(xq, top_k, regions=None, countries=None, index_namespace=""):
    """Query the Pinecone index cached in the Streamlit session.

    Args:
        xq: Query vector, forwarded unchanged as ``vector`` to the index.
        top_k: Maximum number of matches to request.
        regions: Optional collection of regions; when non-empty, matches are
            restricted to records whose ``region`` metadata is in it.
        countries: Optional collection of countries; when non-empty, matches
            are restricted to records whose ``country`` metadata is in it.
        index_namespace: Namespace to query inside the index.

    Returns:
        The response object returned by the index's ``query`` call.
    """
    # ``None`` defaults replace the former shared mutable ``[]`` defaults.
    regions = [] if regions is None else regions
    countries = [] if countries is None else countries
    logger.debug("Getting companies from countries: %s ", countries)

    filters = _build_metadata_filter(regions, countries)

    # Create the index handle lazily and keep it for the rest of the session.
    if 'index' not in st.session_state:
        st.session_state.index = init_pinecone()

    # The caller's top_k is honoured here; it was previously ignored in
    # favour of a hard-coded 20.
    # NOTE(review): `include_vectors` is kept as-is; Pinecone's documented
    # flag for returning vector values appears to be `include_values` - confirm.
    return st.session_state.index.query(
        vector=xq,
        namespace=index_namespace,
        top_k=top_k,
        filter=filters,
        include_metadata=True,
        include_vectors=False,
    )
def search_index(query, top_k, regions, countries, retriever, index_namespace=""):
    """Embed a text query, search the Pinecone index and flatten the matches.

    Args:
        query: Free-text search query.
        top_k: Maximum number of matches to request.
        regions: Regions to restrict the search to (empty for no restriction).
        countries: Countries to restrict the search to (empty for no
            restriction).
        retriever: Object whose ``encode`` method turns a list of strings into
            an array-like that supports ``.tolist()``.
        index_namespace: Namespace to query inside the index.

    Returns:
        A list of dicts, one per match, with the keys ``score``, ``metadata``,
        ``id``, ``name`` (the match's ``company_name`` metadata) and
        ``description`` (empty string when the metadata has none).
    """
    # NOTE(review): encode() is given a one-element batch, so xq is presumably
    # a nested list - confirm this is the shape the index query expects.
    xq = retriever.encode([query]).tolist()

    # Same arguments for the first attempt and the retry; the first attempt
    # previously dropped index_namespace.
    query_kwargs = dict(
        top_k=top_k,
        regions=regions,
        countries=countries,
        index_namespace=index_namespace,
    )
    try:
        xc = index_query(xq, **query_kwargs)
    except Exception:
        # Force a reload of the cached index handle and try once more. A
        # second failure propagates to the caller.
        logger.warning(
            "Pinecone query failed; re-creating the index handle and retrying",
            exc_info=True,
        )
        st.session_state.index = init_pinecone()
        xc = index_query(xq, **query_kwargs)

    results = []
    for match in xc['matches']:
        metadata = match['metadata']
        results.append({
            'score': match['score'],
            'metadata': metadata,
            'id': match['id'],
            'name': metadata['company_name'],
            'description': metadata['description'] if 'description' in metadata else "",
        })
    return results