# Source code for hestia_earth.utils

from pkgutil import extend_path
import requests
import os
import json
from hestia_earth.schema import SchemaType

# Extend this package's module search path (see `pkgutil.extend_path`) so that
# the package can be made up of several directories found on `sys.path`.
__path__ = extend_path(__path__, __name__)

# Module-level cache for the boto3 S3 client; stays `None` until
# `get_s3_client` creates the client on its first call.
s3_client = None


# improves speed for connecting on subsequent calls
# TODO: find a better way to do this, like profiling?
def get_s3_client():
    """
    Return the shared boto3 S3 client, creating it on the first call.

    The client is stored in the module-level `s3_client` variable and reused
    by every subsequent call.

    Returns
    -------
    The boto3 S3 client.

    Raises
    ------
    ImportError
        If `boto3` is not installed.
    """
    global s3_client
    # imported on every call on purpose: callers rely on the `ImportError`
    # when boto3 is not available
    import boto3

    if s3_client is None:
        s3_client = boto3.client('s3')
    return s3_client
def non_empty_value(value):
    """
    Return True if the value is not an empty string, an empty list or `None`.

    Parameters
    ----------
    value
        Either a string, a list, a number or None.

    Returns
    -------
    bool
        `False` for `''`, `[]` and `None`; `True` otherwise (including `0`).
    """
    if value is None:
        return False
    return value != '' and value != []
def join_args(values):
    """
    Join the non-empty values into a single `&`-separated string.

    Parameters
    ----------
    values
        Iterable of strings; entries rejected by `non_empty_value`
        (empty string, empty list, `None`) are skipped.

    Returns
    -------
    str
        The kept values joined with `&`, stripped of surrounding whitespace.
    """
    kept = [entry for entry in values if non_empty_value(entry)]
    joined = '&'.join(kept)
    return joined.strip()
def api_url():
    """
    Return the base URL of the API.

    Returns
    -------
    str
        The value of the `API_URL` environment variable if set, otherwise
        `https://api.hestia.earth`.
    """
    default_url = 'https://api.hestia.earth'
    return os.getenv('API_URL', default_url)
def search_url():
    """
    Return the base URL of the search service.

    Returns
    -------
    str
        The value of the `SEARCH_URL` environment variable if set, otherwise
        `https://search.hestia.earth`.
    """
    default_url = 'https://search.hestia.earth'
    return os.getenv('SEARCH_URL', default_url)
def request_url(base_url: str, **kwargs):
    """
    Build a URL made of `base_url` followed by a query string.

    Parameters
    ----------
    base_url
        The URL placed before the `?`.
    **kwargs
        Query parameters. Each truthy value is rendered as `key=str(value)`;
        falsy values (`''`, `None`, `0`, ...) are left out.

    Returns
    -------
    str
        `base_url`, a `?`, then the `key=value` pairs joined with `&`.
        Values are inserted as-is (no URL-encoding is applied).
    """
    pairs = [
        f"{name}={value!s}" if value else None
        for name, value in kwargs.items()
    ]
    # `join_args` drops the `None` placeholders left by the falsy values
    query = join_args(pairs)
    return f"{base_url}?{query}"
def load_from_bucket(bucket: str, key: str):
    """
    Load and parse a JSON object stored in an S3 bucket.

    Parameters
    ----------
    bucket
        Name of the bucket, passed as `Bucket` to `get_object`.
    key
        Key of the object, passed as `Key` to `get_object`.

    Returns
    -------
    The parsed JSON content, or `None` when a botocore `ClientError`
    is raised.

    Raises
    ------
    ImportError
        If `botocore` (or `boto3`, through `get_s3_client`) is not installed.
    """
    from botocore.exceptions import ClientError

    try:
        response = get_s3_client().get_object(Bucket=bucket, Key=key)
        raw_content = response['Body'].read()
        return json.loads(raw_content)
    except ClientError:
        return None
def download_hestia(node_id: str, node_type=SchemaType.TERM, mode=''):
    """
    Download a Node from the Hestia Database.

    The Node is first read from the S3 bucket named by the `AWS_BUCKET`
    environment variable. When that raises an `ImportError` (the AWS
    libraries are imported lazily by the bucket helpers), the Node is
    downloaded from the API instead.

    Parameters
    ----------
    node_id
        The `@id` of the Node.
    node_type
        The `@type` of the Node.
    mode
        Optional - use `csv` to download as a CSV file, `zip` to download as a ZIP file.
        Defaults to `JSON`. Only used when downloading from the API.

    Returns
    -------
    JSON
        The `JSON` content of the Node, or `None` if the bucket lookup
        returned nothing or the request to the API failed.
    """
    # The outer `try` is what makes the `RequestException` handler cover the
    # API fallback: an exception raised inside an `except` clause is never
    # handled by a sibling `except` clause of the same `try` statement.
    try:
        try:
            return load_from_bucket(os.getenv('AWS_BUCKET'), f"{node_type.value}/{node_id}.jsonld")
        except ImportError:
            url = request_url(f"{api_url()}/download", type=node_type.value, id=node_id, mode=mode)
            return requests.get(url).json()
    except requests.exceptions.RequestException:
        return None
def find_node(node_type: SchemaType, args: dict, limit=10):
    """
    Finds nodes on the Hestia Platform.

    Parameters
    ----------
    node_type
        The `@type` of the Node.
    args
        Dictionary of key/value to exec search on.
        Example: use `{'bibliography.title': 'My biblio'}` on a `SchemaType.Source`
        to find all `Source`s having a `bibliography` with `title` == `My biblio`
    limit
        Optional - limit the number of results to return.

    Returns
    -------
    List[JSON]
        List of Nodes (as JSON) found. Only the `name` and `@id` fields are
        requested from the search service.
    """
    field_matches = [{'match': {field: expected}} for field, expected in args.items()]
    # every clause must match: the node type first, then each requested field
    must = [{'match': {'@type': node_type.value}}] + field_matches

    payload = json.dumps({
        'query': {'bool': {'must': must}},
        'limit': limit,
        '_source': {'includes': ['name', '@id']}
    })
    response = requests.post(search_url(), payload, headers={'Content-Type': 'application/json'})
    hits = response.json()['hits']['hits']
    return [hit.get('_source') for hit in hits]
def find_node_exact(node_type: SchemaType, args: dict):
    """
    Finds a single Node on the Hestia Platform.

    Parameters
    ----------
    node_type
        The `@type` of the Node.
    args
        Dictionary of key/value to exec search on.
        Example: use `{'bibliography.title': 'My biblio'}` on a `SchemaType.Source`
        to find all `Source`s having a `bibliography` with `title` == `My biblio`

    Returns
    -------
    JSON
        JSON of the node if exactly one is found, else `None`.
    """
    field_matches = [{'match': {field: expected}} for field, expected in args.items()]
    must = [{'match': {'@type': node_type.value}}] + field_matches

    # ask for up to 2 results so that an ambiguous match can be detected
    payload = json.dumps({
        'query': {'bool': {'must': must}},
        'limit': 2,
        '_source': {'includes': ['name', '@id']}
    })
    response = requests.post(search_url(), payload, headers={'Content-Type': 'application/json'})
    hits = response.json()['hits']['hits']

    # do not return a duplicate
    if len(hits) != 1:
        return None
    return hits[0].get('_source')