from pkgutil import extend_path
import requests
import os
import json
from hestia_earth.schema import SchemaType
# Extend this package's search path (pkgutil-style namespace package) so that
# sub-modules living in other directories on `sys.path` can be imported from it.
__path__ = extend_path(__path__, __name__)
# Module-level cache for the boto3 S3 client; stays `None` until `get_s3_client` is first called.
s3_client = None
# improves speed for connecting on subsequent calls
# TODO: find a better way to do this, like profiling?
def get_s3_client():
    """
    Return the shared boto3 S3 client, creating it on the first call.

    `boto3` is imported inside the function, so an `ImportError` is raised
    on every call when the package is not installed.

    Returns
    -------
    The boto3 S3 client stored in the module-level `s3_client` variable.
    """
    global s3_client
    import boto3

    if s3_client is None:
        # first call: create the client once and keep it for subsequent calls
        s3_client = boto3.client('s3')
    return s3_client
def non_empty_value(value):
    """
    Tell whether a value carries content.

    Parameters
    ----------
    value
        Either a string, a list, a number or None.

    Returns
    -------
    bool
        `False` when the value is `None`, an empty string or an empty list, `True` otherwise
        (numbers such as `0` are considered non-empty).
    """
    if value is None:
        return False
    return value != '' and value != []
def join_args(values):
    """
    Join the non-empty values with `&` and strip surrounding whitespace from the result.

    Parameters
    ----------
    values
        Iterable of strings; entries rejected by `non_empty_value` (`None`, `''`, `[]`) are skipped.

    Returns
    -------
    str
        The remaining values joined by `&`.
    """
    kept = [item for item in values if non_empty_value(item)]
    return '&'.join(kept).strip()
def api_url():
    """
    Return the base URL of the Hestia API.

    Returns
    -------
    str
        The value of the `API_URL` environment variable, or `https://api.hestia.earth` when it is not set.
    """
    return os.getenv('API_URL', 'https://api.hestia.earth')
def search_url():
    """
    Return the base URL of the Hestia search service.

    Returns
    -------
    str
        The value of the `SEARCH_URL` environment variable, or `https://search.hestia.earth` when it is not set.
    """
    return os.getenv('SEARCH_URL', 'https://search.hestia.earth')
def request_url(base_url: str, **kwargs):
    """
    Build a URL by appending the keyword arguments to `base_url` as a query string.

    Parameters
    ----------
    base_url
        The URL the query string is appended to.
    **kwargs
        Query parameters. Parameters whose value is falsy (`None`, `''`, `0`, `False`, empty list...)
        are left out of the query string.

    Returns
    -------
    str
        `base_url` followed by `?` and the `key=value` pairs joined by `&`.
        The `?` is kept even when no parameter remains.
    """
    from urllib.parse import quote

    # Percent-encode keys and values: without it a value containing `&`, `=`, `#`
    # or whitespace would be read by the server as a different set of parameters.
    pairs = [
        f"{quote(str(key), safe='')}={quote(str(value), safe='')}"
        for key, value in kwargs.items()
        if value
    ]
    return f"{base_url}?{'&'.join(pairs)}"
def load_from_bucket(bucket: str, key: str):
    """
    Read an object from an S3 bucket and parse its content as JSON.

    `botocore` is imported inside the function, so an `ImportError` is raised
    when the package is not installed.

    Parameters
    ----------
    bucket
        Name of the S3 bucket.
    key
        Key of the object inside the bucket.

    Returns
    -------
    The parsed JSON content, or `None` when S3 answers with a `ClientError`.
    """
    from botocore.exceptions import ClientError

    try:
        s3_object = get_s3_client().get_object(Bucket=bucket, Key=key)
        raw_content = s3_object['Body'].read()
    except ClientError:
        return None
    return json.loads(raw_content)
def download_hestia(node_id: str, node_type=SchemaType.TERM, mode=''):
    """
    Download a Node from the Hestia Database.

    The Node is first read from the S3 bucket named by the `AWS_BUCKET` environment variable.
    When the AWS libraries are not installed (`ImportError`), it is downloaded from the Hestia API instead.

    Parameters
    ----------
    node_id
        The `@id` of the Node.
    node_type
        The `@type` of the Node.
    mode
        Optional - use `csv` to download as a CSV file, `zip` to download as a ZIP file. Defaults to `JSON`.
        Only used when downloading from the API.

    Returns
    -------
    JSON
        The `JSON` content of the Node, or `None` if it could not be retrieved.
    """
    try:
        return load_from_bucket(os.getenv('AWS_BUCKET'), f"{node_type.value}/{node_id}.jsonld")
    except ImportError:
        # boto3 / botocore are imported lazily by the S3 helpers; without them use the API
        pass

    url = request_url(f"{api_url()}/download", type=node_type.value, id=node_id, mode=mode)
    # The request needs its own `try`: an exception raised inside an `except` handler
    # is not caught by the other `except` clauses of the same `try` statement.
    try:
        return requests.get(url).json()
    except requests.exceptions.RequestException:
        return None
def find_node(node_type: SchemaType, args: dict, limit=10):
    """
    Search the Hestia Platform for Nodes matching every given field.

    Parameters
    ----------
    node_type
        The `@type` of the Node.
    args
        Dictionary of key/value to exec search on. Example: use `{'bibliography.title': 'My biblio'}` on a
        `SchemaType.Source` to find all `Source`s having a `bibliography` with `title` == `My biblio`
    limit
        Optional - limit the number of results to return.

    Returns
    -------
    List[JSON]
        List of Nodes (as JSON) found; only the `name` and `@id` fields are requested.
    """
    field_clauses = [{'match': {field: value}} for field, value in args.items()]
    # every clause must match: the Node type first, then each requested field
    clauses = [{'match': {'@type': node_type.value}}, *field_clauses]
    payload = json.dumps({
        'query': {'bool': {'must': clauses}},
        'limit': limit,
        '_source': {'includes': ['name', '@id']}
    })
    response = requests.post(search_url(), payload, headers={'Content-Type': 'application/json'})
    return [hit.get('_source') for hit in response.json()['hits']['hits']]
def find_node_exact(node_type: SchemaType, args: dict):
    """
    Search the Hestia Platform for the one Node matching every given field.

    Parameters
    ----------
    node_type
        The `@type` of the Node.
    args
        Dictionary of key/value to exec search on. Example: use `{'bibliography.title': 'My biblio'}` on a
        `SchemaType.Source` to find all `Source`s having a `bibliography` with `title` == `My biblio`

    Returns
    -------
    JSON
        JSON of the node if exactly one is found, else `None`.
    """
    # asking for two results is enough to tell a unique match from an ambiguous one
    matches = find_node(node_type, args, limit=2)
    if len(matches) != 1:
        # nothing found, or several candidates: do not return a duplicate
        return None
    return matches[0]