diff --git a/VERSION b/VERSION index 55cc4f6a..ee4c1b2e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.6.15 +2.6.17 diff --git a/entity-api-spec.yaml b/entity-api-spec.yaml index 8641138b..b2f8b578 100644 --- a/entity-api-spec.yaml +++ b/entity-api-spec.yaml @@ -644,7 +644,9 @@ components: - Invalid - Submitted - Incomplete - description: 'One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete' + - Approval + - Retracted + description: 'One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete|Approval|Retracted' title: type: string description: 'The dataset title.' @@ -791,9 +793,6 @@ components: type: string format: uuid description: 'The thumbnail image file previously uploaded to delete. Provide as a string of the file_uuid like: "c35002f9c3d49f8b77e1e2cd4a01803d"' - sub_status: - type: string - description: 'A sub-status provided to further define the status. The only current allowable value is "Retracted"' retraction_reason: type: string description: 'Information recorded about why a the dataset was retracted.' @@ -883,7 +882,7 @@ components: - Reorganized - Processing - Submitted - - Imcomplete + - Incomplete description: 'One of: New|Valid|Invalid|Error|Reorganized|Processing|Submitted|Incomplete' validation_message: type: string @@ -1098,8 +1097,10 @@ components: - Hold - Invalid - Submitted - - Imcomplete - description: 'One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete' + - Incomplete + - Approval + - Retracted + description: 'One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete|Approval|Retracted' title: type: string description: 'The Publication title.' 
diff --git a/src/app.py b/src/app.py index 60a5f960..0bb46f15 100644 --- a/src/app.py +++ b/src/app.py @@ -492,7 +492,7 @@ def get_ancestor_organs(id): public_entity = True if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): # Only published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: public_entity = False # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) @@ -981,7 +981,7 @@ def get_entity_provenance(id): if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): # Only published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) else: @@ -1542,7 +1542,7 @@ def create_entity(entity_type): # Only published datasets can have revisions made of them. Verify that that status of the Dataset specified # by previous_revision_uuid is published. Else, bad request error. 
- if previous_version_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if previous_version_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: bad_request_error(f"The previous_revision_uuid specified for this dataset must be 'Published' in order to create a new revision from it") # If the preceding "additional validations" did not raise an error, @@ -1720,11 +1720,6 @@ def update_entity(id): if ('status' in json_data_dict) and (json_data_dict['status']): has_updated_status = True - # Normalize user provided status - if "sub_status" in json_data_dict: - normalized_status = schema_manager.normalize_status(json_data_dict["sub_status"]) - json_data_dict["sub_status"] = normalized_status - # Get the entity dict from cache if exists # Otherwise query against uuid-api and neo4j to get the entity dict if the id exists entity_dict = query_target_entity(id, user_token) @@ -1940,7 +1935,7 @@ def get_ancestors(id): if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): # Only published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: public_entity = False # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) @@ -2147,7 +2142,7 @@ def get_parents(id): if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): # Only published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: public_entity = False # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) @@ -2355,7 +2350,7 @@ def get_siblings(id): if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): # Only published/public 
datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: public_entity = False # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) @@ -2385,8 +2380,9 @@ def get_siblings(id): include_revisions = request.args.get('include-old-revisions') if status is not None: status = status.lower() - if status not in ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted']: - bad_request_error("Invalid Dataset Status. Must be 'new', 'qa', or 'published' Case-Insensitive") + allowed_statuses = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete', 'approval', 'retracted'] + if status not in allowed_statuses: + bad_request_error(f"Invalid Dataset Status. Must be one of: {', '.join(allowed_statuses)}") if property_key is not None: property_key = property_key.lower() result_filtering_accepted_property_keys = ['uuid'] @@ -2491,7 +2487,7 @@ def get_tuplets(id): if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): # Only published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: public_entity = False # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) @@ -2519,8 +2515,9 @@ def get_tuplets(id): status = request.args.get('status') if status is not None: status = status.lower() - if status not in ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted']: - bad_request_error("Invalid Dataset Status. 
Must be 'new', 'qa', or 'published' Case-Insensitive") + allowed_statuses = ['new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete', 'approval', 'retracted'] + if status not in allowed_statuses: + bad_request_error(f"Invalid Dataset Status. Must be one of: {', '.join(allowed_statuses)}") if property_key is not None: property_key = property_key.lower() result_filtering_accepted_property_keys = ['uuid'] @@ -2742,7 +2739,7 @@ def get_collections(id): if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): bad_request_error(f"Unsupported entity type of id {id}: {normalized_entity_type}") - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: public_entity = False # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) @@ -2850,7 +2847,7 @@ def get_uploads(id): if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): bad_request_error(f"Unsupported entity type of id {id}: {normalized_entity_type}") - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) @@ -3166,7 +3163,7 @@ def get_globus_url(id): globus_server_uuid = app.config['GLOBUS_PROTECTED_ENDPOINT_UUID'] access_dir = access_level_prefix_dir(app.config['PROTECTED_DATA_SUBDIR']) dir_path = dir_path + access_dir + group_name + "/" - elif (entity_data_access_level == ACCESS_LEVEL_PROTECTED) and (entity_dict['status'] == 'Published'): + elif (entity_data_access_level == ACCESS_LEVEL_PROTECTED) and (entity_dict['status'] in ['Published', 'Retracted']): globus_server_uuid = app.config['GLOBUS_PUBLIC_ENDPOINT_UUID'] access_dir = 
access_level_prefix_dir(app.config['PUBLIC_DATA_SUBDIR']) dir_path = dir_path + access_dir + "/" @@ -3299,7 +3296,7 @@ def get_dataset_revision_number(id): bad_request_error("The entity of given id is not a Dataset or Publication") # Only published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required = True) @@ -3404,101 +3401,6 @@ def get_dataset_revision_number(id): # return jsonify(results) -""" -Retract a published dataset with a retraction reason and sub status - -Takes as input a json body with required fields "retracted_reason" and "sub_status". -Authorization handled by gateway. Only token of HuBMAP-Data-Admin group can use this call. - -Technically, the same can be achieved by making a PUT call to the generic entity update endpoint -with using a HuBMAP-Data-Admin group token. But doing this is strongly discouraged because we'll -need to add more validators to ensure when "retracted_reason" is provided, there must be a -"sub_status" filed and vise versa. So consider this call a special use case of entity update. - -Parameters ----------- -id : str - The HuBMAP ID (e.g. 
HBM123.ABCD.456) or UUID of target dataset - -Returns -------- -dict - The updated dataset details -""" -@app.route('/datasets//retract', methods=['PUT']) -def retract_dataset(id): - if READ_ONLY_MODE: - forbidden_error("Access not granted when entity-api in READ-ONLY mode") - - # Always expect a json body - require_json(request) - - # Parse incoming json string into json data(python dict object) - json_data_dict = request.get_json() - - # Normalize user provided status - if "sub_status" in json_data_dict: - normalized_status = schema_manager.normalize_status(json_data_dict["sub_status"]) - json_data_dict["sub_status"] = normalized_status - - # Use beblow application-level validations to avoid complicating schema validators - # The 'retraction_reason' and `sub_status` are the only required/allowed fields. No other fields allowed. - # Must enforce this rule otherwise we'll need to run after update triggers if any other fields - # get passed in (which should be done using the generic entity update call) - if 'retraction_reason' not in json_data_dict: - bad_request_error("Missing required field: retraction_reason") - - if 'sub_status' not in json_data_dict: - bad_request_error("Missing required field: sub_status") - - if len(json_data_dict) > 2: - bad_request_error("Only retraction_reason and sub_status are allowed fields") - - # Must be a HuBMAP-Data-Admin group token - token = get_user_token(request) - - # Retrieves the neo4j data for a given entity based on the id supplied. 
- # The normalized entity-type from this entity is checked to be a dataset - # If the entity is not a dataset and the dataset is not published, cannot retract - entity_dict = query_target_entity(id, token) - normalized_entity_type = entity_dict['entity_type'] - - # A bit more application-level validation - # Adding publication to validation 2/17/23 ~Derek Furst - if not schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset'): - bad_request_error("The entity of given id is not a Dataset or Publication") - - # Validate request json against the yaml schema - # The given value of `sub_status` is being validated at this step - try: - schema_manager.validate_json_data_against_schema(json_data_dict, normalized_entity_type, existing_entity_dict = entity_dict) - except schema_errors.SchemaValidationException as e: - # No need to log the validation errors - bad_request_error(str(e)) - - # Execute property level validators defined in schema yaml before entity property update - try: - schema_manager.execute_property_level_validators('before_property_update_validators', normalized_entity_type, request, entity_dict, json_data_dict) - except (schema_errors.MissingApplicationHeaderException, - schema_errors.InvalidApplicationHeaderException, - KeyError, - ValueError) as e: - bad_request_error(e) - - # No need to call after_update() afterwards because retraction doesn't call any after_update_trigger methods - merged_updated_dict = update_entity_details(request, normalized_entity_type, token, json_data_dict, entity_dict) - - complete_dict = schema_manager.get_complete_entity_result(request.args, token, merged_updated_dict) - - # Will also filter the result based on schema - normalized_complete_dict = schema_manager.normalize_entity_result_for_response(complete_dict) - - # Also reindex the updated entity node in elasticsearch via search-api - reindex_entity(entity_dict['uuid'], token) - - return jsonify(normalized_complete_dict) - - """ Retrieve a list of all 
revisions of a dataset from the id of any dataset in the chain. E.g: If there are 5 revisions, and the id for revision 4 is given, a list of revisions @@ -3548,7 +3450,7 @@ def get_revisions_list(id): bad_request_error("The entity is not a Dataset. Found entity type:" + normalized_entity_type) # Only published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required=True) @@ -3578,7 +3480,7 @@ def get_revisions_list(id): normalized_revisions_list.pop(0) # Also hide the 'next_revision_uuid' of the second last revision from response - if 'next_revision_uuid' in normalized_revisions_list[0]: + if normalized_revisions_list and 'next_revision_uuid' in normalized_revisions_list[0]: normalized_revisions_list[0].pop('next_revision_uuid') # Now all we need to do is to compose the result list @@ -3633,7 +3535,7 @@ def get_associated_organs_from_dataset(id): excluded_fields = schema_manager.get_fields_to_exclude('Sample') public_entity = True # published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: public_entity = False # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required=True) @@ -3694,7 +3596,7 @@ def get_associated_samples_from_dataset(id): bad_request_error("The entity of given id is not a Dataset or Publication") public_entity = True # published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: # Token is required and the user must belong to HuBMAP-READ group public_entity = False token = 
get_user_token(request, non_public_access_required=True) @@ -3754,7 +3656,7 @@ def get_associated_donors_from_dataset(id): bad_request_error("The entity of given id is not a Dataset or Publication") public_entity = True # published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: public_entity = False # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required=True) @@ -3829,7 +3731,7 @@ def get_prov_info_for_dataset(id): bad_request_error("The entity of given id is not a Dataset") # published/public datasets don't require token - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: # Token is required and the user must belong to HuBMAP-READ group token = get_user_token(request, non_public_access_required=True) @@ -4337,14 +4239,14 @@ def paired_dataset(id): if normalized_entity_type != 'Dataset': bad_request_error("The target entity of the specified id is not a Dataset") - if entity_dict['status'].lower() != DATASET_STATUS_PUBLISHED: + if entity_dict['status'].lower() not in [DATASET_STATUS_PUBLISHED, 'retracted']: if not user_in_hubmap_read_group(request): forbidden_error("Access not granted") paired_dataset = app_neo4j_queries.get_paired_dataset(neo4j_driver_instance, uuid, data_type, search_depth) out_list = [] for result in paired_dataset: - if user_in_hubmap_read_group(request) or result['status'].lower() == 'published': + if user_in_hubmap_read_group(request) or result['status'].lower() in ['published', 'retracted']: out_list.append(result['uuid']) if len(out_list) < 1: not_found_error(f"Search for paired datasets of type {data_type} for dataset with id {uuid} returned no results") @@ -4890,7 +4792,7 @@ def _get_entity_visibility(normalized_entity_type, entity_dict): # it 
can be used along with the user's authorization to determine access. entity_visibility=DataVisibilityEnum.NONPUBLIC if schema_manager.entity_type_instanceof(normalized_entity_type, 'Dataset') and \ - entity_dict['status'].lower() == DATASET_STATUS_PUBLISHED: + entity_dict['status'].lower() in [DATASET_STATUS_PUBLISHED, 'retracted']: entity_visibility=DataVisibilityEnum.PUBLIC elif schema_manager.entity_type_instanceof(normalized_entity_type, 'Collection') and \ 'registered_doi' in entity_dict and \ @@ -4902,12 +4804,11 @@ def _get_entity_visibility(normalized_entity_type, entity_dict): # Get the data_access_level for each Dataset in the Collection from Neo4j collection_dataset_statuses = schema_neo4j_queries.get_collection_datasets_statuses(neo4j_driver_instance ,entity_dict['uuid']) + PUBLIC_STATUSES = {SchemaConstants.DATASET_STATUS_PUBLISHED, "retracted"} - # If the list of distinct statuses for Datasets in the Collection only has one entry, and - # it is 'published', the Collection is public - if len(collection_dataset_statuses) == 1 and \ - collection_dataset_statuses[0].lower() == SchemaConstants.DATASET_STATUS_PUBLISHED: - entity_visibility=DataVisibilityEnum.PUBLIC + # Public only when there is at least one status and every status is public; all() is True for an empty list, so guard against a Collection with no Datasets + if collection_dataset_statuses and all(status.lower() in PUBLIC_STATUSES for status in collection_dataset_statuses): + entity_visibility = DataVisibilityEnum.PUBLIC elif normalized_entity_type == 'Upload': # Upload entities require authorization to access, so keep the # entity_visibility as non-public, as initialized outside block. 
diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 33782290..ff2e9b19 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -542,7 +542,7 @@ def get_sorted_revisions(neo4j_driver, uuid): def get_sorted_multi_revisions(neo4j_driver, uuid, fetch_all=True, property_key=False): results = [] - match_case = '' if fetch_all is True else 'AND prev.status = "Published" AND next.status = "Published" ' + match_case = '' if fetch_all is True else 'AND prev.status IN ["Published", "Retracted"] AND next.status IN ["Published", "Retracted"] ' collect_prop = f".{property_key}" if property_key else '' query = ( @@ -1095,7 +1095,7 @@ def get_all_dataset_samples(neo4j_driver, dataset_uuid): def get_sankey_info(neo4j_driver, public_only): public_only_query = " " if public_only: - public_only_query = f"AND toLower(ds.status) = 'published' " + public_only_query = f"AND toLower(ds.status) IN ['published', 'retracted'] " query = (f"MATCH (donor:Donor)-[:ACTIVITY_INPUT]->(organ_activity:Activity)-[:ACTIVITY_OUTPUT]-> " f"(organ:Sample {{sample_category:'organ'}})-[*]->(a:Activity)-[:ACTIVITY_OUTPUT]->(ds:Dataset) " f"WHERE toLower(a.creation_action) = 'create dataset activity' " @@ -1138,7 +1138,7 @@ def get_sankey_info(neo4j_driver, public_only): def get_unpublished(neo4j_driver): query = ( "MATCH (ds:Dataset)<-[*]-(d:Donor) " - "WHERE ds.status <> 'Published' and ds.status <> 'Hold' " + "WHERE NOT ds.status IN ['Published', 'Hold', 'Retracted'] " # specimen_type -> sample_category 12/15/2022 "OPTIONAL MATCH (ds)<-[*]-(s:Sample {sample_category:'organ'}) " "RETURN distinct ds.data_types as data_types, ds.group_name as organization, ds.uuid as uuid, " diff --git a/src/schema/provenance_schema.yaml b/src/schema/provenance_schema.yaml index 3107832f..b013570c 100644 --- a/src/schema/provenance_schema.yaml +++ b/src/schema/provenance_schema.yaml @@ -408,7 +408,7 @@ ENTITIES: - validate_dataset_not_component generated: true indexed: true - description: 
"One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete" + description: "One of: New|Processing|Published|QA|Error|Hold|Invalid|Submitted|Incomplete|Approval|Retracted" before_create_trigger: set_dataset_status_new after_create_trigger: set_status_history after_update_trigger: update_status @@ -640,18 +640,7 @@ ENTITIES: retraction_reason: type: string indexed: true - before_property_update_validators: - - validate_if_retraction_permitted - - validate_sub_status_provided description: 'Information recorded about why a the dataset was retracted.' - sub_status: - type: string - indexed: true - before_property_update_validators: - - validate_if_retraction_permitted - - validate_retraction_reason_provided - - validate_retracted_dataset_sub_status_value - description: 'A sub-status provided to further define the status. The only current allowable value is "Retracted"' provider_info: type: string indexed: true diff --git a/src/schema/schema_manager.py b/src/schema/schema_manager.py index a7e55595..490867c6 100644 --- a/src/schema/schema_manager.py +++ b/src/schema/schema_manager.py @@ -1470,12 +1470,12 @@ def normalize_entity_type(entity_type): Parameters ---------- status : str - One of the status types: New|Processing|QA|Published|Error|Hold|Invalid + One of the status types: New|Processing|QA|Published|Error|Hold|Invalid|Approval|Retracted Returns ------- string - One of the normalized status types: New|Processing|QA|Published|Error|Hold|Invalid + One of the normalized status types: New|Processing|QA|Published|Error|Hold|Invalid|Approval|Retracted """ def normalize_status(status): if status.lower() == "qa": diff --git a/src/schema/schema_neo4j_queries.py b/src/schema/schema_neo4j_queries.py index 7fb1cf8c..3d88de6c 100644 --- a/src/schema/schema_neo4j_queries.py +++ b/src/schema/schema_neo4j_queries.py @@ -1692,7 +1692,7 @@ def get_component_dataset_uuids(neo4j_driver, uuid): def count_attached_published_datasets(neo4j_driver, entity_type, uuid): 
query = (f"MATCH (e:{entity_type})-[:ACTIVITY_INPUT|ACTIVITY_OUTPUT*]->(d:Dataset) " # Use the string function toLower() to avoid case-sensetivity issue - f"WHERE e.uuid='{uuid}' AND toLower(d.status) = 'published' " + f"WHERE e.uuid='{uuid}' AND toLower(d.status) IN ['published', 'retracted'] " # COLLECT() returns a list # apoc.coll.toSet() reruns a set containing unique nodes f"RETURN COUNT(d) AS {record_field_name}") diff --git a/src/schema/schema_triggers.py b/src/schema/schema_triggers.py index 811eed0d..f1387dd9 100644 --- a/src/schema/schema_triggers.py +++ b/src/schema/schema_triggers.py @@ -1766,7 +1766,7 @@ def update_status(property_key, normalized_type, request_args, user_token, exist set_status_history(property_key, normalized_type, request_args, user_token, existing_data_dict, new_data_dict) # Only apply to non-published parent datasets - if status.lower() != 'published': + if status.lower() not in ['published', 'retracted']: # Only sync the child component datasets status for Multi-Assay Split component_dataset_uuids = schema_neo4j_queries.get_component_dataset_uuids(schema_manager.get_neo4j_driver_instance(), uuid) diff --git a/src/schema/schema_validators.py b/src/schema/schema_validators.py index 4ec3cb21..70674080 100644 --- a/src/schema/schema_validators.py +++ b/src/schema/schema_validators.py @@ -298,8 +298,9 @@ def halt_DOI_if_unpublished_dataset(property_key, normalized_entity_type, reques # simply get the existing, distinct 'data_access_level' setting for all the Datasets in the Collection distinct_dataset_statuses = schema_neo4j_queries.get_collection_datasets_statuses(neo4j_driver_instance ,existing_data_dict['uuid']) - if len( distinct_dataset_statuses) != 1 or \ - distinct_dataset_statuses[0].lower() != SchemaConstants.DATASET_STATUS_PUBLISHED: + PUBLIC_STATUSES = {SchemaConstants.DATASET_STATUS_PUBLISHED, 'retracted'} + + if not distinct_dataset_statuses or not all(status.lower() in PUBLIC_STATUSES for status in distinct_dataset_statuses): raise ValueError(f"Unable to 
modify existing {existing_data_dict['entity_type']}" f" {existing_data_dict['uuid']} for DOI since it contains unpublished Datasets.") @@ -414,8 +415,9 @@ def validate_application_header_before_property_update(property_key, normalized_ def validate_dataset_status_value(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): # Use lowercase for comparison accepted_status_values = [ - 'new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete' + 'new', 'processing', 'published', 'qa', 'error', 'hold', 'invalid', 'submitted', 'incomplete', 'approval' ] + # Retracted intentionally omitted. Status will be set to retracted only on a manual basis new_status = new_data_dict[property_key].lower() if new_status not in accepted_status_values: @@ -426,7 +428,7 @@ def validate_dataset_status_value(property_key, normalized_entity_type, request, # If status == 'Published' already in Neo4j, then fail for any changes at all # Because once published, the dataset should be read-only - if existing_data_dict['status'].lower() == SchemaConstants.DATASET_STATUS_PUBLISHED: + if existing_data_dict['status'].lower() in [SchemaConstants.DATASET_STATUS_PUBLISHED, 'retracted']: raise ValueError(f"The status of this {normalized_entity_type} is already 'Published', status change is not allowed") # HTTP header names are case-insensitive @@ -480,6 +482,7 @@ def validate_status_changed(property_key, normalized_entity_type, request, exist The json data in request body, already after the regular validations """ def validate_if_retraction_permitted(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): + # This validator is currently unused. Keeping it in case we decide we want an api endpoint for retraction at some point. 
if 'status' not in existing_data_dict: raise KeyError("Missing 'status' key in 'existing_data_dict' during calling 'validate_if_retraction_permitted()' validator method.") @@ -506,27 +509,6 @@ def validate_if_retraction_permitted(property_key, normalized_entity_type, reque raise ValueError("Permission denied, retraction is not allowed") -""" -Validate the sub_status field is also provided when Dataset.retraction_reason is provided on update via PUT - -Parameters ----------- -property_key : str - The target property key -normalized_type : str - Submission -request: Flask request object - The instance of Flask request passed in from application request -existing_data_dict : dict - A dictionary that contains all existing entity properties -new_data_dict : dict - The json data in request body, already after the regular validations -""" -def validate_sub_status_provided(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): - if 'sub_status' not in new_data_dict: - raise ValueError("Missing sub_status field when retraction_reason is provided") - - """ Validate the reaction_reason field is also provided when Dataset.sub_status is provided on update via PUT @@ -548,31 +530,6 @@ def validate_retraction_reason_provided(property_key, normalized_entity_type, re raise ValueError("Missing retraction_reason field when sub_status is provided") -""" -Validate the provided value of Dataset.sub_status on update via PUT - -Parameters ----------- -property_key : str - The target property key -normalized_type : str - Submission -request: Flask request object - The instance of Flask request passed in from application request -existing_data_dict : dict - A dictionary that contains all existing entity properties -new_data_dict : dict - The json data in request body, already after the regular validations -""" -def validate_retracted_dataset_sub_status_value(property_key, normalized_entity_type, request, existing_data_dict, new_data_dict): - # Use lowercase for 
comparison - accepted_sub_status_values = ['retracted'] - sub_status = new_data_dict[property_key].lower() - - if sub_status not in accepted_sub_status_values: - raise ValueError("Invalid sub_status value of the Dataset to be retracted") - - """ Validate the provided value of Upload.status on update via PUT @@ -1019,7 +976,7 @@ def _validate_application_header(applications_allowed, request_headers): def _is_entity_locked_against_update(existing_entity_dict): entity_type = existing_entity_dict['entity_type'] if entity_type in ['Publication','Dataset']: - if 'status' in existing_entity_dict and existing_entity_dict['status'] == 'Published': + if 'status' in existing_entity_dict and existing_entity_dict['status'] in ['Published', 'Retracted']: raise schema_errors.LockedEntityUpdateException(f"Permission denied to change a published/public {entity_type}.") elif entity_type in ['Donor','Sample']: if 'data_access_level' in existing_entity_dict and existing_entity_dict['data_access_level'] == 'public': diff --git a/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml b/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml index e75b2935..07bd4e1c 100644 --- a/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml +++ b/src/schema_templating/example-yaml-templates/api-template-test/entity-Template.yaml @@ -600,7 +600,9 @@ x-ref-components: - Error - Hold - Invalid - description: "One of: New|Processing|QA|Published|Error|Hold|Invalid" + - Approval + - Retracted + description: "One of: New|Processing|QA|Published|Error|Hold|Invalid|Approval|Retracted" title: type: string description: "The dataset title." 
diff --git a/src/schema_templating/example-yaml-templates/dataset-schema.yaml b/src/schema_templating/example-yaml-templates/dataset-schema.yaml index 8dbd2218..8e9b914c 100644 --- a/src/schema_templating/example-yaml-templates/dataset-schema.yaml +++ b/src/schema_templating/example-yaml-templates/dataset-schema.yaml @@ -91,7 +91,9 @@ Dataset: - Error - Hold - Invalid - description: "One of: New|Processing|QA|Published|Error|Hold|Invalid" + - Approval + - Retracted + description: "One of: New|Processing|QA|Published|Error|Hold|Invalid|Approval|Retracted" title: type: string description: "The dataset title." diff --git a/src/schema_templating/example-yaml-templates/entity-api-spec-TEMPLATE.yaml b/src/schema_templating/example-yaml-templates/entity-api-spec-TEMPLATE.yaml index 8c4515cc..6b06be25 100644 --- a/src/schema_templating/example-yaml-templates/entity-api-spec-TEMPLATE.yaml +++ b/src/schema_templating/example-yaml-templates/entity-api-spec-TEMPLATE.yaml @@ -579,7 +579,9 @@ components: - Error - Hold - Invalid - description: "One of: New|Processing|QA|Published|Error|Hold|Invalid" + - Approval + - Retracted + description: "One of: New|Processing|QA|Published|Error|Hold|Invalid|Approval|Retracted" title: type: string description: "The dataset title."