Difference between revisions of "Data Object JSON v4"
Jump to navigation
Jump to search
(2 intermediate revisions by the same user not shown) | |||
Line 124: | Line 124: | ||
"description": "For dataset objects only, provides an indication of the types of keys used in the dataset, in particular if they are anonymous or pseudonymous", | "description": "For dataset objects only, provides an indication of the types of keys used in the dataset, in particular if they are anonymous or pseudonymous", | ||
"properties": { | "properties": { | ||
− | " | + | "record_keys_type_id": { |
"type": "integer", | "type": "integer", | ||
"description": "An integer referencing the relevant record in the dataset_recordkey_types enumeration / lookup table" | "description": "An integer referencing the relevant record in the dataset_recordkey_types enumeration / lookup table" | ||
}, | }, | ||
− | " | + | "record_keys_type": { |
"type": "string", | "type": "string", | ||
"description": "One of the allowed types, from the dataset_recordkey_types enumeration, e.g. 'Anonymised', or 'Pseudonymised'" | "description": "One of the allowed types, from the dataset_recordkey_types enumeration, e.g. 'Anonymised', or 'Pseudonymised'" | ||
}, | }, | ||
− | " | + | "record_keys_details": { |
"type": "string", | "type": "string", | ||
"description": "Provides further details of the record key types, perhaps referring to dataset preparation, if available" | "description": "Provides further details of the record key types, perhaps referring to dataset preparation, if available" | ||
Line 143: | Line 143: | ||
"description": "For dataset objects only, provides an indication of the level of identifiers in the dataset", | "description": "For dataset objects only, provides an indication of the level of identifiers in the dataset", | ||
"properties": { | "properties": { | ||
− | " | + | "deident_type_id": { |
"type": "integer", | "type": "integer", | ||
"description": "An integer referencing the relevant record in the dataset_identifier_types enumeration / lookup table" | "description": "An integer referencing the relevant record in the dataset_identifier_types enumeration / lookup table" | ||
}, | }, | ||
− | " | + | "deident_type": { |
"type": "string", | "type": "string", | ||
− | "description": "One of the allowed types, from the | + | "description": "One of the allowed types, from the dataset_de-identification_levels enumeration, e.g. 'De-identification applied'" |
}, | }, | ||
− | "": { | + | "deident_direct": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If direct Identifiers were removed from the data set" |
} | } | ||
− | "": { | + | "deident_hipaa": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If the US HIPAA de-identification rules have been applied" |
} | } | ||
− | "": { | + | "deident_dates": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If dates have been rebased and / or replaced by integers" |
} | } | ||
− | "": { | + | "deident_nonarr": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If narrative text fields have been removed" |
} | } | ||
− | "": { | + | "deident_kanon": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "if k-anonymisation (k>=2) has been achieved" |
} | } | ||
− | " | + | "deident_details": { |
"type": "string", | "type": "string", | ||
− | "description": "Provides further details of the | + | "description": "Provides further details of the de-identification of the dataset, perhaps referring to other documents and / or a URL." |
} | } | ||
} | } | ||
Line 182: | Line 182: | ||
"description": "For dataset objects only, provides an indication of the level of explicit consent for secondary use of the data", | "description": "For dataset objects only, provides an indication of the level of explicit consent for secondary use of the data", | ||
"properties": { | "properties": { | ||
− | " | + | "consent_type_id": { |
"type": "integer", | "type": "integer", | ||
"description": "An integer referencing the relevant record in the dataset_consent_types enumeration / lookup table" | "description": "An integer referencing the relevant record in the dataset_consent_types enumeration / lookup table" | ||
}, | }, | ||
− | " | + | "consent_type": { |
"type": "string", | "type": "string", | ||
− | "description": "One of the allowed types, from the dataset_consent_types enumeration, e.g. ' | + | "description": "One of the allowed types, from the dataset_consent_types enumeration, e.g. 'No explicit consent', or 'General research use'" |
}, | }, | ||
− | "": { | + | "consent_noncommercial": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If further use restricted to non-commercial users" |
} | } | ||
− | "": { | + | "consent_geog_restrict": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If any geographical restrictions apply to further use" |
} | } | ||
− | "": { | + | "consent_research_type": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If further use is restricted to specific type(s) of research" |
} | } | ||
− | "": { | + | "consent_genetic_only": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If further research is restricted to genetic studies only" |
} | } | ||
− | "": { | + | "consent_no_methods": { |
"type": "boolean", | "type": "boolean", | ||
− | "description": "" | + | "description": "If the data cannot be used for work that is purely for methodological / algorithmic development" |
} | } | ||
− | " | + | "consents_details": { |
"type": "string", | "type": "string", | ||
− | "description": "Provides further details of the consent for secondary use associated with the dataset, if available" | + | "description": "Provides further details of the consent for secondary use associated with the dataset, if available, and in particular any details associated with the specific restrictions listed above." |
} | } | ||
} | } |
Latest revision as of 09:45, 14 September 2020
The current (version 4) form of the JSON schema for the Data object file is shown below.
This version was created in September 2020.
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$id": "http://ecrin.org/json_schemas/mdrdataobject/v4.json",
  "title": "XDC Data Object definition",
  "description": "ECRIN Metadata Repository for clinical research objects, Data Object JSON definition, version 4 September 2020",
  "type": "object",
  "required": [
    "id",
    "display_title",
    "object_class",
    "object_type",
    "publication_year",
    "access_type"
  ],
  "additionalProperties": false,
  "properties": {
    "file_type": {
      "type": "string",
      "description": "always 'data_object'"
    },
    "id": {
      "type": "integer",
      "description": "Internal accession number of the data object within MDR database"
    },
    "doi": {
      "type": "string",
      "description": "The doi (without prefixes, beginning with '10...') of the object, if it has one"
    },
    "display_title": {
      "type": "string",
      "description": "The title of the object - if a unique title is not part of the source title it may be a composite of the study title and the object type"
    },
    "version": {
      "type": "string",
      "description": "Indication of version if there is one, in the format of the source data"
    },
    "object_class": {
      "type": "object",
      "properties": {
        "id": {
          "type": "integer",
          "description": "An integer referencing the relevant record in the object_classes enumeration / lookup table"
        },
        "name": {
          "type": "string",
          "description": "One of the allowed types, from the object_classes enumeration, e.g. 'Dataset', or 'Text'"
        }
      }
    },
    "object_type": {
      "type": "object",
      "properties": {
        "id": {
          "type": "integer",
          "description": "An integer referencing the relevant record in the object_types enumeration / lookup table"
        },
        "name": {
          "type": "string",
          "description": "One of the allowed types, from the object_types enumeration, e.g. 'Study Protocol', or 'IPD final analysis dataset (full study population)'"
        }
      }
    },
    "publication_year": {
      "type": "integer",
      "description": "A four digit number indicating the year the object was published, i.e. became available"
    },
    "language_code": {
      "type": "array",
      "items": {
        "$ref": "#/definitions/lang_code"
      }
    },
    "managing_organisation": {
      "$ref": "#/definitions/organisation"
    },
    "access_type": {
      "type": "object",
      "properties": {
        "id": {
          "type": "integer",
          "description": "An integer referencing the relevant record in the object_access_types enumeration / lookup table"
        },
        "name": {
          "type": "string",
          "description": "One of the allowed types, from the object_access_types enumeration, e.g. 'Public download (self-attestation required)', or 'Case by case download'"
        }
      }
    },
    "access_details": {
      "type": "string",
      "description": "A textual description of the access being offered, for example identifying the groups to which access is granted, the criteria on which a case-by-case decision would be based, any further restrictions on on-screen access, etc."
    },
    "access_details_url": {
      "type": "string",
      "format": "uri",
      "description": "A url of a web page that provides details of the access available, possibly including the practical details required or a form to use to apply for access"
    },
    "url_last_checked": {
      "type": "string",
      "format": "date",
      "description": "If a check has been carried out, the date the url last responded with a 200 (success) message, as a string (yyyy MMM dd)"
    },
    "eosc_category": {
      "type": "integer",
      "description": "An integer between 0 and 3 inclusive, representing the type of data object / access management required, using a recommended eosc categorisation"
    },
    "dataset_record_keys": {
      "type": "object",
      "description": "For dataset objects only, provides an indication of the types of keys used in the dataset, in particular if they are anonymous or pseudonymous",
      "properties": {
        "record_keys_type_id": {
          "type": "integer",
          "description": "An integer referencing the relevant record in the dataset_recordkey_types enumeration / lookup table"
        },
        "record_keys_type": {
          "type": "string",
          "description": "One of the allowed types, from the dataset_recordkey_types enumeration, e.g. 'Anonymised', or 'Pseudonymised'"
        },
        "record_keys_details": {
          "type": "string",
          "description": "Provides further details of the record key types, perhaps referring to dataset preparation, if available"
        }
      }
    },
    "dataset_identifiers": {
      "type": "object",
      "description": "For dataset objects only, provides an indication of the level of identifiers in the dataset",
      "properties": {
        "deident_type_id": {
          "type": "integer",
          "description": "An integer referencing the relevant record in the dataset_identifier_types enumeration / lookup table"
        },
        "deident_type": {
          "type": "string",
          "description": "One of the allowed types, from the dataset_de-identification_levels enumeration, e.g. 'De-identification applied'"
        },
        "deident_direct": {
          "type": "boolean",
          "description": "If direct Identifiers were removed from the data set"
        },
        "deident_hipaa": {
          "type": "boolean",
          "description": "If the US HIPAA de-identification rules have been applied"
        },
        "deident_dates": {
          "type": "boolean",
          "description": "If dates have been rebased and / or replaced by integers"
        },
        "deident_nonarr": {
          "type": "boolean",
          "description": "If narrative text fields have been removed"
        },
        "deident_kanon": {
          "type": "boolean",
          "description": "if k-anonymisation (k>=2) has been achieved"
        },
        "deident_details": {
          "type": "string",
          "description": "Provides further details of the de-identification of the dataset, perhaps referring to other documents and / or a URL."
        }
      }
    },
    "dataset_consents": {
      "type": "object",
      "description": "For dataset objects only, provides an indication of the level of explicit consent for secondary use of the data",
      "properties": {
        "consent_type_id": {
          "type": "integer",
          "description": "An integer referencing the relevant record in the dataset_consent_types enumeration / lookup table"
        },
        "consent_type": {
          "type": "string",
          "description": "One of the allowed types, from the dataset_consent_types enumeration, e.g. 'No explicit consent', or 'General research use'"
        },
        "consent_noncommercial": {
          "type": "boolean",
          "description": "If further use restricted to non-commercial users"
        },
        "consent_geog_restrict": {
          "type": "boolean",
          "description": "If any geographical restrictions apply to further use"
        },
        "consent_research_type": {
          "type": "boolean",
          "description": "If further use is restricted to specific type(s) of research"
        },
        "consent_genetic_only": {
          "type": "boolean",
          "description": "If further research is restricted to genetic studies only"
        },
        "consent_no_methods": {
          "type": "boolean",
          "description": "If the data cannot be used for work that is purely for methodological / algorithmic development"
        },
        "consents_details": {
          "type": "string",
          "description": "Provides further details of the consent for secondary use associated with the dataset, if available, and in particular any details associated with the specific restrictions listed above."
        }
      }
    },
    "object_identifiers": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates the value and type of the identifier, and optionally its date and organisation of origin",
        "required": ["id", "identifier_value", "identifier_type"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Data object identifier record primary key, generated automatically in database"
          },
          "identifier_value": {
            "type": "string",
            "description": "The identifier value, in a standardised format (for each identifier type)"
          },
          "identifier_type": {
            "type": "object",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the identifier_types enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the identifier_types enumeration, e.g. 'Trial Registry ID', or 'Sponsor ID'"
              }
            }
          },
          "identifier_date": {
            "type": "string",
            "description": "The date the identifier was allocated, if known, in a string 'yyyy MMM dd' format, e.g. '2015 Dec 12'"
          },
          "identifier_org": {
            "$ref": "#/definitions/organisation"
          }
        }
      }
    },
    "object_titles": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates the type and value of the 'other title'",
        "required": ["id", "title_type", "title_text"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Other title record primary key, generated automatically in database"
          },
          "title_type": {
            "type": "object",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the title_types enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the title_types enumeration, e.g. 'Abbreviation or Acronym', or 'Translated Title'"
              }
            }
          },
          "title_text": {
            "type": "string"
          },
          "lang_code": {
            "$ref": "#/definitions/lang_code"
          },
          "contains_html": {
            "type": "boolean",
            "description": "Whether the title text has any embedded html tags, e.g. for super or subscripts"
          },
          "comments": {
            "type": "string"
          }
        }
      }
    },
    "object_contributors": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates the people and / or organisations that contributed to the data object, directly or indirectly",
        "required": ["id", "contribution_type"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Object contributor record primary key, generated automatically in database"
          },
          "contribution_type": {
            "type": "object",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the contribution_types enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the contribution_types enumeration, e.g. 'Creator', or 'Trial Sponsor'"
              }
            }
          },
          "is_individual": {
            "type": "boolean",
            "description": "Indicates whether the contributor is an individual or an organisation, e.g. pharma company or research network."
          },
          "person": {
            "type": "object",
            "description": "A composite object that may include an id, but should have as a minimum either a last_name or full_name included. Not required if there is an organisation entry.",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the people table"
              },
              "family_name": {
                "type": "string",
                "description": "The person's family name, the 'key' name under which they are listed - usually the last name in Western cultures"
              },
              "given_name": {
                "type": "string",
                "description": "The person's given name, or the first name by which they are normally known. May be replaced by initials"
              },
              "full_name": {
                "type": "string",
                "description": "The person's full name, in the order of first_name last_name"
              },
              "identifier": {
                "type": "object",
                "properties": {
                  "person_id": {
                    "type": "string",
                    "description": "The person's identifier within the identifier scheme"
                  },
                  "scheme_name": {
                    "type": "string",
                    "description": "A recognised identifier scheme name, most commonly ORCID"
                  }
                }
              },
              "affiliation": {
                "type": "object",
                "properties": {
                  "affiliation": {
                    "type": "string",
                    "description": "The person's organisational affiliation as it appears in the source data"
                  },
                  "org_id": {
                    "type": "string",
                    "description": "The organisation's identifier within the identifier scheme"
                  },
                  "scheme_name": {
                    "type": "string",
                    "description": "A recognised identifier scheme name"
                  }
                }
              }
            }
          },
          "organisation": {
            "$ref": "#/definitions/organisation"
          }
        }
      }
    },
    "object_dates": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates the date and its type",
        "required": ["id", "date_type", "is_date_range", "start_date"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Object date record primary key, generated automatically in database"
          },
          "date_type": {
            "type": "object",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the date_types enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the date_types enumeration, e.g. 'Available', or 'Updated'"
              }
            }
          },
          "is_date_range": {
            "type": "boolean",
            "default": false,
            "description": "If true both start and end dates should be considered, otherwise just the start date"
          },
          "date_as_string": {
            "type": "string",
            "description": "String representation of the date, used for all dates including partial dates and date ranges "
          },
          "start_date": {
            "type": "object",
            "properties": {
              "start_year": {
                "type": "integer",
                "description": "A 4 digit integer indicating the year"
              },
              "start_month": {
                "type": "integer",
                "description": "An integer indicating the month"
              },
              "start_day": {
                "type": "integer",
                "description": "An integer indicating the day"
              }
            }
          },
          "end_date": {
            "type": "object",
            "properties": {
              "end_year": {
                "type": "integer",
                "description": "A 4 digit integer indicating the year"
              },
              "end_month": {
                "type": "integer",
                "description": "An integer indicating the month"
              },
              "end_day": {
                "type": "integer",
                "description": "An integer indicating the day"
              }
            }
          },
          "comments": {
            "type": "string",
            "description": "Additional comments or information about the date"
          }
        }
      }
    },
    "object_descriptions": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates the description and its type",
        "required": ["id", "description_type", "description_text"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Object description record primary key, generated automatically in database"
          },
          "description_type": {
            "type": "object",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the description_types enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the description_types enumeration, e.g. 'Table of Contents', or 'Abstract Section'"
              }
            }
          },
          "description_label": {
            "type": "string",
            "description": "A short label describing the nature of the description text"
          },
          "description_text": {
            "type": "string",
            "description": "The description text, as it appears in the source data"
          },
          "language_code": {
            "$ref": "#/definitions/lang_code"
          },
          "contains_html": {
            "type": "boolean",
            "description": "Whether the description text has any embedded html tags, e.g. for super or subscripts"
          }
        }
      }
    },
    "object_instances": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates the nature of a specific instance of a data object, e.g. a file or a web page",
        "required": ["id", "repository_org", "resource_type"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Object instance record primary key, generated automatically in database"
          },
          "repository_org": {
            "$ref": "#/definitions/organisation"
          },
          "url": {
            "type": "string",
            "format": "uri",
            "description": "The url where the object can be accessed, if one exists"
          },
          "url_direct_access": {
            "type": "boolean",
            "description": "Whether or not the access via the url is public and freely available"
          },
          "url_last_checked": {
            "type": "string",
            "format": "date",
            "description": "If a check has been carried out, the date the url last responded with a 200 (success) message, as a string (yyyy MMM dd)"
          },
          "resource_type": {
            "type": "object",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the file_types enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the file_types enumeration, e.g. 'PDF', or 'Excel Spreadsheet(s)'"
              }
            }
          },
          "resource_size": {
            "type": "number",
            "description": "A number (may be a real or an integer number) indicating the size of the resource"
          },
          "resource_size_unit": {
            "type": "string",
            "description": "One of the allowed types, from the size_units enumeration / look up table, e.g. 'Mb', or 'Pages'"
          },
          "resource_comment": {
            "type": "string",
            "description": "Holds further details of the resource, in particular to support machine processing. These could include the schema used for XML files, and / or the character coding used for text files (e.g. UTF-8 versus UTF-16) or the presence and types of any byte order marks."
          }
        }
      }
    },
    "object_topics": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates the topic name or keyword, and - if applicable - how it was classified in the source data, the controlled terminology system used, and the code for the topic in that system",
        "required": ["id", "topic_value"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Object topic record primary key, generated automatically in database"
          },
          "topic_value": {
            "type": "string",
            "description": "The topic name, as provided in the source data"
          },
          "topic_source_type": {
            "type": "object",
            "description": "How the topic was categorised in the source data, (or by using the controlled terminology, or using a category matching service)",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the topic_source_types enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the topic_source_types enumeration, e.g. 'Condition', or 'Organism'"
              }
            }
          },
          "topic_ct": {
            "type": "object",
            "description": "If applicable, the controlled terminology system from which the topic name was taken",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the topic_controlled_terminologies enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the topic_controlled_terminologies enumeration, e.g. 'MESH', or 'Cochrane PICO terminology'"
              }
            }
          },
          "topic_ct_code": {
            "type": "string",
            "description": "The code for the topic within the designated controlled terminology scheme"
          }
        }
      }
    },
    "object_rights": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates any usage and copyright rights associated with the data object",
        "required": ["id", "details"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Object rights record primary key, generated automatically in database"
          },
          "details": {
            "type": "string",
            "description": "The name of the rights applied, possibly including a detailed description"
          },
          "rights_url": {
            "type": "string",
            "format": "uri",
            "description": "The url where details of the rights applied can be found, e.g. definition of creative commons variant"
          }
        }
      }
    },
    "related_objects": {
      "type": "array",
      "items": {
        "type": "object",
        "description": "A composite object that indicates any related data object and the nature of the relationship",
        "required": ["id", "relationship_type", "target_object_id"],
        "properties": {
          "id": {
            "type": "integer",
            "description": "Object relationships record primary key, generated automatically in database"
          },
          "relationship_type": {
            "type": "object",
            "properties": {
              "id": {
                "type": "integer",
                "description": "An integer referencing the relevant record in the object_relationship_types enumeration / lookup table"
              },
              "name": {
                "type": "string",
                "description": "One of the allowed types, from the object_relationship_types enumeration, e.g. 'Is metadata for', or 'Is supplement to'"
              }
            }
          },
          "target_object_id": {
            "type": "integer",
            "description": "The id, i.e. internal accession number, of the related object"
          }
        }
      }
    },
    "related_studies": {
      "type": "array",
      "items": {
        "type": "integer",
        "description": "The id, i.e. internal accession number, of the linked study"
      }
    },
    "provenance_data": {
      "type": "string",
      "description": "A listing of the source or sources from which the data for the data object has been drawn, and the date-time(s) when the data was last downloaded"
    }
  },
  "definitions": {
    "lang_code": {
      "type": "string",
      "minLength": 2,
      "maxLength": 2,
      "description": "A two letter ISO 639-1 code indicating the language of the study",
      "default": "en"
    },
    "organisation": {
      "type": "object",
      "required": ["name"],
      "properties": {
        "id": {
          "type": "integer",
          "description": "The id of the organisation within the ECRIN contextual database, if that id exists"
        },
        "name": {
          "type": "array",
          "description": "Either the name of the organisation as supplied by the source data, or the names (may be multiple) of the organisation within the ECRIN contextual database",
          "minItems": 1,
          "items": {
            "type": "string"
          }
        }
      }
    }
  }
}