Data Object JSON v4

From ECRIN-MDR Wiki
Jump to navigation Jump to search

The current (version 4) form of the JSON schema for the Data object file is shown below.
This version was created in September 2020.

{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "$id": "http://ecrin.org/json_schemas/mdrdataobject/v4.json",
    "title": "XDC Data Object definition",
    "description": "ECRIN Metadata Repository for clinical research objects, Data Object JSON definition, version 4 September 2020",
    "type": "object",
    "required": ["id", "data_object_title", "class", "type", "publication_year", "access_type"],
    "additionalProperties": false,
    "properties": {

        "file_type": {
            "type": "string",
            "description": "always 'data_object'"
        },

        "id": {
            "type": "integer",
            "description": "Internal accession number of the data object within MDR database"
        },
        
        "doi": {
            "type": "string",
            "description": "The doi (without prefixes, beginning with '10...') of the object, if it has one"
        },
        
        "display_title": {
            "type": "string",
            "description": "The title of the object - if a unique title is not part of the source title it may be a composite of the study title and the object type"
        },
        
        "version": {
            "type": "string",
            "description": "Indication of version if there is one, in the format of the source data"
        },
        
        "object_class": {
            "type": "object",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the object_classes enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the object_classes enumeration, e.g. 'Dataset', or 'Text'"
                }
            }
        },
        
        "object_type": {
            "type": "object",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the object_types enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the object_types enumeration, e.g. 'Study Protocol', or 'IPD final analysis dataset (full study population)'"
                }
            }
        },
        
        "publication_year": {
            "type": "integer",
            "description": "A four digit number indicating the year the object was published, i.e. became available"
        },

        "language_code": {
            "type": "array",
            "items": {
                "$ref": "#/definitions/lang_code"
            }
        },
        
        "managing_organisation": {
                "$ref": "#/definitions/organisation"
        },
        
        "access_type": {
            "type": "object",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the object_access_types enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the object_access_types enumeration, e.g. 'Public download (self-attestation required)', or 'Case by case download'"
                }
            }
        },
        
        "access_details": {
            "type": "string",
            "description": "A textual description of the access being offered, for example identifying the groups to which access is granted, the criteria on which a case-by-case decision would be based, any further restrictions on on-screen access, etc."
        },
        
        "access_details_url": {
            "type": "string",
            "format" : "uri",
            "description": "A url of a web page that provides details of the accesss available, possibly including the practical details required or a form to use to apply for access"
        },

        "url_last_checked": {
            "type": "string",
            "format": "date",
            "description": "If a check has been carried out, the date the url last responded with a 200 (success) message, as a string (yyyy MMM dd)"
        }, 
 
        "eosc_category": {
            "type": "integer",
            "description": "An integer between 0 and 3 inclusive, representing the type of data object / access management required, using a recommended eosc categorisation"
        },  

        "dataset_record_keys": {
            "type": "object",
            "description": "For dataset objects only, provides an indication of the types of keys used in the dataset, in particular if they are anonymous or pseudonymous",
            "properties": {
                "record_keys_type_id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the dataset_recordkey_types enumeration / lookup table"
                },
                "record_keys_type": {
                    "type": "string",
                    "description": "One of the allowed types, from the dataset_recordkey_types enumeration, e.g. 'Anonymised', or 'Pseudonymised'"
                },
                "record_keys_details": {
                    "type": "string",
                    "description": "Provides further details of the record key types, perhaps referring to dataset preparation, if available"
                }
            }
        },
        
        "dataset_identifiers": {
            "type": "object",
            "description": "For dataset objects only, provides an indication of the level of identifiers in the dataset",
            "properties": {
                "deident_type_id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the dataset_identifier_types enumeration / lookup table"
                },
                "deident_type": {
                    "type": "string",
                    "description": "One of the allowed types, from the dataset_de-identification_levels enumeration, e.g. 'De-identification applied'"
                },
                "deident_direct": {
                    "type": "boolean",
                    "description": "If direct Identifiers were removed from the data set"
                }
                "deident_hipaa": {
                    "type": "boolean",
                    "description": "If the US HIPAA de-identification rules have been applied"
                }
                "deident_dates": {
                    "type": "boolean",
                    "description": "If dates have been rebased and / or replaced by integers"
                }
                "deident_nonarr": {
                    "type": "boolean",
                    "description": "If narrative text fields have been removed"
                }
                "deident_kanon": {
                    "type": "boolean",
                    "description": "if k-anonymisation (k>=2) has been achieved"
                }
                "deident_details": {
                    "type": "string",
                    "description": "Provides further details of the de-identification of the dataset, perhaps referring to other documents and / or a URL."
                }
            }
        },
        
        "dataset_consents": {
            "type": "object",
            "description": "For dataset objects only, provides an indication of the level of explicit consent for secondary use of the data",
            "properties": {
                "consent_type_id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the dataset_consent_types enumeration / lookup table"
                },
                "consent_type": {
                    "type": "string",
                    "description": "One of the allowed types, from the dataset_consent_types enumeration, e.g. 'No explicit consent', or 'General research use'"
                },
                "consent_noncommercial": {
                    "type": "boolean",
                    "description": "If further use restricted to non-commercial users"
                }
                "consent_geog_restrict": {
                    "type": "boolean",
                    "description": "If any geographical restrictions apply to further use"
                }
                "consent_research_type": {
                    "type": "boolean",
                    "description": "If further use is restricted to specific type(s) of research"
                }
                "consent_genetic_only": {
                    "type": "boolean",
                    "description": "If further research is restricted to genetic studies only"
                }
                "consent_no_methods": {
                    "type": "boolean",
                    "description": "If the data cannot be used for work that is purely for methodological / algorithmic development"
                }
                "consents_details": {
                    "type": "string",
                    "description": "Provides further details of the consent for secondary use associated with the dataset, if available, and in particular any details associated with the specific restrictions listed above."
                }
            }
        },
        
        "object_identifiers": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the value and type of the identifier, and optionally its date and organisation of origin",
                "required": ["id", "value", "type"],
                "properties": {
		    "id": {
		        "type": "integer",
			"description": "Data object identifier record primary key, generated automatically in database"
		    },

		    "identifier_value": {
		        "type": "string",
			"description": "The identifier value, in a standardised format (for each identifier type)"
		    },

		    "identifier_type": {
			"type": "object",
			"properties": {
			    "id": {
				 "type": "integer",
				 "description": "An integer referencing the relevant record in the identifier_types enumeration / lookup table"
			     },
		             "name": {
				 "type": "string",
				 "description": "One of the allowed types, from the identifier_types enumeration, e.g. 'Trial Registry ID', or 'Sponsor ID'"
			    }
		        }
		   },

		   "identifier_date": {
			"type": "string",
			"description": "The date the identifier was allocated, if known, in a string 'yyyy  MMM dd' format, e.g. '2015 Dec 12'"
		   },

		   "identifier_org": {
			"$ref": "#/definitions/organisation"
		   }
	       }
            }
        },
        
        "object_titles": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the type and value of the 'other title'",
                "required": ["id", "title_type", "title_text"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Other title record primary key, generated automatically in database"
                    },
                    
		    "title_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the title_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the title_types enumeration, e.g. 'Abbreviation or Acronym', or 'Translated Title'"
                            }
                        }
                    },
                   
		    "title_text": {
                        "type": "string"
                    },
                    
		    "lang_code": {
                        "$ref": "#/definitions/lang_code"
                    },
					
	            "contains_html": {
                        "type": "boolean"
			"description": "Whether the title text has any embedded html tags, e.g. for super or subscripts" 
                    },
					
		    "comments": {
                        "type": "string"
                    }
                }
            }
        },
        
        "object_contributors": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the people and / or organisations that contributed to the data object, directly or indirectly",
                "required": ["id", "contribution_type"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object contributor record primary key, generated automatically in database"
                    },

                    "contribution_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the contribution_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the contribution_types enumeration, e.g. 'Creator', or 'Trial Sponsor'"
                            }
                        }
                    },

                    "is_individual": {
                        "type": "boolean",
                        "description": "Indicates whether the contributor is an individual or an organisation, e.g. pharma company or research network."
                    },

                    "person": {
                        "type": "object",
                        "description": "A composite object that may include an id, but should have as a minimum either a last_name or full_name included. Not required if there is an organisation entry.",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the people table"
                            },

                            "family_name": {
                                "type": "string",
                                "description": "The person's family name, the 'key' name under which they are listed - usually the last name in Western cultures"
                            },

                            "given_name": {
                                "type": "string",
                                "description": "The person's given name, or the first name by which they are nornally known. May be replaced by initials"
                            },

                            "full_name": {
                                "type": "string",
                                "description": "The person's full name, in the order of first_name last_name"
                            },

                            "identifier": {
                               "type": "object",
                                    "properties": {
                                         "person_id": {
                                               "type": "string",
                                               "description": "The person's identifier within the identifier scheme"
                                          },

                                          "scheme_name": {
                                                "type": "string",
                                                 "description": "A recognised identifier scheme name, most commonly ORCID"
                                          }
                                    }
                             },  
                     
                             "affiliation": {
                                   "type": "object",
                                    "properties": {
                                         "affiliation": {
                                           "type": "string",
                                           "description": "The person's organisational affiliation as it appears in the source data"
                                         }, 
                                         "org_id": {
                                           "type": "string",
                                           "description": "The organisation's identifier within the identifier scheme"
                                         },
                                         "scheme_name": {
                                           "type": "string",
                                           "description": "A recognised identifier scheme name"
                                         }
                                   }
                            }
                        }
                    }, 
					
                    "organisation": {
                          "$ref": "#/definitions/organisation"
                    }
                }
             }
        },
        
        "object_dates": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the date and its type",
                "required": ["id", "date_type", "is_date_range", "start"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object date record primary key, generated automatically in database"
                    },

                    "date_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the date_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the date_types enumeration, e.g. 'Available', or 'Updated'"
                            }
                        }
                    },

                    "is_date_range": {
                        "type": "boolean",
                        "default": false,
                        "description": "If true both start and end dates should be considered, otherwise just the start date"
                    },

                    "date_as_string": {
                         "type": "string",
                         "description": "String representation of the date, used for all dates including partial dates and date ranges "
                     },

                    "start_date": {
                        "type": "object",
                        "properties": {
                            "start_year": {
                                "type": "integer",
                                "description": "A 4 digit integer indicating the year"
                            },
                            "start_month": {
                                "type": "integer",
                                "description": "An integer indicating the month"
                            },
      
                            "start_day": {
                                "type": "integer",
                                "description": "An integer indicating the day"
                            },
                        }
                    },
                    "end_date": {
                        "type": "object",
                        "properties": {
                            "end_year": {
                                "type": "integer",
                                "description": "A 4 digit integer indicating the year"
                            },
                            "end_month": {
                                "type": "integer",
                                "description": "An integer indicating the month"
                            },
                            "end_day": {
                                "type": "integer",
                                "description": "An integer indicating the day"
                            },
                        }
                    },

                    "comments": {
                        "type": "string",
                        "description": "Additional comments or information about the date"
                    }
                }
            }
        },

        "object_descriptions": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the description and its type",
                "required": ["id", "description_type", "description_text"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object description record primary key, generated automatically in database"
                    },
                    "description_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the description_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the description_types enumeration, e.g. 'Table of Contents', or 'Abstract Section'"
                            }
                        }
                    },

                    "description_label": {
                        "type": "string",
                        "description": "A short label describing the nature of the description text"
                    },

                    "description_text": {
                        "type": "string",
                        "description": "The description text, as it appears in the source data"
                    },

                    "language_code": {
                    "$ref": "#/definitions/lang_code"
                    },

                    "contains_html": {
                        "type": "boolean"
                        "description": "Whether the title text has any embedded html tags, e.g. for super or subscripts" 
                    },
                }
           }
       },

        "object_instances": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the nature of a specific instance of a data object, e.g. a file or a web page",
                "required": ["id", "repository_org", "file_type"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object instance record primary key, generated automatically in database"
                    },
                    "repository_org": {
                           "$ref": "#/definitions/organisation"
                    },
                    "url":{
                        "type": "string",
                        "format": "uri",
                        "description": "The url where the object can be accessed, if one exists"
                    }, 
                    "url_direct_access":{
                        "type": "boolean",
                        "description": "Whether or not the access via the url is public and freely available"
                    }, 
                    "url_last_checked": {
                        "type": "string",
                        "format": "date",
                        "description": "If a check has been carried out, the date the url last responded with a 200 (success) message, as a string (yyyy MMM dd)"
                    },
                    "resource_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the file_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the file_types enumeration, e.g. 'PDF', or 'Excel Spreadsheet(s)'"
                            }
                        }
                    },
                    "resource_size":{
                        "type": "number",
                        "description": "A number (may be a real or an integer number) indicating the size of the resource"
                    }, 
                    "resource_size_unit":{
                        "type": "string",
                        "description": "One of the allowed types, from the size_units enumeration / look up table, e.g. 'Mb', or 'Pages'"
                    },
                    "resource_comment":{
                        "type": "string",
                        "description": "Holds further details of the resource, in particular to support machine processing. These could include the schema used for XML files, and / or the character coding used for text files (e.g. UTF-8 versus UTF-16) or the presence and types of any byte order marks."
                    }
                }
            }
        },
        
        "object_topics": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the topic name or keyword, and - if applicable - how it was classified in the source data, the controlled terminology system used, and the code for the topic in that system",
                "required": ["id", "value"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object topic record primary key, generated automatically in database"
                    },
                    "topic_value": {
                        "type": "string",
                        "description": "The topic name, as provided in the source data"
                    },
                    "topic_source_type": {
                        "type": "object",
                        "description": "How the topic was categorised in the source data, (or by using the controlled terminology, or using a category matching service)",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the topic_source_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the topic_source_types enumeration, e.g. 'Condition', or 'Organism'"
                            }
                        }
                    },
                    "topic_ct": {
                        "type": "object",
                        "description": "If applicable, the controlled terminology system from which the topic name was taken",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the topic_controlled_terminologies enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the topic_controlled_terminologies enumeration, e.g. 'MESH', or 'Cochrane PICO terminology'"
                            }
                        }
                    },
                    "topic_ct_code": {
                        "type": "string",
                        "description": "The code for the topic within the designated controlled terminology scheme"
                    }
                }
            }
        },
        
        "object_rights": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates any usage and copyright rights associated with the data object",
                "required": ["id", "details"],
                "properties": {
                    "id":{
                        "type": "integer",
                        "description": "Object rights record primary key, generated automatically in database"
                    }, 
                    "details":{
                        "type": "string",
                        "description": "The name of the rights applied, possibly including a detailed description"
                    }, 
                    "rights_url":{
                        "type": "string",
                        "format": "uri",
                        "description": "The url where details of the rights applied can be found, e.g. defintition of creative commons variant"
                    }
                }
            }
        },
        
        "related_objects": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates any related data object and the nature of the relationship",
                "required": ["id", "relationship_type", "target_object_id"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object relationships record primary key, generated automatically in database"
                    },
                    "relationship_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the object_relationship_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the object_relationship_types enumeration, e.g. 'Is metadata for', or 'Is supplement to'"
                            }
                        }
                    },
                    "target_object_id": {
                        "type": "integer",
                        "description": "The id, i.e. internal accession number, of the related object"
                    }
                }
            }
        },    
        
        "related_studies": {
            "type": "array",
            "items": {
                 "type": "integer",
                 "description": "The id, i.e. internal accession number, of the linked study"
            }
        },

        "provenance_data": {
            "type": "string",
            "description": "A listing of the source or sources from which the data for the data object has been drawn, and the date-time(s) when the data was last downloaded"
        },
    },

    "definitions": {

        "lang_code": {
            "type": "string",
            "minLength": 2,
            "maxLength": 2,
            "description": "A two letter ISO 369-1 code indicating the language of the study",
            "default": "en"
        },

        "organisation": {
            "type": "object",
            "required": ["name"],
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "The id of the organisation within the ECRIN contextual database, if that id exists"
                },
                "name": {
                    "type": "array",
                    "description": "Either the name of the organisation as supplied by the source data, or the names (may be multiple) of the organisation within the ECRIN contextual database",
                    "minItems": 1,
                    "items": {
                        "type": "string"
                    }
                }
            }
        }

    }
}