Data Object JSON v3

From ECRIN-MDR Wiki
Jump to navigation Jump to search

The current (version 3) form of the JSON schema for the Data object file is shown below.
This version was created in November 2019.

{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "$id": "http://ecrin.org/json_schemas/mdrdataobject/v3.json",
    "title": "XDC Data Object definition",
    "description": "ECRIN Metadata Repository for clinical research objects, Data Object JSON definition, version 3 November 2019, amended August 2020 with the addition of provenance_data and the resource_comments strings",
    "type": "object",
    "required": ["id", "data_object_title", "class", "type", "publication_year", "access_type"],
    "additionalProperties": false,
    "properties": {

        "file_type": {
            "type": "string",
            "description": "always 'data_object'"
        },

        "id": {
            "type": "integer",
            "description": "Internal accession number of the data object within MDR database"
        },
        
        "doi": {
            "type": "string",
            "description": "The doi (without prefixes, beginning with '10...') of the object, if it has one"
        },
        
        "display_title": {
            "type": "string",
            "description": "The title of the object - if a unique title is not part of the source title it may be a composite of the study title and the object type"
        },
        
        "version": {
            "type": "string",
            "description": "Indication of version if there is one, in the format of the source data"
        },
        
        "object_class": {
            "type": "object",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the object_classes enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the object_classes enumeration, e.g. 'Dataset', or 'Text'"
                }
            }
        },
        
        "object_type": {
            "type": "object",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the object_types enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the object_types enumeration, e.g. 'Study Protocol', or 'IPD final analysis dataset (full study population)'"
                }
            }
        },
        
        "publication_year": {
            "type": "integer",
            "description": "A four digit number indicating the year the object was published, i.e. became available"
        },

        "language_code": {
            "type": "array",
            "items": {
                "$ref": "#/definitions/lang_code"
            }
        },
        
        "managing_organisation": {
                "$ref": "#/definitions/organisation"
        },
        
        "access_type": {
            "type": "object",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the object_access_types enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the object_access_types enumeration, e.g. 'Public download (self-attestation required)', or 'Case by case download'"
                }
            }
        },
        
        "access_details": {
            "type": "string",
            "description": "A textual description of the access being offered, for example identifying the groups to which access is granted, the criteria on which a case-by-case decision would be based, any further restrictions on on-screen access, etc."
        },
        
        "access_details_url": {
            "type": "string",
            "format" : "uri",
            "description": "A url of a web page that provides details of the accesss available, possibly including the practical details required or a form to use to apply for access"
        },

        "url_last_checked": {
            "type": "string",
           "format": "date",
           "description": "If a check has been carried out, the date the url last responded with a 200 (success) message, as a string (yyyy MMM dd)"
        },  

        "dataset_record_keys": {
            "type": "object",
            "description": "For dataset objects only, provides an indication of the types of keys used in the dataset, in particular if they are anonymous or pseudonymous",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the dataset_recordkey_types enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the dataset_recordkey_types enumeration, e.g. 'Anonymised', or 'Pseudonymised'"
                },
                "details": {
                    "type": "string",
                    "description": "Provides further details of the record key types, perhaps referring to dataset preparation, if available"
                }
            }
        },
        
        "dataset_identifiers": {
            "type": "object",
            "description": "For dataset objects only, provides an indication of the level of identifiers in the dataset",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the dataset_identifier_types enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the dataset_identifier_types enumeration, e.g. 'De-identified', or 'Has Direct Identifiers'"
                },
                "details": {
                    "type": "string",
                    "description": "Provides further details of the identifiers in the dataset, perhaps referring to dataset preparation, if available"
                }
            }
        },
        
        "dataset_consents": {
            "type": "object",
            "description": "For dataset objects only, provides an indication of the level of explicit consent for secondary use of the data",
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "An integer referencing the relevant record in the dataset_consent_types enumeration / lookup table"
                },
                "name": {
                    "type": "string",
                    "description": "One of the allowed types, from the dataset_consent_types enumeration, e.g. 'None', or 'Full re-use for research'"
                },
                "details": {
                    "type": "string",
                    "description": "Provides further details of the consent for secondary use associated with the dataset, if available"
                }
            }
        },
        
        "object_identifiers": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the value and type of the identifier, and optionally its date and organisation of origin",
                "required": ["id", "value", "type"],
                "properties": {
		    "id": {
		        "type": "integer",
			"description": "Data object identifier record primary key, generated automatically in database"
		    },

		    "identifier_value": {
		        "type": "string",
			"description": "The identifier value, in a standardised format (for each identifier type)"
		    },

		    "identifier_type": {
			"type": "object",
			"properties": {
			    "id": {
				 "type": "integer",
				 "description": "An integer referencing the relevant record in the identifier_types enumeration / lookup table"
			     },
		             "name": {
				 "type": "string",
				 "description": "One of the allowed types, from the identifier_types enumeration, e.g. 'Trial Registry ID', or 'Sponsor ID'"
			    }
		        }
		   },

		   "identifier_date": {
			"type": "string",
			"description": "The date the identifier was allocated, if known, in a string 'yyyy  MMM dd' format, e.g. '2015 Dec 12'"
		   },

		   "identifier_org": {
			"$ref": "#/definitions/organisation"
		   }
	       }
            }
        },
        
        "object_titles": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the type and value of the 'other title'",
                "required": ["id", "title_type", "title_text"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Other title record primary key, generated automatically in database"
                    },
                    
		    "title_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the title_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the title_types enumeration, e.g. 'Abbreviation or Acronym', or 'Translated Title'"
                            }
                        }
                    },
                   
		    "title_text": {
                        "type": "string"
                    },
                    
		    "lang_code": {
                        "$ref": "#/definitions/lang_code"
                    },
					
	            "contains_html": {
                        "type": "boolean"
			"description": "Whether the title text has any embedded html tags, e.g. for super or subscripts" 
                    },
					
		    "comments": {
                        "type": "string"
                    }
                }
            }
        },
        
        "object_contributors": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the people and / or organisations that contributed to the data object, directly or indirectly",
                "required": ["id", "contribution_type"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object contributor record primary key, generated automatically in database"
                    },

                    "contribution_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the contribution_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the contribution_types enumeration, e.g. 'Creator', or 'Trial Sponsor'"
                            }
                        }
                    },

                    "is_individual": {
                        "type": "boolean",
                        "description": "Indicates whether the contributor is an individual or an organisation, e.g. pharma company or research network."
                    },

                    "person": {
                        "type": "object",
                        "description": "A composite object that may include an id, but should have as a minimum either a last_name or full_name included. Not required if there is an organisation entry.",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the people table"
                            },

                            "family_name": {
                                "type": "string",
                                "description": "The person's family name, the 'key' name under which they are listed - usually the last name in Western cultures"
                            },

                            "given_name": {
                                "type": "string",
                                "description": "The person's given name, or the first name by which they are nornally known. May be replaced by initials"
                            },

                            "full_name": {
                                "type": "string",
                                "description": "The person's full name, in the order of first_name last_name"
                            },

                            "identifier": {
                               "type": "object",
                                    "properties": {
                                         "person_id": {
                                               "type": "string",
                                               "description": "The person's identifier within the identifier scheme"
                                          },

                                          "scheme_name": {
                                                "type": "string",
                                                 "description": "A recognised identifier scheme name, most commonly ORCID"
                                          }
                                    }
                             },  
                     
                             "affiliation": {
                                   "type": "object",
                                    "properties": {
                                         "affiliation": {
                                           "type": "string",
                                           "description": "The person's organisational affiliation as it appears in the source data"
                                         }, 
                                         "org_id": {
                                           "type": "string",
                                           "description": "The organisation's identifier within the identifier scheme"
                                         },
                                         "scheme_name": {
                                           "type": "string",
                                           "description": "A recognised identifier scheme name"
                                         }
                                   }
                            }
                        }
                    }, 
					
                    "organisation": {
                          "$ref": "#/definitions/organisation"
                    }
                }
             }
        },
        
        "object_dates": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the date and its type",
                "required": ["id", "date_type", "is_date_range", "start"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object date record primary key, generated automatically in database"
                    },

                    "date_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the date_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the date_types enumeration, e.g. 'Available', or 'Updated'"
                            }
                        }
                    },

                    "is_date_range": {
                        "type": "boolean",
                        "default": false,
                        "description": "If true both start and end dates should be considered, otherwise just the start date"
                    },

                    "date_as_string": {
                         "type": "string",
                         "description": "String representation of the date, used for all dates including partial dates and date ranges "
                     },

                    "start_date": {
                        "type": "object",
                        "properties": {
                            "start_year": {
                                "type": "integer",
                                "description": "A 4 digit integer indicating the year"
                            },
                            "start_month": {
                                "type": "integer",
                                "description": "An integer indicating the month"
                            },
      
                            "start_day": {
                                "type": "integer",
                                "description": "An integer indicating the day"
                            },
                        }
                    },
                    "end_date": {
                        "type": "object",
                        "properties": {
                            "end_year": {
                                "type": "integer",
                                "description": "A 4 digit integer indicating the year"
                            },
                            "end_month": {
                                "type": "integer",
                                "description": "An integer indicating the month"
                            },
                            "end_day": {
                                "type": "integer",
                                "description": "An integer indicating the day"
                            },
                        }
                    },

                    "comments": {
                        "type": "string",
                        "description": "Additional comments or information about the date"
                    }
                }
            }
        },

        "object_descriptions": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the description and its type",
                "required": ["id", "description_type", "description_text"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object description record primary key, generated automatically in database"
                    },
                    "description_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the description_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the description_types enumeration, e.g. 'Table of Contents', or 'Abstract Section'"
                            }
                        }
                    },

                    "description_label": {
                        "type": "string",
                        "description": "A short label describing the nature of the description text"
                    },

                    "description_text": {
                        "type": "string",
                        "description": "The description text, as it appears in the source data"
                    },

                    "language_code": {
                    "$ref": "#/definitions/lang_code"
                    },

                    "contains_html": {
                        "type": "boolean"
                        "description": "Whether the title text has any embedded html tags, e.g. for super or subscripts" 
                    },
                }
           }
       },

        "object_instances": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the nature of a specific instance of a data object, e.g. a file or a web page",
                "required": ["id", "repository_org", "file_type"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object instance record primary key, generated automatically in database"
                    },
                    "repository_org": {
                           "$ref": "#/definitions/organisation"
                    },
                    "url":{
                        "type": "string",
                        "format": "uri",
                        "description": "The url where the object can be accessed, if one exists"
                    }, 
                    "url_direct_access":{
                        "type": "boolean",
                        "description": "Whether or not the access via the url is public and freely available"
                    }, 
                    "url_last_checked": {
                        "type": "string",
                        "format": "date",
                        "description": "If a check has been carried out, the date the url last responded with a 200 (success) message, as a string (yyyy MMM dd)"
                    },
                    "resource_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the file_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the file_types enumeration, e.g. 'PDF', or 'Excel Spreadsheet(s)'"
                            }
                        }
                    },
                    "resource_size":{
                        "type": "number",
                        "description": "A number (may be a real or an integer number) indicating the size of the resource"
                    }, 
                    "resource_size_unit":{
                        "type": "string",
                        "description": "One of the allowed types, from the size_units enumeration / look up table, e.g. 'Mb', or 'Pages'"
                    },
                    "resource_comment":{
                        "type": "string",
                        "description": "Holds further details of the resource, in particular to support machine processing. These could include the schema used for XML files, and / or the character coding used for text files (e.g. UTF-8 versus UTF-16) or the presence and types of any byte order marks."
                    }
                }
            }
        },
        
        "object_topics": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates the topic name or keyword, and - if applicable - how it was classified in the source data, the controlled terminology system used, and the code for the topic in that system",
                "required": ["id", "value"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object topic record primary key, generated automatically in database"
                    },
                    "topic_value": {
                        "type": "string",
                        "description": "The topic name, as provided in the source data"
                    },
                    "topic_source_type": {
                        "type": "object",
                        "description": "How the topic was categorised in the source data, (or by using the controlled terminology, or using a category matching service)",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the topic_source_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the topic_source_types enumeration, e.g. 'Condition', or 'Organism'"
                            }
                        }
                    },
                    "topic_ct": {
                        "type": "object",
                        "description": "If applicable, the controlled terminology system from which the topic name was taken",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the topic_controlled_terminologies enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the topic_controlled_terminologies enumeration, e.g. 'MESH', or 'Cochrane PICO terminology'"
                            }
                        }
                    },
                    "topic_ct_code": {
                        "type": "string",
                        "description": "The code for the topic within the designated controlled terminology scheme"
                    }
                }
            }
        },
        
        "object_rights": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates any usage and copyright rights associated with the data object",
                "required": ["id", "details"],
                "properties": {
                    "id":{
                        "type": "integer",
                        "description": "Object rights record primary key, generated automatically in database"
                    }, 
                    "details":{
                        "type": "string",
                        "description": "The name of the rights applied, possibly including a detailed description"
                    }, 
                    "rights_url":{
                        "type": "string",
                        "format": "uri",
                        "description": "The url where details of the rights applied can be found, e.g. defintition of creative commons variant"
                    }
                }
            }
        },
        
        "related_objects": {
            "type": "array",
            "items": {
                "type": "object",
                "description": "A composite object that indicates any related data object and the nature of the relationship",
                "required": ["id", "relationship_type", "target_object_id"],
                "properties": {
                    "id": {
                        "type": "integer",
                        "description": "Object relationships record primary key, generated automatically in database"
                    },
                    "relationship_type": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "integer",
                                "description": "An integer referencing the relevant record in the object_relationship_types enumeration / lookup table"
                            },
                            "name": {
                                "type": "string",
                                "description": "One of the allowed types, from the object_relationship_types enumeration, e.g. 'Is metadata for', or 'Is supplement to'"
                            }
                        }
                    },
                    "target_object_id": {
                        "type": "integer",
                        "description": "The id, i.e. internal accession number, of the related object"
                    }
                }
            }
        },    
        
        "related_studies": {
            "type": "array",
            "items": {
                 "type": "integer",
                 "description": "The id, i.e. internal accession number, of the linked study"
            }
        },

        "provenance_data": {
            "type": "string",
            "description": "A listing of the source or sources from which the data for the data object has been drawn, and the date-time(s) when the data was last downloaded"
        },
    },

    "definitions": {

        "lang_code": {
            "type": "string",
            "minLength": 2,
            "maxLength": 2,
            "description": "A two letter ISO 369-1 code indicating the language of the study",
            "default": "en"
        },

        "organisation": {
            "type": "object",
            "required": ["name"],
            "properties": {
                "id": {
                    "type": "integer",
                    "description": "The id of the organisation within the ECRIN contextual database, if that id exists"
                },
                "name": {
                    "type": "array",
                    "description": "Either the name of the organisation as supplied by the source data, or the names (may be multiple) of the organisation within the ECRIN contextual database",
                    "minItems": 1,
                    "items": {
                        "type": "string"
                    }
                }
            }
        }

    }
}