createSimpleISAjson.py¶
======================
An example of using the ISA model classes to create an ISA-JSON file.
#!/usr/bin/env python
from isatools.model import *
def create_descriptor():
"""Returns a simple but complete ISA-JSON 1.0 descriptor for illustration."""
# Create an empty Investigation object and set some values to the instance variables.
investigation = Investigation()
investigation.identifier = "i1"
investigation.title = "My Simple ISA Investigation"
investigation.description = "We could alternatively use the class constructor's parameters to set some default " \
"values at the time of creation, however we want to demonstrate how to use the " \
"object's instance variables to set values."
investigation.submission_date = "2016-11-03"
investigation.public_release_date = "2016-11-03"
# Create an empty Study object and set some values. The Study must have a filename, otherwise when we serialize it
# to ISA-Tab we would not know where to write it. We must also attach the study to the investigation by adding it
# to the 'investigation' object's list of studies.
study = Study(filename="s_study.txt")
study.identifier = "s1"
study.title = "My ISA Study"
study.description = "Like with the Investigation, we could use the class constructor to set some default values, " \
"but have chosen to demonstrate in this example the use of instance variables to set initial " \
"values."
study.submission_date = "2016-11-03"
study.public_release_date = "2016-11-03"
investigation.studies.append(study)
# Some instance variables are typed with different objects and lists of objects. For example, a Study can have a
# list of design descriptors. A design descriptor is an Ontology Annotation describing the kind of study at hand.
# Ontology Annotations should typically reference an Ontology Source. We demonstrate a mix of using the class
# constructors and setting values with instance variables. Note that the OntologyAnnotation object
# 'intervention_design' links its 'term_source' directly to the 'obi' object instance. To ensure the OntologySource
# is encapsulated in the descriptor, it is added to a list of 'ontology_source_references' in the Investigation
# object. The 'intervention_design' object is then added to the list of 'design_descriptors' held by the Study
# object.
obi = OntologySource(name='OBI', description="Ontology for Biomedical Investigations")
investigation.ontology_source_references.append(obi)
intervention_design = OntologyAnnotation(term_source=obi)
intervention_design.term = "intervention design"
intervention_design.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
study.design_descriptors.append(intervention_design)
# Other instance variables common to both Investigation and Study objects include 'contacts' and 'publications',
# each with lists of corresponding Person and Publication objects.
contact = Person(first_name="Alice", last_name="Robertson", affiliation="University of Life", roles=[OntologyAnnotation(term='submitter')])
study.contacts.append(contact)
publication = Publication(title="Experiments with Elephants", author_list="A. Robertson, B. Robertson")
publication.pubmed_id = "12345678"
publication.status = OntologyAnnotation(term="published")
study.publications.append(publication)
# To create the study graph that corresponds to the contents of the study table file (the s_*.txt file), we need
# to create a process sequence. To do this we use the Process class and attach it to the Study object's
# 'process_sequence' list instance variable. Each process must be linked with a Protocol object that is attached to
# a Study object's 'protocols' list instance variable. The sample collection Process object usually has as input
# a Source material and as output a Sample material.
# Here we create one Source material object and attach it to our study.
source = Source(name='source_material')
study.sources.append(source)
# Then we create three Sample objects, with organism as Homo Sapiens, and attach them to the study. We use the utility function
# batch_create_material() to clone a prototype material object. The function automatically appends
# an index to the material name. In this case, three samples will be created, with the names
# 'sample_material-0', 'sample_material-1' and 'sample_material-2'.
prototype_sample = Sample(name='sample_material', derives_from=[source])
ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
characteristic_organism = Characteristic(category=OntologyAnnotation(term="Organism"),
value=OntologyAnnotation(term="Homo Sapiens", term_source=ncbitaxon,
term_accession="http://purl.bioontology.org/ontology/NCBITAXON/9606"))
prototype_sample.characteristics.append(characteristic_organism)
study.samples = batch_create_materials(prototype_sample, n=3) # creates a batch of 3 samples
# Now we create a single Protocol object that represents our sample collection protocol, and attach it to the
# study object. Protocols must be declared before we describe Processes, as a processing event of some sort
# must execute some defined protocol. In the case of the class model, Protocols should therefore be declared
# before Processes in order for the Process to be linked to one.
sample_collection_protocol = Protocol(name="sample collection",
protocol_type=OntologyAnnotation(term="sample collection"))
study.protocols.append(sample_collection_protocol)
sample_collection_process = Process(executes_protocol=sample_collection_protocol)
# Next, we link our materials to the Process. In this particular case, we are describing a sample collection
# process that takes one source material, and produces three different samples.
#
# (source_material)->(sample collection)->[(sample_material-0), (sample_material-1), (sample_material-2)]
for src in study.sources:
sample_collection_process.inputs.append(src)
for sam in study.samples:
sample_collection_process.outputs.append(sam)
# Finally, attach the finished Process object to the study process_sequence. This can be done many times to
# describe multiple sample collection events.
study.process_sequence.append(sample_collection_process)
# Next, we build n Assay object and attach two protocols, extraction and sequencing.
assay = Assay(filename="a_assay.txt")
extraction_protocol = Protocol(name='extraction', protocol_type=OntologyAnnotation(term="material extraction"))
study.protocols.append(extraction_protocol)
sequencing_protocol = Protocol(name='sequencing', protocol_type=OntologyAnnotation(term="material sequencing"))
study.protocols.append(sequencing_protocol)
# To build out assay graphs, we enumereate the samples from the study-level, and for each sample we create an
# extraction process and a sequencing process. The extraction process takes as input a sample material, and produces
# an extract material. The sequencing process takes the extract material and produces a data file. This will
# produce three graphs, from sample material through to data, as follows:
#
# (sample_material-0)->(extraction)->(extract-0)->(sequencing)->(sequenced-data-0)
# (sample_material-1)->(extraction)->(extract-1)->(sequencing)->(sequenced-data-1)
# (sample_material-2)->(extraction)->(extract-2)->(sequencing)->(sequenced-data-2)
#
# Note that the extraction processes and sequencing processes are distinctly separate instances, where the three
# graphs are NOT interconnected.
for i, sample in enumerate(study.samples):
# create an extraction process that executes the extraction protocol
extraction_process = Process(executes_protocol=extraction_protocol)
# extraction process takes as input a sample, and produces an extract material as output
extraction_process.inputs.append(sample)
material = Material(name="extract-{}".format(i))
material.type = "Extract Name"
extraction_process.outputs.append(material)
# create a sequencing process that executes the sequencing protocol
sequencing_process = Process(executes_protocol=sequencing_protocol)
sequencing_process.name = "assay-name-{}".format(i)
sequencing_process.inputs.append(extraction_process.outputs[0])
# Sequencing process usually has an output data file
datafile = DataFile(filename="sequenced-data-{}".format(i), label="Raw Data File", generated_from=[sample])
sequencing_process.outputs.append(datafile)
# Ensure Processes are linked forward and backward. plink(from_process, to_process) is a function to set
# these links for you. It is found in the isatools.model package
plink(extraction_process, sequencing_process)
# make sure the extract, data file, and the processes are attached to the assay
assay.samples.append(sample)
assay.data_files.append(datafile)
assay.other_material.append(material)
assay.process_sequence.append(extraction_process)
assay.process_sequence.append(sequencing_process)
assay.measurement_type = OntologyAnnotation(term="gene sequencing")
assay.technology_type = OntologyAnnotation(term="nucleotide sequencing")
# attach the assay to the study
study.assays.append(assay)
import json
from isatools.isajson import ISAJSONEncoder
# To write JSON out, use the ISAJSONEncoder class with the json package and use dump() or dumps()
# Note that the extra parameters sort_keys, indent and separators are to make the output more human-readable.
return json.dumps(investigation, cls=ISAJSONEncoder, sort_keys=True, indent=4, separators=(',', ': '))
if __name__ == '__main__':
print(create_descriptor()) # print the result to stdout