{
  "_schema": "https://data.nist.gov/od/dm/nerdm-schema/v0.7#",
  "@context": [
    "https://data.nist.gov/od/dm/nerdm-pub-context.jsonld",
    {
      "@base": "ark:/88434/mds2-2301"
    }
  ],
  "@type": [
    "nrdp:PublicDataResource",
    "dcat:Dataset"
  ],
  "_extensionSchemas": [
    "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/PublicDataResource"
  ],
  "@id": "ark:/88434/mds2-2301",
  "ediid": "ark:/88434/mds2-2301",
  "version": "1.0.2",
  "title": "Dataset: An Open Combinatorial Diffraction Dataset Including Consensus Human and Machine Learning Labels with Quantified Uncertainty for Training New Machine Learning Models ",
  "contactPoint": {
    "fn": "Brian DeCost",
    "hasEmail": "mailto:brian.decost@nist.gov"
  },
  "modified": "2020-09-22 00:00:00",
  "landingPage": "https://data.nist.gov/od/id/mds2-2301",
  "description": [
    "The open dataset, software, and other files accompanying the manuscript \"An Open Combinatorial Diffraction Dataset Including Consensus Human and Machine Learning Labels with Quantified Uncertainty for Training New Machine Learning Models,\" submitted for publication to  Integrated Materials and Manufacturing Innovations.",
    "Machine learning and autonomy are increasingly prevalent in materials science, but existing models are often trained or tuned using idealized data as absolute ground truths. In actual materials science, \"ground truth\" is often a matter of interpretation and is more readily determined by consensus. Here we present the data, software, and other files for a study using as-obtained diffraction data as a test case for evaluating the performance of machine learning models in the presence of differing expert opinions. We demonstrate that experts with similar backgrounds can disagree greatly even for something as intuitive as using diffraction to identify the start and end of a phase transformation. We then use a logarithmic likelihood method to evaluate the performance of machine learning models in relation to the consensus expert labels and their variance. We further illustrate this method's efficacy in ranking a number of state-of-the-art phase mapping algorithms. We propose a materials data challenge centered around the problem of evaluating models based on consensus with uncertainty. The data, labels, and code used in this study are all available online at data.gov, and the interested reader is encouraged to replicate and improve the existing models or to propose alternative methods for evaluating algorithmic performance."
  ],
  "keyword": [
    "machine learning models",
    "X-ray diffraction",
    "human labeling",
    "combinatorial methods",
    "V-Nb-O thin films",
    "quantified uncertainyy",
    "open data challenge"
  ],
  "theme": [
    "Information Technology: Data and informatics",
    "Materials: Modeling and computational material science",
    "Materials: Materials characterization"
  ],
  "topic": [
    {
      "@type": "Concept",
      "scheme": "https://data.nist.gov/od/dm/nist-themes/v1.1",
      "tag": "Information Technology: Data and informatics"
    },
    {
      "@type": "Concept",
      "scheme": "https://data.nist.gov/od/dm/nist-themes/v1.1",
      "tag": "Materials: Modeling and computational material science"
    },
    {
      "@type": "Concept",
      "scheme": "https://data.nist.gov/od/dm/nist-themes/v1.1",
      "tag": "Materials: Materials characterization"
    }
  ],
  "accessLevel": "public",
  "license": "https://www.nist.gov/open/license",
  "publisher": {
    "name": "National Institute of Standards and Technology",
    "@type": "org:Organization"
  },
  "language": [
    "en"
  ],
  "bureauCode": [
    "006:55"
  ],
  "programCode": [
    "006:052"
  ],
  "doi": "doi:10.18434/mds2-2301",
  "components": [
    {
      "accessURL": "https://doi.org/10.18434/mds2-2301",
      "title": "DOI Access for Dataset: An Open Combinatorial Diffraction Dataset Including Consensus Human and Machine Learning Labels with Quantified Uncertainty for Training New Machine Learning Models ",
      "@type": [
        "nrd:Hidden",
        "dcat:Distribution"
      ],
      "@id": "#doi:10.18434/mds2-2301"
    },
    {
      "@id": "cmps/Open Data Challenge Notebook Human Labels and Plots.py.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "Open Data Challenge Notebook Human Labels and Plots.py.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Open%20Data%20Challenge%20Notebook%20Human%20Labels%20and%20Plots.py.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/Open Data Challenge Notebook Human Labels and Plots.py",
      "description": "SHA-256 checksum value for Open Data Challenge Notebook Human Labels and Plots.py",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "27948fe0fc211ac0022fa25255c66e9f1bd0516ad46ea73b662f80ec6584666b",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    },
    {
      "@id": "cmps/Condensing Write-Ups of Human and Machine Labeling Metrics.docx",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "Condensing Write-Ups of Human and Machine Labeling Metrics.docx",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Condensing%20Write-Ups%20of%20Human%20and%20Machine%20Labeling%20Metrics.docx",
      "mediaType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      "size": 767921,
      "checksum": {
        "hash": "85724c3f0c340f6a8839805791b9cdfb404dbc36962fa8bf2525a245f59f604b",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/VO2 -Nb2O3 XRD Combiview.txt",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "VO2 -Nb2O3 XRD Combiview.txt",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/VO2%20-Nb2O3%20XRD%20Combiview.txt",
      "mediaType": "text/plain",
      "size": 13396322,
      "checksum": {
        "hash": "3b47bf36b2abaef376730226e2616a353ba07571c46e71bce464cf9e9bfbe348",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/VO2 -Nb2O3 XRD Combiview.txt.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "VO2 -Nb2O3 XRD Combiview.txt.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/VO2%20-Nb2O3%20XRD%20Combiview.txt.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/VO2 -Nb2O3 XRD Combiview.txt",
      "description": "SHA-256 checksum value for VO2 -Nb2O3 XRD Combiview.txt",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "61b3ea776912f2adc80d8bff3b1f505d3346bef6db57e3a5fd5b3889f2bc3508",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    },
    {
      "@id": "cmps/Readme.txt",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "Readme.txt",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Readme.txt",
      "mediaType": "text/plain",
      "size": 7255,
      "checksum": {
        "hash": "79664bb816f830b98254f13bd699fe4266097471b3a4eb8093cdf3725b475110",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/Compare ML Labels.csv",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "Compare ML Labels.csv",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Compare%20ML%20Labels.csv",
      "mediaType": "application/vnd.ms-excel",
      "size": 3314,
      "checksum": {
        "hash": "ea71726981f484032a287deb4391e0b1d78a3aa32526eae4a08045334b3ec6bb",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/Readme.txt.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "Readme.txt.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Readme.txt.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/Readme.txt",
      "description": "SHA-256 checksum value for Readme.txt",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "2664dca65b4e2a631395dcd5474b49c2e8dc671ec1f00a29b45ee800159b07aa",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    },
    {
      "@id": "cmps/Open Data Challenge Notebook Human Labels and Plots.py",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "Open Data Challenge Notebook Human Labels and Plots.py",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Open%20Data%20Challenge%20Notebook%20Human%20Labels%20and%20Plots.py",
      "mediaType": "application/octet-stream",
      "size": 13428,
      "checksum": {
        "hash": "8c42083be78e24fab507984611665d0744fae2c64d9d9a527098410e42671997",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/VO2 - Nb2O3 Composition and temp Combiview.txt",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "VO2 - Nb2O3 Composition and temp Combiview.txt",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/VO2%20-%20Nb2O3%20Composition%20and%20temp%20Combiview.txt",
      "mediaType": "text/plain",
      "size": 2472,
      "checksum": {
        "hash": "6fcc4e862ea866286436a8624b2b82241ce80a08cc87eb0bb878f56ce6fdd027",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/Open Data Challenge Notebook Machine Labels and Plots.py.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "Open Data Challenge Notebook Machine Labels and Plots.py.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Open%20Data%20Challenge%20Notebook%20Machine%20Labels%20and%20Plots.py.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/Open Data Challenge Notebook Machine Labels and Plots.py",
      "description": "SHA-256 checksum value for Open Data Challenge Notebook Machine Labels and Plots.py",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "5f9f437424f445d7604b7d4423dd10332fe25cee29a9451a114d0e495fea0c8c",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    },
    {
      "@id": "cmps/Open Data Challenge Notebook Machine Labels and Plots.py",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "Open Data Challenge Notebook Machine Labels and Plots.py",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Open%20Data%20Challenge%20Notebook%20Machine%20Labels%20and%20Plots.py",
      "mediaType": "application/octet-stream",
      "size": 11437,
      "checksum": {
        "hash": "5be8431734c679761488d7f43dc9bbfe6a209e8ff111c50a3b56fd0b3cd2f567",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/Human Labels.xlsx",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "Human Labels.xlsx",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Human%20Labels.xlsx",
      "mediaType": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
      "size": 17183,
      "checksum": {
        "hash": "0056a45f7d45694368597fe7804569339745214530584dae10652873fed38cd2",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/Condensing Write-Ups of Human and Machine Labeling Metrics.docx.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "Condensing Write-Ups of Human and Machine Labeling Metrics.docx.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Condensing%20Write-Ups%20of%20Human%20and%20Machine%20Labeling%20Metrics.docx.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/Condensing Write-Ups of Human and Machine Labeling Metrics.docx",
      "description": "SHA-256 checksum value for Condensing Write-Ups of Human and Machine Labeling Metrics.docx",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "1715eeac8c839b2bb772aa42a287c972ef048ac8738173e836f5de2ed5f00d7e",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    },
    {
      "@id": "cmps/VO2 - Nb2O3 Composition and temp Combiview.txt.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "VO2 - Nb2O3 Composition and temp Combiview.txt.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/VO2%20-%20Nb2O3%20Composition%20and%20temp%20Combiview.txt.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/VO2 - Nb2O3 Composition and temp Combiview.txt",
      "description": "SHA-256 checksum value for VO2 - Nb2O3 Composition and temp Combiview.txt",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "9c4521bb368f675037a56b7aaa08390343baa0b6e69f18e194c1434c62c15170",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    },
    {
      "@id": "cmps/Compare ML Labels.csv.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "Compare ML Labels.csv.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Compare%20ML%20Labels.csv.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/Compare ML Labels.csv",
      "description": "SHA-256 checksum value for Compare ML Labels.csv",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "2e8490742cc2c8b52284c5f68eb8d43dc1afcff6110aa0ce48a420fc2ccfe2fb",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    },
    {
      "@id": "cmps/Human Labels.xlsx.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "Human Labels.xlsx.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/Human%20Labels.xlsx.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/Human Labels.xlsx",
      "description": "SHA-256 checksum value for Human Labels.xlsx",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "53c39ff611cdf5a9eab380ef87cc4d71f6a3e2c2568daf32175ce163af07459a",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    },
    {
      "@id": "cmps/cluster_assignment_loglik_all.csv",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "cluster_assignment_loglik_all.csv",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/cluster_assignment_loglik_all.csv",
      "mediaType": "application/vnd.ms-excel",
      "size": 22443,
      "checksum": {
        "hash": "e4fb673a2c2fcfc8a258eedc0a9236be7e6c0bd693ff2e78864395c8d882415a",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    },
    {
      "@id": "cmps/cluster_assignment_loglik_all.csv.sha256",
      "@type": [
        "nrdp:ChecksumFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "filepath": "cluster_assignment_loglik_all.csv.sha256",
      "downloadURL": "https://data.nist.gov/od/ds/mds2-2301/cluster_assignment_loglik_all.csv.sha256",
      "algorithm": {
        "tag": "sha256",
        "@type": "Thing"
      },
      "describes": "cmps/cluster_assignment_loglik_all.csv",
      "description": "SHA-256 checksum value for cluster_assignment_loglik_all.csv",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/ChecksumFile"
      ],
      "mediaType": "text/plain",
      "size": 64,
      "checksum": {
        "hash": "eb4f8351af9abeda666c3208ac53fa5f0e22859d9c86313ff9196a41cf52c8c6",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      },
      "valid": true
    }
  ],
  "_editStatus": "done",
  "issued": "2020-10-23T14:49:00.0",
  "status": "available",
  "versionHistory": [
    {
      "version": "1.0.0",
      "issued": "2020-09-22",
      "@id": "ark:/88434/mds2-2301",
      "location": "https://data.nist.gov/od/id/ark:/88434/mds2-2301",
      "description": "initial release"
    }
  ],
  "releaseHistory": {
    "@id": "ark:/88434/mds2-2301.rel",
    "@type": [
      "nrdr:ReleaseHistory"
    ],
    "hasRelease": [
      {
        "version": "1.0.0",
        "issued": "2020-09-22",
        "@id": "ark:/88434/mds2-2301/pdr:v/1.0.0",
        "location": "https://data.nist.gov/od/id/ark:/88434/mds2-2301/pdr:v/1.0.0",
        "description": "initial release"
      },
      {
        "version": "1.0.1",
        "issued": "2020-09-22 00:00:00",
        "@id": "ark:/88434/mds2-2301/pdr:v/1.0.1",
        "location": "https://data.nist.gov/od/id/ark:/88434/mds2-2301/pdr:v/1.0.1",
        "description": "metadata update"
      },
      {
        "version": "1.0.2",
        "issued": "2020-09-22 00:00:00",
        "@id": "ark:/88434/mds2-2301/pdr:v/1.0.2",
        "location": "https://data.nist.gov/od/id/ark:/88434/mds2-2301/pdr:v/1.0.2",
        "description": "metadata update"
      }
    ]
  },
  "annotated": "2022-01-07T21:34:55.422086"
}