{
  "_schema": "https://data.nist.gov/od/dm/nerdm-schema/v0.7#",
  "@context": [
    "https://data.nist.gov/od/dm/nerdm-pub-context.jsonld",
    {
      "@base": "ark:/88434/mds2-3198"
    }
  ],
  "@type": [
    "nrdp:PublicDataResource",
    "dcat:Dataset"
  ],
  "_extensionSchemas": [
    "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/PublicDataResource"
  ],
  "@id": "ark:/88434/mds2-3198",
  "ediid": "ark:/88434/mds2-3198",
  "version": "1.0.1",
  "doi": "doi:10.18434/mds2-3198",
  "title": "Code used to produce terms list in the work \"NLP-Driven Electron Microscopy Ontology Development\"",
  "contactPoint": {
    "fn": "June W. Lau",
    "hasEmail": "mailto:june.lau@nist.gov"
  },
  "modified": "2021-12-31 00:00:00",
  "status": "available",
  "landingPage": "https://data.nist.gov/od/id/mds2-3198",
  "description": [
    "This is a collection of code written by Maurice Curran that was used to process the Microscopy and Microanalysis conference proceedings corpus into word products described in the publication \"NLP-Driven Electron Microscopy Ontology Development\". The scripts are written in Python, to be used in the following order:\n1. SettingUpTextFiles.py and CopyingText.py to get the raw text files;\n2. SentenceConversion.py;\n3. reference_remover.py;\n4. testing.py and testingavg.py;\n5. SentenceCreator.py;\n6. matscholar_model.py to get matscholar tags;\n7. training_model_gensim.py to get the gensim model;\n8. word2vecscript.py and gensim_visual.py."
  ],
  "keyword": [
    "Natural language processing",
    "NLP",
    "electron microscopy",
    "controlled vocabulary",
    "ontology"
  ],
  "topic": [
    {
      "@type": "Concept",
      "scheme": "https://data.nist.gov/od/dm/nist-themes/v1.1",
      "tag": "Information Technology: Data and informatics"
    },
    {
      "@type": "Concept",
      "scheme": "https://data.nist.gov/od/dm/nist-themes/v1.1",
      "tag": "Materials: Modeling and computational material science"
    },
    {
      "@type": "Concept",
      "scheme": "https://data.nist.gov/od/dm/nist-themes/v1.1",
      "tag": "Materials: Materials characterization"
    }
  ],
  "accessLevel": "public",
  "license": "https://www.nist.gov/open/license",
  "publisher": {
    "name": "National Institute of Standards and Technology",
    "@type": "org:Organization"
  },
  "language": [
    "en"
  ],
  "bureauCode": [
    "006:55"
  ],
  "programCode": [
    "006:052"
  ],
  "theme": [
    "Information Technology: Data and informatics",
    "Materials: Modeling and computational material science",
    "Materials: Materials characterization"
  ],
  "components": [
    {
      "@id": "cmps/PythonFiles_Maurice_clean.zip",
      "@type": [
        "nrdp:DataFile",
        "nrdp:DownloadableFile",
        "dcat:Distribution"
      ],
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/pub/v0.7#/definitions/DataFile"
      ],
      "filepath": "PythonFiles_Maurice_clean.zip",
      "downloadURL": "https://data.nist.gov/od/ds/ark:/88434/mds2-3198/PythonFiles_Maurice_clean.zip",
      "mediaType": "application/zip",
      "description": "This zip file contains a set of scripts that extract frequently occurring words from the conference proceedings of Microscopy & Microanalysis between 2002 and 2019.",
      "title": "NLP code to produce words about electron microscopy",
      "size": 54540,
      "checksum": {
        "hash": "b4ffe47eb144e0b2a77eac3b368ff8cf67e42092e0f42acb9fb15a0bfb22fb82",
        "algorithm": {
          "tag": "sha256",
          "@type": "Thing"
        }
      }
    }
  ],
  "annotated": "2025-07-09T15:33:39.037893",
  "revised": "2024-09-05T15:45:20.732646",
  "issued": null,
  "firstIssued": "2024-09-05T15:45:20.732646",
  "references": [
    {
      "@type": [
        "deo:BibliographicReference"
      ],
      "@id": "#ref:10.1007/s40192-024-00378-y",
      "refType": "IsSupplementTo",
      "location": "10.1007/s40192-024-00378-y",
      "_extensionSchemas": [
        "https://data.nist.gov/od/dm/nerdm-schema/bib/v0.7#/definitions/DCiteReference"
      ]
    }
  ],
  "releaseHistory": {
    "@id": "ark:/88434/mds2-3198.rel",
    "@type": [
      "nrdr:ReleaseHistory"
    ],
    "hasRelease": [
      {
        "version": "1.0.0",
        "issued": "2021-12-31",
        "@id": "ark:/88434/mds2-3198/pdr:v/1.0.0",
        "location": "https://data.nist.gov/od/id/ark:/88434/mds2-3198/pdr:v/1.0.0",
        "description": "initial release"
      },
      {
        "version": "1.0.1",
        "issued": "2021-12-31 00:00:00",
        "@id": "ark:/88434/mds2-3198/pdr:v/1.0.1",
        "location": "https://data.nist.gov/od/id/ark:/88434/mds2-3198/pdr:v/1.0.1",
        "description": "metadata update"
      }
    ]
  }
}