From 77a813ae2e830d5fb391a06d098b3af6ecf7ed26 Mon Sep 17 00:00:00 2001 From: meerkat Date: Fri, 8 Oct 2021 22:57:29 +1100 Subject: [PATCH] Initial draft --- .gitignore | 2 + .vscode/launch.json | 15 + Marti.md | 9 + README.md | 94 ++++- docs/README.md | 0 docs/ckan.md | 5 + docs/ckan.txt | 1 + docs/distribution.md | 93 +++++ docs/formats/csv.md | 0 docs/formats/fixedlength.md | 0 docs/formats/sitpro.md | 0 docs/formats/xml.md | 0 docs/magda.md | 7 + docs/quality.md | 60 +++ docs/references.md | 22 + docs/samples/asic_ckan_api.json | 188 +++++++++ docs/what.md | 0 docs/when.md | 0 docs/who.md | 0 docs/why.md | 0 source/powershell/Compare-MartiResource.ps1 | 153 +++++++ source/powershell/Compress-Marti.ps1 | 120 ++++++ source/powershell/ConvertFrom-Ckan.ps1 | 63 +++ source/powershell/ConvertTo-Ckan.ps1 | 2 + source/powershell/Get-Marti.ps1 | 124 ++++++ source/powershell/New-Marti.ps1 | 435 ++++++++++++++++++++ test/powershell/test_Marti.ps1 | 45 ++ test/powershell/test_MartiCkan.ps1 | 36 ++ test/powershell/test_MartiData1.ps1 | 96 +++++ test/powershell/test_MartiData2.ps1 | 41 ++ test/powershell/test_MartiData3.ps1 | 18 + test/powershell/test_retrievedata.ps1 | 38 ++ tools.md | 10 + 33 files changed, 1675 insertions(+), 2 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 Marti.md create mode 100644 docs/README.md create mode 100644 docs/ckan.md create mode 100644 docs/ckan.txt create mode 100644 docs/distribution.md create mode 100644 docs/formats/csv.md create mode 100644 docs/formats/fixedlength.md create mode 100644 docs/formats/sitpro.md create mode 100644 docs/formats/xml.md create mode 100644 docs/magda.md create mode 100644 docs/quality.md create mode 100644 docs/references.md create mode 100644 docs/samples/asic_ckan_api.json create mode 100644 docs/what.md create mode 100644 docs/when.md create mode 100644 docs/who.md create mode 100644 docs/why.md create mode 100644 source/powershell/Compare-MartiResource.ps1 create mode 100644 
source/powershell/Compress-Marti.ps1 create mode 100644 source/powershell/ConvertFrom-Ckan.ps1 create mode 100644 source/powershell/ConvertTo-Ckan.ps1 create mode 100644 source/powershell/Get-Marti.ps1 create mode 100644 source/powershell/New-Marti.ps1 create mode 100644 test/powershell/test_Marti.ps1 create mode 100644 test/powershell/test_MartiCkan.ps1 create mode 100644 test/powershell/test_MartiData1.ps1 create mode 100644 test/powershell/test_MartiData2.ps1 create mode 100644 test/powershell/test_MartiData3.ps1 create mode 100644 test/powershell/test_retrievedata.ps1 create mode 100644 tools.md diff --git a/.gitignore b/.gitignore index 0e13eeb..d4bf2b4 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ buildNumber.properties .mvn/timing.properties # https://github.com/takari/maven-wrapper#usage-without-binary-jar .mvn/wrapper/maven-wrapper.jar + +./test/powershell/results/* diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..4dd840e --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + { + "name": "PowerShell: Interactive Session", + "type": "PowerShell", + "request": "launch", + "cwd": "" + } + ] +} \ No newline at end of file diff --git a/Marti.md b/Marti.md new file mode 100644 index 0000000..1cbbb07 --- /dev/null +++ b/Marti.md @@ -0,0 +1,9 @@ +# Marti document + +The metadata reconciliation transfer information is referred +to as the **Marti** document throughout this documentation. + +The **Marti** document can be part of a message or a document +in its own right. 
If the document is a file then the recommended +name for the document is the same name as the data file, +including extension, with the added extension of ``.mti`` diff --git a/README.md b/README.md index 576d46b..0c73528 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,92 @@ -# marti -Metadata Reconcile Transfer Information +# Marti + +Marti is for metadata reconciliation for transfer information. + +The objective is to provide transfer information for high volume data such as +in files. The document (files) can be transferred via HTTPS, SFTP, message queue, +network share or other. The transfer information being described here does not +need to arrive via the same channel and could be received via email or +even synchronous / asynchronous API. The transfer information does not dictate or +determine how the data is formatted. + +The transfer information can provide details on the document format, but in itself +it does not understand the data format. + +Marti is intended to provide minimum basic information on the transfer with +ability to include additional optional information. The metadata reconciliation +transfer document being described here will be referred to as the [Marti](Marti.md) +document throughout this documentation. + +The information is supplied as a separate document which could be another file +or supplied via API by the publisher notifying the consumer(s). + +## Tools and Scenarios + +Tools and code snippets are provided to generate the information and then +assist in reconciling the document contents once received. 
Refer to the +programming folders for more details or [Tools](tools.md) for more general +information + +## Transfer information + +### Mandatory information + +The mandatory information is: + +* Title +* Unique identifier +* Distribution list - See Distribution section summary below or detailed document [Distribution](docs/distribution.md) + + +### Optional information + +The optional information is: + +* Description +* Modified +* Tags or keywords +* Publisher +* Contact point +* Access level +* Rights +* License +* Spatial (*) +* Temporal (*) +* Described By - A link to the metadata describing the document. + More detailed information could be supplied in the distribution +* Landing page +* Theme + +### Information extension + +The information supplied can be extended by agreeing parties and there +are place holders in the definition. + +### Distribution + +The distribution section can be repeated, but at least one must be included. +If the distribution is repeated it will commonly be for defining +multiple formats of the same data. + +* Title +* Unique identifier +* Document name - If no download URL, then this will be the document name +* Issued date - When the document was made available. The date can include time +* Modified - When the document was created or modified. This is the date and time +* Size of document - The document size in bytes +* Hash of document - The hash of the document, which can be blank especially for large documents +* Hash algorithm + +### Distribution optional + +The following are some of the optional items in the distribution section. 
See [Distribution](docs/distribution.md) +for more items and details + +* Description +* Download URL +* Version +* Format +* Compression +* Encryption + + diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/ckan.md b/docs/ckan.md new file mode 100644 index 0000000..65e74df --- /dev/null +++ b/docs/ckan.md @@ -0,0 +1,5 @@ + + +https://ckan.org/ + +Sample Json from https://data.gov.au/data/dataset/f2b7c2c1-f4ef-4ae9-aba5-45c19e4d3038 diff --git a/docs/ckan.txt b/docs/ckan.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/ckan.txt @@ -0,0 +1 @@ + diff --git a/docs/distribution.md b/docs/distribution.md new file mode 100644 index 0000000..dd627b0 --- /dev/null +++ b/docs/distribution.md @@ -0,0 +1,93 @@ +# Distribution definition + +The distribution definition describes a single document, though +some documents may expand to multiple documents if they are +compressed with a utility such as WinZIP or 7ZIP + + +* Title +* Document name - Commonly being absolute or relative file name. + This value could also be a URL address or network path +* Issued date - When the document was made available. The date can include time +* Modified - When the document was created or modified. This is the date and time +* Size of document - The document size in bytes +* Hash of document - The hash of the document, which can be blank especially for large documents +* Hash algorithm + + +The following are optional in the distribution section. + +* Identifier +* Description +* Download URL +* Version - Document version. The same document could be updated or this might denote the next version + of a regular report. For example a daily extract will have the version number incremented + every day and provide a new URL. The previous document can be retained. 
+* Format - if not specified then the consumer will in all likelihood use the document extension / mime type +* Media Type +* Expiry Date - The date and time that this document expires and can be removed from the download URL + location. This is not the document retention period as might be required for archiving. +* Described By - A link to the metadata describing this document data and format +* Compression - Type of compression used if any +* Encryption - Type of encryption used if any + + +## Compression + +Documents can be compressed using a utility. A single compressed document can contain +multiple documents. The Marti definition document applies to the compressed document +and not to the contents, which could be multiple documents. + +In the case of a compressed document, there should be a Marti definition document in the +compressed document to match the data document. That is the number of the records in a +compressed document should always be an even number. + +Compression of documents always occurs before encryption. + +### Marti definition for Compressed Document + +For a compressed document that is not encrypted, the distribution definition will be: + +* Title - The compressed document title which could be a group name +* Document name - Commonly being absolute or relative file name. + This value could also be a URL address or network path +* Issued date - When the compressed document was made available. +* Modified - When the compressed document was created or modified. This is the date and time + and is not the modified date of the document in the compressed document. +* Size of document - The compressed document size in bytes +* Hash of document - The hash of the compressed document, which can be + blank especially for large documents +* Hash algorithm + +The reason for this approach is it allows a generic tool to be deployed to +check the validity of the contents without unpacking the received / fetched +document. 
That is you can perform load quality pipeline processing. + +## Encryption + +The encryption of content is always applied after compression not before, if +you are not using the compression tool native encryption. WinZIP and 7ZIP +provide encryption within the tool execution. + +If the compression is TAR or GZIP then you may consider applying a GPG +or other encryption algorithm to the compressed file. + +* Title - The encrypted document title +* Document name - Commonly being absolute or relative file name. + This value could also be a URL address or network path +* Issued date - When the **encrypted** document was made available. +* Modified - When the **encrypted** document was created or modified. + This is the date and time and is not the modified date of the encrypted document. +* Size of document - The **decrypted** document size in bytes +* Hash of document - The hash of the **decrypted** document, which can be + blank especially for large documents +* Hash algorithm + +The rationale for using the decrypted document attributes is that an encrypted +document is unlikely to be able to be modified without knowing encryption keys. +Checking the decrypted document attributes is a better check where appropriate. + +The reason for this approach is it allows a generic tool to be deployed to +decrypt and check the validity of the received / fetched document without +needing to understand the contents. That is you can perform load quality +pipeline processing. 
diff --git a/docs/formats/csv.md b/docs/formats/csv.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/formats/fixedlength.md b/docs/formats/fixedlength.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/formats/sitpro.md b/docs/formats/sitpro.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/formats/xml.md b/docs/formats/xml.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/magda.md b/docs/magda.md new file mode 100644 index 0000000..569f65f --- /dev/null +++ b/docs/magda.md @@ -0,0 +1,7 @@ + + +https://magda.io/ + +https://search.data.gov.au/api/v0/apidocs/index.html + + diff --git a/docs/quality.md b/docs/quality.md new file mode 100644 index 0000000..331f0b2 --- /dev/null +++ b/docs/quality.md @@ -0,0 +1,60 @@ +# Quality definition + +The Marti definition allows for the inclusion of a load quality +definition. This load quality definition is intended to be +able to be applied universally with common tools. As such not +all needs are covered. + + + + +* Number of records in the document - This is the number of data primary records not the + count of end of lines and is agreed between parties. XML record counts could be based + on the number of primary segments under root. JSON records can be counted in a similar way. + The headers or trailing records are not counted + + +Sequence number - linked to the job producing the document and therefore daily, weekly and monthly extracts for the +same document would have different sequence numbers + +Discourage use of magic formats for document names such as + +XT_PARTY_20210911_SQ00001_N000567891234_V01.DAT + +Header and Trailer records are not part of the quality definition except + +In fact the trailer record is intended to be replaced by this quality definition + +The header, if it exists, is only used where it identifies the column name sequence of the data. 
+ +Effective and Process date ----------------- + +Row count - +Column count - +Depth count - + +Mandatory - data must be present and cannot be blank or null +Uniqueness - data value must be unique within the document +Data integrity - data exists within defined tolerances + + +row count 9999 + +column count 9999 + +column_name sum 9999 + +column_name gt 9999 +column_name lt 9999 +column_name eq 9999 +column_name eq " " +column_name ne 9999 +column_name ne " " == Check for value +column_name ge 9999 +column_name le " " +column_name in " ", " ", " " + +column_name is integer +column_name is decimal +column_name is unique + diff --git a/docs/references.md b/docs/references.md new file mode 100644 index 0000000..914cc53 --- /dev/null +++ b/docs/references.md @@ -0,0 +1,22 @@ + + + +https://dex.dss.gov.au/sites/default/files/documents/2021-06/data-exchange-protocols-june-2021.pdf + +https://www.imf.org/en/Data +https://www.imf.org/-/media/Files/Publications/WEO/WEO-Database/2021/WEOApr2021all.ashx + +SDMX + +LIXI +https://www.imf.org/-/media/Files/Publications/WEO/WEO-Database/2021/WEOApr2021all.ashx + + +https://standards.theodi.org/introduction/types-of-open-standards-for-data/ +https://developers.google.com/transit/gtfs/ + +http://www.popoloproject.com/ + +https://datatracker.ietf.org/doc/html/rfc6350#section-6.2.7 +https://www.w3.org/TR/vocab-dcat/ + diff --git a/docs/samples/asic_ckan_api.json b/docs/samples/asic_ckan_api.json new file mode 100644 index 0000000..4119d78 --- /dev/null +++ b/docs/samples/asic_ckan_api.json @@ -0,0 +1,188 @@ + +{ + "help": "https://data.gov.au/data/api/3/action/help_show?name=package_show", + "success": true, + "result": { + "license_title": "Creative Commons Attribution 3.0 Australia", + "maintainer": "xuantungphan", + "relationships_as_object": [], + "jurisdiction": "Commonwealth of Australia", + "temporal_coverage_to": "", + "private": false, + "maintainer_email": null, + "num_tags": 7, + "geospatial_topic": [], + "id": 
"f2b7c2c1-f4ef-4ae9-aba5-45c19e4d3038", + "metadata_created": "2015-05-17T23:41:45.976104", + "spatial_coverage": "Australia", + "metadata_modified": "2021-09-08T20:42:43.210579", + "author": "Australian Securities and Investments Commission (ASIC)", + "author_email": null, + "state": "active", + "version": null, + "license_id": "cc-by", + "contact_point": "Access.Request@asic.gov.au", + "type": "dataset", + "resources": [ + { + "mimetype": "application/pdf", + "cache_url": null, + "hash": "22552e688cf87ff1edcaf000daec307c", + "description": "Help File as at 29/03/2021", + "name": "Financial Advisers Dataset - Help File", + "format": "PDF", + "url": "https://data.gov.au/data/dataset/f2b7c2c1-f4ef-4ae9-aba5-45c19e4d3038/resource/b4fc36ca-b9d5-4ad4-9631-c914367ea302/download/financial-advisers-register-help-file.pdf", + "datastore_active": true, + "cache_last_updated": null, + "package_id": "f2b7c2c1-f4ef-4ae9-aba5-45c19e4d3038", + "created": "2015-05-19T05:10:05.120187", + "state": "active", + "mimetype_inner": null, + "last_modified": "2021-03-29T02:23:10.146935", + "position": 0, + "wms_layer": "", + "revision_id": "e0c387a2-8ae3-4c95-a88c-f0328546a824", + "url_type": "upload", + "id": "b4fc36ca-b9d5-4ad4-9631-c914367ea302", + "resource_type": null, + "size": 741345 + }, + { + "mimetype": "application/zip", + "cache_url": null, + "hash": "5708dfd23d7a089d78da03932e4f8871", + "description": "Financial Advisers Dataset extract as at 09/09/2021 06:42", + "name": "Financial Advisers Dataset - Current", + "format": "CSV", + "url": "https://data.gov.au/data/dataset/f2b7c2c1-f4ef-4ae9-aba5-45c19e4d3038/resource/691ff9ed-b601-481d-8283-88127dbbc869/download/financial_advisers_202109.zip", + "datastore_active": true, + "cache_last_updated": null, + "package_id": "f2b7c2c1-f4ef-4ae9-aba5-45c19e4d3038", + "created": "2015-07-14T05:18:49.260369", + "state": "active", + "mimetype_inner": null, + "last_modified ": "09/09/2021 06:42", + "last_modified": 
"2021-09-08T20:42:41.180925", + "position": 1, + "revision_id": "052e8870-2258-4e1a-bd8e-63539ea6a86d", + "url_type": "upload", + "id": "691ff9ed-b601-481d-8283-88127dbbc869", + "resource_type": null, + "size": 4724626 + }, + { + "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "cache_url": null, + "hash": "", + "description": "Financial Advisers Dataset extract as at 09/09/2021 06:42", + "name": "Financial Advisers Dataset - Current", + "format": "XLSX", + "url": "https://data.gov.au/data/dataset/f2b7c2c1-f4ef-4ae9-aba5-45c19e4d3038/resource/2156cb99-3358-4847-8b5b-fcd2f0d3c4e2/download/financial_advisers_202109.xlsx", + "datastore_active": true, + "cache_last_updated": null, + "package_id": "f2b7c2c1-f4ef-4ae9-aba5-45c19e4d3038", + "created": "2016-11-02T11:54:38.840025", + "state": "active", + "mimetype_inner": null, + "last_modified ": "09/09/2021 06:42", + "last_modified": "2021-09-08T20:42:43.173322", + "position": 2, + "revision_id": "052e8870-2258-4e1a-bd8e-63539ea6a86d", + "url_type": "upload", + "id": "2156cb99-3358-4847-8b5b-fcd2f0d3c4e2", + "resource_type": null, + "size": 9518613 + } + ], + "num_resources": 3, + "tags": [ + { + "vocabulary_id": null, + "state": "active", + "display_name": "ASIC", + "id": "810f2863-57e1-4667-bcd7-a7e5a23e2c85", + "name": "ASIC" + }, + { + "vocabulary_id": null, + "state": "active", + "display_name": "FA", + "id": "33f60a4e-677d-488e-997f-35b5d0e07f7e", + "name": "FA" + }, + { + "vocabulary_id": null, + "state": "active", + "display_name": "Financial Advisor", + "id": "185cfc29-d1fa-49d4-9838-5fc8a5356eac", + "name": "Financial Advisor" + }, + { + "vocabulary_id": null, + "state": "active", + "display_name": "Financial Services", + "id": "bca08a1e-f484-4d80-985b-21a8edceffeb", + "name": "Financial Services" + }, + { + "vocabulary_id": null, + "state": "active", + "display_name": "Government", + "id": "3ae412c6-5a51-43f3-ace6-db9cce09078a", + "name": "Government" + }, + { + 
"vocabulary_id": null, + "state": "active", + "display_name": "Moneysmart", + "id": "29392d0c-4634-43a1-9fa8-75b725d8b2af", + "name": "Moneysmart" + }, + { + "vocabulary_id": null, + "state": "active", + "display_name": "Register", + "id": "08a3cb64-498a-4a1f-a28a-637f5883c2da", + "name": "Register" + } + ], + "temporal_coverage_from": "2015-05-18", + "language": "English", + "groups": [ + { + "display_name": "Business Support and Regulation", + "description": "Formulating policy to regulate and support the private sector, including small business and non-profit organisations. Developing strategies to assist business growth and management. Implementing advocacy programs, providing funding and administering regulatory bodies.\r\n\r\nIncludes components listed at http://agift.naa.gov.au/000411.htm", + "image_display_url": "", + "title": "Business Support and Regulation", + "id": "ada735db-98c9-42cb-8969-dc356ea4281e", + "name": "business" + } + ], + "creator_user_id": "ca0a12ce-2789-41d6-8441-2a554ebea837", + "relationships_as_subject": [], + "field_of_research": [], + "organization": { + "description": "ASIC is Australia’s corporate, markets and financial services regulator.\r\nASIC contributes to Australia’s economic reputation and wellbeing by ensuring that Australia’s financial markets are fair and transparent, supported by confident and informed investors and consumers.\r\n", + "created": "2014-08-26T02:05:56.563716", + "title": "Australian Securities and Investments Commission (ASIC)", + "name": "australian-securities-and-investments-commission-asic", + "is_organization": true, + "state": "active", + "image_url": "2019-05-14-060147.648429ASIC-Master-Logo.jpg", + "revision_id": "a24f67c9-a818-4f33-a50e-0d80adc5b0ac", + "type": "organization", + "id": "ddd3e2d8-d0e8-4d43-a1e5-39984eb8e774", + "approval_status": "approved" + }, + "name": "asic-financial-adviser", + "isopen": true, + "url": null, + "notes": "###Update November 2019 - additional fields 
###\r\n\r\nFrom 21 November 2019, the dataset will be updated to include 7 new fields (see help file for details)\r\n\r\nThese fields are included in conjunction with the professional standards reforms for financial advisers. More information can be found on the ASIC website https://asic.gov.au/regulatory-resources/financial-services/professional-standards-for-financial-advisers-reforms/.\r\n\r\n__Note:__ For most advisers the new fields will be unpopulated on 21 November 2019. As advisers provide this data to ASIC it will appear in the dataset.\r\n\r\n***\r\n\r\n###Dataset summary###\r\n\r\nASIC is Australia’s corporate, markets and financial services regulator. ASIC contributes to Australia’s economic reputation and wellbeing by ensuring that Australia’s financial markets are fair and transparent, supported by confident and informed investors and consumers. \r\n \r\nAustralian Financial Services Licensees are required to keep the details of their financial advisers up to date on ASIC's Financial Advisers Register. Information contained in the register is made available to the public to search via ASIC's Moneysmart website. \r\n\r\nSelect data from the Financial Advisers Register will be uploaded each week to www.data.gov.au. The data made available will be a snapshot of the register at a point in time. Legislation prescribes the type of information ASIC is allowed to disclose to the public. 
\r\n\r\nThe information included in the downloadable dataset is: \r\n\r\n* Adviser name\r\n* Adviser number\r\n* Adviser role\r\n* Adviser sub type\r\n* Adviser role status\r\n* Adviser ABN\r\n* Year first provided advice \r\n* Licence name\r\n* Licence number\r\n* Licence ABN\r\n* Licence controlled by\r\n* Adviser start date\r\n* Adviser end date\r\n* Adviser CPD failure year\r\n* Adviser principal business address suburb\r\n* Adviser principal business address State/Territory\r\n* Adviser principal business address postcode\r\n* Adviser principal business address Country\r\n* Appointing representative name\r\n* Appointing representative number\r\n* Appointing representative ABN\r\n* Disciplinary action start date\r\n* Disciplinary action end date\r\n* Disciplinary action type\r\n* Product authorisations (for a full list see the Financial Adviser Register – Help File)\r\n* Qualifications and Training\r\n* FASEA approved qualifications\r\n* Memberships\r\n* Further restrictions\r\n\r\nAdditional information about financial advisers can be found via [ASIC's website] (http://www.asic.gov.au/ \"ASIC's website\"). Accessing some information may attract a fee. \r\n\r\nMore information about searching [ASIC's registers] (http://www.asic.gov.au/online-services/search-asics-registers/ \"ASIC's registers\"). 
", + "owner_org": "ddd3e2d8-d0e8-4d43-a1e5-39984eb8e774", + "license_url": "http://creativecommons.org/licenses/by/3.0/au/", + "data_state": "active", + "title": "ASIC – Financial Advisers Dataset", + "revision_id": "aa30a12e-e903-44eb-adb8-da54ce62df58", + "update_freq": "weekly" + } + } diff --git a/docs/what.md b/docs/what.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/when.md b/docs/when.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/who.md b/docs/who.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/why.md b/docs/why.md new file mode 100644 index 0000000..e69de29 diff --git a/source/powershell/Compare-MartiResource.ps1 b/source/powershell/Compare-MartiResource.ps1 new file mode 100644 index 0000000..100ec13 --- /dev/null +++ b/source/powershell/Compare-MartiResource.ps1 @@ -0,0 +1,153 @@ + +function Compare-MartiResource { + Param( + [Parameter(Mandatory)][String] $DataSource, + [Parameter(Mandatory)][PSCustomObject] $Resource, + [String] $LogPath + ) + + + $script:LogPathName = $LogPath + + Write-Debug "Parameter: LogPath Value: $LogPath " + Open-Log + Write-Log "Function 'Compare-MartiResource' parameters follow" + Write-Log "" + + if ($null -eq $Resource) { + $Global:MartiErrorId = "MRI2201" + $message = "No Marti resource definition supplied" + Write-Log ($message + " " + $Global:MartiErrorId) + Close-Log + throw $message + } + + + if ($null -eq $DataSource -or $DataSource -eq "") { + $Global:MartiErrorId = "MRI2202" + $message = "No document supplied" + Write-Log ($message + " " + $Global:MartiErrorId) + Close-Log + throw $message + } + + if ($DataSource.Length -le 1000) { + # Check if the name is a file + if (Test-Path -Path $DataSource) { + $inputData = Get-Content -Path $DataSource -Raw + Write-Host "Loading file $DataSource" + } else { + $inputData = $DataSource + } + } else { + $inputData = $DataSource + } + + $formatProcessed = $false + [System.Collections.ArrayList]$lerror = @() + + if 
($Resource.format -eq "CSV") { + $formatProcessed = $true + + $data = $inputData | ConvertFrom-Csv -Delim ',' + + $columns = ($data | get-member -type NoteProperty).count + $rows = @($data).count + + $Resource.attributes | ForEach-Object { + + if ($_.category -eq "dataset" -and $_.name -eq "records" -and $_.function -eq "count" -and $_.comparison -eq "EQ") { + + if ($_.value -ne $rows) { + $oError = [PSCustomObject]@{ + id = "MRI2203" + message = "Row count does not match" + found = "$rows" + expected = "$($_.value)" + } + $lerror += $oError + } + } + + if ($_.category -eq "dataset" -and $_.name -eq "columns" -and $_.function -eq "count" -and $_.comparison -eq "EQ") { + + if ($_.value -ne $columns) { + $oError = [PSCustomObject]@{ + id = "MRI2204" + message = "Column count does not match" + found = "$columns" + expected = "$($_.value)" + } + $lerror += $oError + } + } + + } + + + } + + + if ($Resource.format -eq "JSON") { + $formatProcessed = $true + + $data = $inputData | ConvertFrom-Json + + $rows = @($data.data.monitor).count + $item = $data.data.monitor[0] + $columns = ($item | get-member -type NoteProperty).count + + $Resource.attributes | ForEach-Object { + + if ($_.category -eq "dataset" -and $_.name -eq "records" -and $_.function -eq "count" -and $_.comparison -eq "EQ") { + + if ($_.value -ne $rows) { + $oError = [PSCustomObject]@{ + id = "MRI2203" + message = "Row count does not match" + found = "$rows" + expected = "$($_.value)" + } + $lerror += $oError + } + } + + if ($_.category -eq "dataset" -and $_.name -eq "columns" -and $_.function -eq "count" -and $_.comparison -eq "EQ") { + + if ($_.value -ne $columns) { + $oError = [PSCustomObject]@{ + id = "MRI2204" + message = "Column count does not match" + found = "$columns" + expected = "$($_.value)" + } + $lerror += $oError + } + } + + } + + + } + + + if (!$formatProcessed) { + $Global:MartiErrorId = "MRI2203" + $message = "Data format not supported" + Write-Log ($message + " " + $Global:MartiErrorId) + 
Close-Log + throw $message + } + + $status = "OK" + if ($lerror.Count -gt 0) { + $status = "ERROR" + } + $oResult = [PSCustomObject]@{ + status = $status + errors = $lerror + } + + Close-Log + return $oResult +} diff --git a/source/powershell/Compress-Marti.ps1 b/source/powershell/Compress-Marti.ps1 new file mode 100644 index 0000000..7cb7bda --- /dev/null +++ b/source/powershell/Compress-Marti.ps1 @@ -0,0 +1,120 @@ + +$script:LogPathName = "" +$script:SoftwareVersion = "0.0.1" + +$global:default_metaFile = "##marti##.mri" + +function Get-LogName { + + $date = Get-Date -f "yyyy-MM-dd" + + if (($null -eq $script:LogPathName) -or ($script:LogPathName -eq "")) + { + return $null + } + + if (!(Test-Path -Path $script:LogPathName)) { + $null = New-Item -Path $script:LogPathName -ItemType Directory + } + + $logName = $(Get-SoftwareName) + "_$date.log" + + return Join-Path -Path $script:LogPathName -ChildPath $logName +} + + +function Write-Log { + param( + [String] $LogEntry + ) + + $sFullPath = Get-LogName + + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + if ($null -ne $sFullPath -and $sFullPath -ne "") { + + if (!(Test-Path -Path $sFullPath)) { + Write-Host "Log path: $sFullPath" + $null = New-Item -Path $sFullPath -ItemType File + } + Add-Content -Path $sFullPath -Value "[$dateTime]. $LogEntry" + } + Write-Debug "[$dateTime]. 
$LogEntry" + +} + +function Open-Log { + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + Write-Log "***********************************************************************************" + Write-Log "* Start of processing: [$dateTime]" + Write-Log "***********************************************************************************" +} + +function Close-Log { + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + Write-Log "***********************************************************************************" + Write-Log "* End of processing: [$dateTime]" + Write-Log "***********************************************************************************" +} + +function Get-SoftwareName { + return [String] "MARTIREFERENCE" +} + + +function Compress-Marti +{ +Param( + [Parameter(Mandatory)][String] $SourceFolder, + [Parameter(Mandatory)][String] $ArchiveFile, + [String] $Filter ="*", + [switch] $ExcludeHash, + [String] $LogPath + +) + $script:LogPathName = $LogPath + + Write-Debug "Parameter: LogPath Value: $LogPath " + Open-Log + Write-Log "Function 'Compress-Marti' parameters follow" + Write-Log "Parameter: SourceFolder Value: $SourceFolder " + Write-Log "Parameter: ArchiveFile Value: $ArchiveFile " + Write-Log "Parameter: Filter Value: $Filter " + Write-Log "" + + $marti_mri = $global:default_metaFile + + $oMarti = New-MartiDefinition -SourceFolder $SourceFolder -Filter $Filter -LogPath $LogPath + $oMarti.description = "Sample execution" + + $fullMetadatName = Join-Path -Path (Split-Path -Path $ArchiveFile -Parent) -ChildPath $marti_mri + $x = ConvertTo-Json -InputObject $oMarti + Add-Content -Path $fullMetadatName -Value $x + + $getEnvName = $(Get-SoftwareName) + "_7ZIPLEVEL" + if ([System.Environment]::GetEnvironmentVariable($getEnvName) -ne "" -and $null -ne [System.Environment]::GetEnvironmentVariable($getEnvName)) { + $7zipLevel = [System.Environment]::GetEnvironmentVariable($getEnvName) + Write-Log "Compression level set to '$7zipLevel'" + } else { + $7zipLevel = 
"Normal" + } + + $getEnvName = $(Get-SoftwareName) + "_ZIPFORMAT" + if ([System.Environment]::GetEnvironmentVariable($getEnvName) -ne "" -and $null -ne [System.Environment]::GetEnvironmentVariable($getEnvName)) { + $7zipFormat = [System.Environment]::GetEnvironmentVariable($getEnvName) + Write-Log "Compression format set to '$7zipFormat'" + } else { + $7zipFormat= "SevenZip" + $7zipFormat= "Zip" + } + + Compress-7Zip -Path $SourceFolder -ArchiveFileName $ArchiveFile -Format $7zipFormat -CompressionLevel $7zipLevel -Filter $Filter + + Compress-7Zip -Path $fullMetadatName -ArchiveFileName $ArchiveFile -PreserveDirectoryRoot -Format $7zipFormat -CompressionLevel $7zipLevel -Append + + Remove-Item -Path $fullMetadatName + + Close-Log +} + + diff --git a/source/powershell/ConvertFrom-Ckan.ps1 b/source/powershell/ConvertFrom-Ckan.ps1 new file mode 100644 index 0000000..add6776 --- /dev/null +++ b/source/powershell/ConvertFrom-Ckan.ps1 @@ -0,0 +1,63 @@ + + +function ConvertFrom-Ckan +{ +Param( + [Parameter(Mandatory)][String] $InputObject +) + + $oCkan = ConvertFrom-Json -InputObject $InputObject + + $oMarti = New-MartiDefinition + + $oMarti.title = "Conversion from CKAN" + $oMarti.state = $oCkan.result.state + $oMarti.uid = $oCkan.result.id + $oMarti.contactPoint = $oCkan.result.contact_point + $oMarti.license = $oCkan.result.license_id + $oMarti.description = $oCkan.result.notes + + $hashAlgo = "SHA256" + $version = "1.1.0" + + [System.Collections.ArrayList]$lresource = @() + + $oCkan.result.resources | ForEach-Object { + + $idx = $_.url.LastIndexOf("/") + if ($idx -gt 1) { + $name = $_.url.Substring(($idx+1)) + } else { + $name = "" + } + + $oResource = [PSCustomObject]@{ + title = $_.name + uid = $_.id + documentName = $name + issuedDate = $_.created + modified = $_.last_modified + state = $_.state + author = $oCkan.result.author + length = $_.size + hash = $_.hash + hashAlgo = $hashAlgo + + description = $_.description + url = $_.url + version = $version + format = 
$_.format + compression = "" + encryption = "" + } + + $lresource += $oResource + + } + + $oMarti.resources = $lresource + + + return $oMarti + +} diff --git a/source/powershell/ConvertTo-Ckan.ps1 b/source/powershell/ConvertTo-Ckan.ps1 new file mode 100644 index 0000000..139597f --- /dev/null +++ b/source/powershell/ConvertTo-Ckan.ps1 @@ -0,0 +1,2 @@ + + diff --git a/source/powershell/Get-Marti.ps1 b/source/powershell/Get-Marti.ps1 new file mode 100644 index 0000000..5ee4020 --- /dev/null +++ b/source/powershell/Get-Marti.ps1 @@ -0,0 +1,124 @@ + +$script:LogPathName = "" +$script:SoftwareVersion = "0.0.1" + +$global:default_metaFile = "##marti##.mri" + +function Get-LogName { + + $date = Get-Date -f "yyyy-MM-dd" + + if (($null -eq $script:LogPathName) -or ($script:LogPathName -eq "")) + { + return $null + } + + if (!(Test-Path -Path $script:LogPathName)) { + $null = New-Item -Path $script:LogPathName -ItemType Directory + } + + $logName = $(Get-SoftwareName) + "_$date.log" + + return Join-Path -Path $script:LogPathName -ChildPath $logName +} + + +function Write-Log { + param( + [String] $LogEntry + ) + + $sFullPath = Get-LogName + + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + if ($null -ne $sFullPath -and $sFullPath -ne "") { + + if (!(Test-Path -Path $sFullPath)) { + Write-Host "Log path: $sFullPath" + $null = New-Item -Path $sFullPath -ItemType File + } + Add-Content -Path $sFullPath -Value "[$dateTime]. $LogEntry" + } + Write-Debug "[$dateTime]. 
$LogEntry" + +} + +function Open-Log { + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + Write-Log "***********************************************************************************" + Write-Log "* Start of processing: [$dateTime]" + Write-Log "***********************************************************************************" +} + +function Close-Log { + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + Write-Log "***********************************************************************************" + Write-Log "* End of processing: [$dateTime]" + Write-Log "***********************************************************************************" +} + +function Get-SoftwareName { + return [String] "MARTIREFERENCE" +} + + + + +function Get-MartiItem +{ + Param( + [Parameter(Mandatory)][PSCustomObject] $MartiDefintiion, + [Parameter(Mandatory)][String] $Title, + [String] $DocumentName, + [String] $Format, + [String] $LogPath + + ) + + $script:LogPathName = $LogPath + + Write-Debug "Parameter: LogPath Value: $LogPath " + Open-Log + Write-Log "Function 'Get-MartiItem' parameters follow" + Write-Log "Parameter: DocumentName Value: $DocumentName " + Write-Log "Parameter: Format Value: $Format " + Write-Log "" + + + if ($null -eq $MartiDefintiion) { + $Global:MartiErrorId = "MRI2101" + $message = "No Marti definition supplied" + Write-Log ($message + " " + $Global:MartiErrorId) + Close-Log + throw $message + } + + + if ($null -eq $MartiDefintiion.resources -or $MartiDefintiion.resources.Count -lt 1) { + $Global:MartiErrorId = "MRI2102" + $message = "No documents listed" + Write-Log ($message + " " + $Global:MartiErrorId) + Close-Log + throw $message + } + + [System.Collections.ArrayList]$lresource = @() + + $MartiDefintiion.resources | ForEach-Object { + + if ($null -eq $Format -or $Format -eq "" -or $Format -eq "*" -or $Format -eq $_.format ) { # an omitted [String] parameter binds as "", not $null + if ($Title -ne "*" -and $_.title -eq $Title) { + $lresource += $_ + } else { + if ($DocumentName -ne "*" -and $_.documentName -eq $DocumentName) { + 
$lresource += $_ + } + } + } + + } + + Close-Log + return $lresource +} + diff --git a/source/powershell/New-Marti.ps1 b/source/powershell/New-Marti.ps1 new file mode 100644 index 0000000..b6463f4 --- /dev/null +++ b/source/powershell/New-Marti.ps1 @@ -0,0 +1,435 @@ + +$script:LogPathName = "" +$script:SoftwareVersion = "0.0.1" + +$global:default_metaFile = "##marti##.mri" + +function Get-LogName { + + $date = Get-Date -f "yyyy-MM-dd" + + if (($null -eq $script:LogPathName) -or ($script:LogPathName -eq "")) + { + return $null + } + + if (!(Test-Path -Path $script:LogPathName)) { + $null = New-Item -Path $script:LogPathName -ItemType Directory + } + + $logName = $(Get-SoftwareName) + "_$date.log" + + return Join-Path -Path $script:LogPathName -ChildPath $logName +} + + +function Write-Log { + param( + [String] $LogEntry + ) + + $sFullPath = Get-LogName + + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + if ($null -ne $sFullPath -and $sFullPath -ne "") { + + if (!(Test-Path -Path $sFullPath)) { + Write-Host "Log path: $sFullPath" + $null = New-Item -Path $sFullPath -ItemType File + } + Add-Content -Path $sFullPath -Value "[$dateTime]. $LogEntry" + } + Write-Debug "[$dateTime]. 
$LogEntry" + +} + +function Open-Log { + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + Write-Log "***********************************************************************************" + Write-Log "* Start of processing: [$dateTime]" + Write-Log "***********************************************************************************" +} + +function Close-Log { + $dateTime = Get-Date -f "yyyy-MM-dd HH:mm:ss" + Write-Log "***********************************************************************************" + Write-Log "* End of processing: [$dateTime]" + Write-Log "***********************************************************************************" +} + +function Get-SoftwareName { + return [String] "MARTIREFERENCE" +} + + + +function New-MartiDefinition +{ + + $oSoftware = [PSCustomObject]@{ + extension = "software" + softwareName = "MartiReference" + author = "Meerkat@merebox.com" + version = "$script:SoftwareVersion" + } + + $publisher = [System.Security.Principal.WindowsIdentity]::GetCurrent().Name + + [System.Collections.ArrayList]$lcustom = @() + $lcustom += $oSoftware + + [System.Collections.ArrayList]$lresource = @() + + $oMarti = [PSCustomObject]@{ + title = "" + uid = (New-Guid).ToString() + resources = $lresource + + description = "" + modified = Get-Date -f "yyyy-MM-ddTHH:mm:ss" + tags = @( "document", "marti") + publisher = $publisher + contactPoint = "" + accessLevel = "Confidential" + rights = "Restricted" + license = "" + state = "active" + + describedBy = "" + landingPage = "" + theme ="" + + custom = $lCustom + } + + return $oMarti +} + + + + +function New-MartiChildItem +{ +Param( + [Parameter(Mandatory)][String] $SourceFolder, + [String] $Filter ="*", + [String] $UrlPath, + [switch] $Recurse, + [switch] $ExcludeHash, + [String] $LogPath + +) + $script:LogPathName = $LogPath + + Write-Debug "Parameter: LogPath Value: $LogPath " + Open-Log + Write-Log "Function 'New-MartiDefinition' parameters follow" + Write-Log "Parameter: SourceFolder Value: 
$SourceFolder " + Write-Log "Parameter: Filter Value: $Filter " + Write-Log "Parameter: Recurse Value: $Recurse " + Write-Log "Parameter: ExcludeHash Value: $ExcludeHash " + Write-Log "" + + if ($ExcludeHash) { + $hashAlgo = "" + } + else { + $hashAlgo = "SHA256" + } + $version = "1.1.0" + + $oMarti = New-MartiDefinition + $lresource = $oMarti.resources + + $SourceFullName = (Get-Item -Path $SourceFolder).FullName + + Get-ChildItem $SourceFolder -Filter $Filter -Recurse:$Recurse -Force| Where-Object {!$_.PSIsContainer} | ForEach-Object { + + Write-Log "Define file $($_.FullName) " + if ($ExcludeHash) { + $hash = "" + } else { + $hash = (Get-FileHash -Path $_.FullName -Algorithm $hashAlgo).Hash + } + + [System.Collections.ArrayList]$lattribute = @() + if ($_.Extension.TrimStart('.') -eq "CSV") { # $_ is the current file; TrimStart tolerates files with no extension + $lattribute = New-DefaultCsvAttributes + } + + $oResource = [PSCustomObject]@{ + title = $_.BaseName # Name.Replace(Extension, "") throws when Extension is empty + uid = (New-Guid).ToString() + documentName = $_.Name + issuedDate = Get-Date -f "yyyy-MM-ddTHH:mm:ss" + modified = $_.LastWriteTime.ToString("yyyy-MM-ddTHH:mm:ss") + state = "active" + author = "" + length = $_.Length + hash = $hash + hashAlgo = $hashAlgo + + description = "" + url = "" + version = $version + format = $_.Extension.TrimStart('.') + compression = "" + encryption = "" + + attributes = $lattribute + } + + if ($null -ne $UrlPath -and $UrlPath -ne "") { + $postfixName = $_.FullName.Replace($SourceFullName, "") + $oResource.url = Join-Path -Path $UrlPath -ChildPath $postfixName + } + + $lresource += $oResource + + } + Write-Log "Captured $($lresource.Count) items" + $oMarti.resources = $lresource + Close-Log + + return $oMarti + +} + +function Set-MartiAttribute +{ +Param( + [System.Collections.ArrayList] $Attributes, + [String] $ACategory, + [String] $AName, + [String] $AFunction, + [String] $Comparison, + [String] $Value +) + + $matched = $false + + $Attributes | ForEach-Object { + + if ($_.category -eq $ACategory -and $_.name -eq $AName -and 
$_.function -eq $AFunction) { + $matched = $true + $_.comparison = $comparison + $_.value = $value + } + + } + + if (!($matched)) { + + $oAttribute = [PSCustomObject]@{ + category = $Acategory + name = $AName + function = $Afunction + comparison = $comparison + value = $value + } + + $Attributes += $oAttribute + } + + return $Attributes +} + +function New-DefaultCsvAttributes { + + [System.Collections.ArrayList]$lattribute = @() + + $oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "header" + function = "count" + comparison = "NA" + value = 0 + } + $lattribute += $oAttribute + + $oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "footer" + function = "count" + comparison = "NA" + value = 0 + } + $lattribute += $oAttribute + + $oAttribute = [PSCustomObject]@{ + category = "format" + name = "separator" + function = "value" + comparison = "NA" + value = "," + } + $lattribute += $oAttribute + + $oAttribute = [PSCustomObject]@{ + category = "format" + name = "columns" + function = "value" + comparison = "NA" + value = "," + } + $lattribute += $oAttribute + + $oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "records" + function = "count" + comparison = "NA" + value = 0 + } + $lattribute += $oAttribute + + $oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "columns" + function = "count" + comparison = "NA" + value = 0 + } + $lattribute += $oAttribute + + return $lattribute +} + + +function New-DefaultJsonAttributes { + + [System.Collections.ArrayList]$lattribute = @() + + $oAttribute = [PSCustomObject]@{ + category = "format" + name = "list" + function = "offset" + comparison = "NA" + value = "," + } + $lattribute += $oAttribute + + $oAttribute = [PSCustomObject]@{ + category = "format" + name = "columns" + function = "value" + comparison = "NA" + value = "," + } + $lattribute += $oAttribute + + $oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "records" + function = "count" + comparison = "NA" + 
value = 0 + } + $lattribute += $oAttribute + + $oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "columns" + function = "count" + comparison = "NA" + value = 0 + } + $lattribute += $oAttribute + + return $lattribute +} + + +function New-MartiItem +{ +Param( + [Parameter(Mandatory)][String] $SourcePath, + [String] $UrlPath = "", + [switch] $ExcludeHash, + [String] $LogPath + +) + $Global:MartiErrorId = "" + $script:LogPathName = $LogPath + + Write-Debug "Parameter: LogPath Value: $LogPath " + Open-Log + Write-Log "Function 'New-MartiItem' parameters follow" + Write-Log "Parameter: SourceFolder Value: $SourceFolder " + Write-Log "Parameter: ExcludeHash Value: $ExcludeHash " + Write-Log "" + + + if ($ExcludeHash) { + $hashAlgo = "" + } + else { + $hashAlgo = "SHA256" + } + $version = "1.1.0" + + $oMarti = New-MartiDefinition + $lresource = $oMarti.resources + + if (Test-Path -Path $SourcePath -PathType Leaf) { + + $item = Get-Item -Path $SourcePath -Force + + Write-Log "Define file $item.FullName " + if ($ExcludeHash) { + $hash = "" + } else { + $hash = (Get-FileHash -Path $item.FullName -Algorithm $hashAlgo).Hash + } + + [System.Collections.ArrayList]$lattribute = @() + if ($item.Extension.Substring(1) -eq "CSV") { + $lattribute = New-DefaultCsvAttributes + } + if ($item.Extension.Substring(1) -eq "JSON") { + $lattribute = New-DefaultJsonAttributes + } + + $oResource = [PSCustomObject]@{ + title = $item.Name.Replace($item.Extension, "") + uid = (New-Guid).ToString() + documentName = $item.Name + issuedDate = Get-Date -f "yyyy-MM-ddTHH:mm:ss" + modified = $item.LastWriteTime.ToString("yyyy-MM-ddTHH:mm:ss") + state = "active" + author = "" + length = $item.Length + hash = $hash + hashAlgo = $hashAlgo + + description = "" + url = "" + version = $version + format = $item.Extension.Substring(1) + compression = "" + encryption = "" + + attributes = $lattribute + } + + if ($null -ne $UrlPath -and $UrlPath -ne "") { + $oResource.url = Join-Path -Path $UrlPath 
-ChildPath $item.Name # $_ is not set here (no pipeline); $item is the file being described + } + + $lresource += $oResource + + } else { + $Global:MartiErrorId = "MRI2001" + $message = "Document '$SourcePath' not found or is a folder" + Write-Log ($message + " " + $Global:MartiErrorId) + Close-Log + throw $message + } + Write-Log "Captured $($lresource.Count) items" + $oMarti.resources = $lresource + Close-Log + + return $oMarti + +} + diff --git a/test/powershell/test_Marti.ps1 b/test/powershell/test_Marti.ps1 new file mode 100644 index 0000000..97c1d8b --- /dev/null +++ b/test/powershell/test_Marti.ps1 @@ -0,0 +1,45 @@ + +. .\source\powershell\New-Marti.ps1 +. .\source\powershell\Get-Marti.ps1 +. .\source\powershell\Compress-Marti.ps1 + +Write-Host "Test case #1" +$oMarti = New-MartiChildItem -SourceFolder ".\docs" -Recurse -UrlPath ".\docs" -Filter "*" -LogPath ".\test\powershell\results\Logs" +$oMarti.description = "Sample execution" + +$x = ConvertTo-Json -InputObject $oMarti +Set-Content -Path ".\test\powershell\results\marti_test01.mri.json" -Value $x + +Write-Host "Test case #2" +$ArchiveFile = ".\test\powershell\results\marti_test02.zip" +Compress-Marti -SourceFolder ".\docs" -Filter "*" -LogPath ".\test\powershell\results\Logs" -ArchiveFile $ArchiveFile + +Write-Host "Test case #3" +$y = Get-MartiItem -MartiDefintiion $oMarti -Title "ckan" -Format "txt" -LogPath ".\test\powershell\results\Logs" +Write-Host "Get item Title: $($y.title)" +Write-Host "Get item Url: $($y.url)" + +Write-Host "Test case #4" +$oMarti = New-MartiItem -SourcePath ".\docs\ckan.md" -LogPath ".\test\powershell\results\Logs" +$oMarti.description = "Sample execution for ckan" + +$x = ConvertTo-Json -InputObject $oMarti +Set-Content -Path ".\test\powershell\results\marti_test02.mri.json" -Value $x + +$x = ConvertTo-Csv -InputObject $oMarti +Set-Content -Path ".\test\powershell\results\marti_test02.mri.csv" -Value $x + +$x = ConvertTo-Xml -As String -InputObject $oMarti -Depth 6 +Set-Content -Path ".\test\powershell\results\marti_test02.mri.xml" -Value $x + 
+$x = ConvertTo-Html -InputObject $oMarti +Set-Content -Path ".\test\powershell\results\marti_test02.mri.html" -Value $x + +Write-Host "Test case #5" +$oMarti = New-MartiItem -SourcePath ".\docs\eror" -LogPath ".\test\powershell\results\Logs" +$oMarti.description = "Sample execution with error" + +$x = ConvertTo-Json -InputObject $oMarti +Set-Content -Path ".\test\powershell\results\marti_test03.mri.json" -Value $x + + diff --git a/test/powershell/test_MartiCkan.ps1 b/test/powershell/test_MartiCkan.ps1 new file mode 100644 index 0000000..7fb4338 --- /dev/null +++ b/test/powershell/test_MartiCkan.ps1 @@ -0,0 +1,36 @@ + + +. .\source\powershell\New-Marti.ps1 +. .\source\powershell\ConvertFrom-Ckan.ps1 + + +$ckan = Get-Content -Path ".\docs\samples\asic_ckan_api.json" -Raw +$oMarti = ConvertFrom-Ckan -InputObject $ckan +$x = ConvertTo-Json -InputObject $oMarti +Set-Content -Path ".\test\powershell\results\marti_test05.mri.json" -Value $x + + +$covid_1 = Invoke-WebRequest "https://data.nsw.gov.au/data/api/3/action/package_show?id=793ac07d-a5f4-4851-835c-3f7158c19d15" +$oMarti = ConvertFrom-Ckan -InputObject $covid_1 +$oMarti.description = "This data has been converted from NSW CKAN data source with URL 'https://data.nsw.gov.au/data/api/3/action/package_show?id=793ac07d-a5f4-4851-835c-3f7158c19d15'" +$oMarti.tags += "ckan" +$oMarti.tags += "gov" +$oMarti.tags += "nsw" +$oMarti.publisher = "NSW government (Australia)" +$x = ConvertTo-Json -InputObject $oMarti +Set-Content -Path ".\test\powershell\results\marti_test06.mri.json" -Value $x + + +# cases +$covid19 = "https://data.nsw.gov.au/data/api/3/action/package_show?id=3dc5dc39-40b4-4ee9-8ec6-2d862a916dcf" +#Invoke-WebRequest $covid19 -Method GET -OutFile ".\test\powershell\results\data\nsw_covid19.csv" +$covid_2 = Invoke-WebRequest $covid19 +$oMarti = ConvertFrom-Ckan -InputObject $covid_2 +$oMarti.description = "This data has been converted from NSW CKAN data source with URL '$covid19'" +$oMarti.tags += "ckan" 
+$oMarti.tags += "gov" +$oMarti.tags += "nsw" +$oMarti.publisher = "NSW government (Australia)" +$x = ConvertTo-Json -InputObject $oMarti +Set-Content -Path ".\test\powershell\results\marti_test07.mri.json" -Value $x + diff --git a/test/powershell/test_MartiData1.ps1 b/test/powershell/test_MartiData1.ps1 new file mode 100644 index 0000000..86c8b95 --- /dev/null +++ b/test/powershell/test_MartiData1.ps1 @@ -0,0 +1,96 @@ + +. .\source\powershell\New-Marti.ps1 +. .\source\powershell\ConvertFrom-Ckan.ps1 + +if (!(Test-Path -Path ".\test\powershell\results\data")) { + $null = New-Item -Path ".\test\powershell\results\data" -ItemType Directory +} + +#$x = Get-Content -Path ".\test\results\data\bsb.csv" +$bsbFile = ".\test\powershell\results\data\bsb.csv" +$data = Import-Csv -Path $bsbFile + +$columns = ($data | get-member -type NoteProperty).count +$rows = @($data).count + +Write-Host "Rows: $rows Columns: $columns" + +[System.Collections.ArrayList]$lattribute = @() + +$oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "header" + function = "count" + comparison = "EQ" + value = 0 +} + +$lattribute += $oAttribute + +$oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "footer" + function = "count" + comparison = "EQ" + value = 0 +} + +$lattribute += $oAttribute + +$oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "rows" + function = "count" + comparison = "EQ" + value = $rows +} + +$lattribute += $oAttribute + +$oAttribute = [PSCustomObject]@{ + category = "dataset" + name = "columns" + function = "count" + comparison = "EQ" + value = $columns +} + +$lattribute += $oAttribute + +$oAttribute = [PSCustomObject]@{ + category = "data" + name = "BSB" + function = "sum" + comparison = "EQ" + value = 1032092 +} + +$lattribute += $oAttribute + +$oAttribute = [PSCustomObject]@{ + category = "data" + name = "BSB" + function = "unique" + comparison = "EQ" + value = $rows +} + +$lattribute += $oAttribute + + + +$uq = Get-Content $bsbFile | 
ConvertFrom-Csv -Header "C1", "C2" | Select-Object "C2" | Sort-Object "C2" -Unique | Group-Object -Property "C2" +$oAttribute = [PSCustomObject]@{ + category = "data" + name = "Institution" + function = "unique" + comparison = "EQ" + value = $uq.Count +} + +$lattribute += $oAttribute + +$x = ConvertTo-Json -InputObject $lattribute +$x +#select -skip 6 +#| select name diff --git a/test/powershell/test_MartiData2.ps1 b/test/powershell/test_MartiData2.ps1 new file mode 100644 index 0000000..8c540fa --- /dev/null +++ b/test/powershell/test_MartiData2.ps1 @@ -0,0 +1,41 @@ + +. .\source\powershell\New-Marti.ps1 +. .\source\powershell\ConvertFrom-Ckan.ps1 +. .\source\powershell\Compare-MartiResource.ps1 + + +$bsbFile = ".\test\powershell\results\data\bsb.csv" + +Write-Host ">>>>>>Test case #1" +$x = New-MartiItem -SourcePath $bsbFile -UrlPath "" -ExcludeHash -LogPath ".\test\powershell\results\Logs" + +Write-Host ">>>>>>Test case #2" +$x.resources + +[System.Collections.ArrayList] $Attr = Set-MartiAttribute -Attributes $x.resources[0].attributes -ACategory "dataset" -AName "records" -AFunction "count" -comparison "EQ" -value 10 +$x.resources[0].attributes = Set-MartiAttribute -Attributes $Attr -ACategory "dataset" -AName "columns" -AFunction "count" -comparison "EQ" -value 8 + +Write-Host ">>>>>>Test case #3" +$x.resources[0].attributes + +Write-Host ">>>>>>Test case #4" +$y = Compare-MartiResource -DataSource $bsbFile -Resource $x.resources[0] -LogPath ".\test\powershell\results\Logs" +$y + +$covidFile = ".\test\powershell\results\data\covid-case-locations-20210920-1315.json" + +Write-Host ">>>>>>Test case #5" +$x = New-MartiItem -SourcePath $covidFile -UrlPath "" -ExcludeHash -LogPath ".\test\powershell\results\Logs" + +Write-Host ">>>>>>Test case #6" +$x.resources + +[System.Collections.ArrayList] $Attr = Set-MartiAttribute -Attributes $x.resources[0].attributes -ACategory "dataset" -AName "records" -AFunction "count" -comparison "EQ" -value 10 
+$x.resources[0].attributes = Set-MartiAttribute -Attributes $Attr -ACategory "dataset" -AName "columns" -AFunction "count" -comparison "EQ" -value 8 + +Write-Host ">>>>>>Test case #7" +$x.resources[0].attributes + +Write-Host ">>>>>>Test case #8" +$y = Compare-MartiResource -DataSource $covidFile -Resource $x.resources[0] -LogPath ".\test\powershell\results\Logs" +$y diff --git a/test/powershell/test_MartiData3.ps1 b/test/powershell/test_MartiData3.ps1 new file mode 100644 index 0000000..ddb7947 --- /dev/null +++ b/test/powershell/test_MartiData3.ps1 @@ -0,0 +1,18 @@ + +. .\source\powershell\New-Marti.ps1 +. .\source\powershell\ConvertFrom-Ckan.ps1 +. .\source\powershell\Compare-MartiResource.ps1 + + +$covidFile = ".\test\powershell\results\data\covid-case-locations-20210920-1315.json" + +$x = New-MartiItem -SourcePath $covidFile -UrlPath "" -ExcludeHash -LogPath ".\test\powershell\results\Logs" + +[System.Collections.ArrayList] $attr = Set-MartiAttribute -Attributes $x.resources[0].attributes -ACategory "dataset" -AName "records" -AFunction "count" -comparison "EQ" -value 516 +$attr = Set-MartiAttribute -Attributes $attr -ACategory "dataset" -AName "columns" -AFunction "count" -comparison "EQ" -value 12 +$x.resources[0].attributes = Set-MartiAttribute -Attributes $attr -ACategory "format" -AName "list" -AFunction "offset" -comparison "EQ" -value "data.monitor" + +$y = Compare-MartiResource -DataSource $covidFile -Resource $x.resources[0] -LogPath ".\test\powershell\results\Logs" +$y + +$attr | Get-Member \ No newline at end of file diff --git a/test/powershell/test_retrievedata.ps1 b/test/powershell/test_retrievedata.ps1 new file mode 100644 index 0000000..54301d2 --- /dev/null +++ b/test/powershell/test_retrievedata.ps1 @@ -0,0 +1,38 @@ + + + +. .\source\powershell\New-Marti.ps1 +. 
.\source\powershell\ConvertFrom-Ckan.ps1 + + +if (!(Test-Path -Path ".\test\powershell\results\data")) { + $null = New-Item -Path ".\test\powershell\results\data" -ItemType Directory +} + + +$bsb = "ftp://bsb.hostedftp.com/~auspaynetftp/BSB" +$bsb = "http://apnedata.merebox.com.s3.ap-southeast-2.amazonaws.com/au/bsb/BSBDirectory.csv" +Invoke-WebRequest $bsb -Method GET -OutFile ".\test\powershell\results\data\bsb.csv" +#Set-Content -Path ".\test\results\data\bsb.csv" -Value $bsbList.Content +#$bsbList.Content + + +$covid19j = "https://data.nsw.gov.au/data/api/3/action/package_show?id=3dc5dc39-40b4-4ee9-8ec6-2d862a916dcf" +Invoke-WebRequest $covid19j -Method GET -OutFile ".\test\powershell\results\nsw_covid19_age.json" + +$covid19 = "https://data.nsw.gov.au/data/dataset/3dc5dc39-40b4-4ee9-8ec6-2d862a916dcf/resource/24b34cb5-8b01-4008-9d93-d14cf5518aec/download/confirmed_cases_table2_age_group.csv" +Invoke-WebRequest $covid19 -Method GET -OutFile ".\test\powershell\results\data\COVID-19 cases by notification date and age range.csv" + + +$covid19j = "https://data.nsw.gov.au/data/api/3/action/package_show?id=0a52e6c1-bc0b-48af-8b45-d791a6d8e289" +Invoke-WebRequest $covid19j -Method GET -OutFile ".\test\powershell\results\nsw_covid19_location.json" + +$covid19 = "https://data.nsw.gov.au/data/dataset/0a52e6c1-bc0b-48af-8b45-d791a6d8e289/resource/5200e552-0afb-4bde-b20f-f8dd4feff3d7/download/c19_location_09.24.csv" +Invoke-WebRequest $covid19 -Method GET -OutFile ".\test\powershell\results\data\c19_location_09.24.csv" + +$covid19 = "https://data.nsw.gov.au/data/dataset/0a52e6c1-bc0b-48af-8b45-d791a6d8e289/resource/f3a28eed-8c2a-437b-8ac1-2dab3cf760f9/download/covid-case-locations-20210920-1315.json" +Invoke-WebRequest $covid19 -Method GET -OutFile ".\test\powershell\results\data\covid-case-locations-20210920-1315.json" + +$covid19 = "https://data.vic.gov.au/data/dataset/890da9b3-0976-4de3-8028-e0c22b9a0e09#embed-28becc42-9616-4d60-ac8e-a3853dbddb55" +Invoke-WebRequest 
$covid19 -Method GET -OutFile ".\test\powershell\results\data\covid-case-locations-20210920-1315.json" + diff --git a/tools.md b/tools.md new file mode 100644 index 0000000..5f8ed30 --- /dev/null +++ b/tools.md @@ -0,0 +1,10 @@ +# Tools + +A number of tools are provided that can be incorporated into your +projects that want to use the metadata transfer reconciliation format +(Marti document). + +You can combine these in different ways such as: + +* Use a Python extract program to generate the Marti document +* Use a Java program to reconcile the received data document with the Marti file