diff --git a/.gitignore b/.gitignore
index 1fc2913..2f2bb1b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@ buildNumber.properties
 .mvn/wrapper/maven-wrapper.jar
 
 test/powershell/results/
+docs/samples/powershell/test/
diff --git a/docs/README.md b/docs/README.md
index ff54607..e7b1620 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -21,5 +21,4 @@ can adjust if they resonate with your circumstances,
 6. [Comparison of marti definition](comparison.md)
 7. [References](references.md)
 
-
 [!INCLUDE [marti High Level Definition](../marti.md)]
diff --git a/docs/samples/powershell/Invoke-BSBSample.ps1 b/docs/samples/powershell/Invoke-BSBSample.ps1
new file mode 100644
index 0000000..49a1b36
--- /dev/null
+++ b/docs/samples/powershell/Invoke-BSBSample.ps1
@@ -0,0 +1,153 @@
+# Load the marti functions relative to this script instead of from a
+# machine-specific absolute path (this sample lives in docs/samples/powershell).
+$sourceRoot = Join-Path -Path $PSScriptRoot -ChildPath "../../../source/powershell"
+. (Join-Path -Path $sourceRoot -ChildPath "New-Marti.ps1")
+. (Join-Path -Path $sourceRoot -ChildPath "Add-MartiItem.ps1")
+. (Join-Path -Path $sourceRoot -ChildPath "Get-Marti.ps1")
+. (Join-Path -Path $sourceRoot -ChildPath "Compress-Marti.ps1")
+. (Join-Path -Path $sourceRoot -ChildPath "Get-MartiFileAttributes.ps1")
+
+
+# Downloads one remote FTP file to a local path.
+#   RemoteFile         - full ftp:// URL of the file to fetch
+#   OutputPath         - local file to create (FileMode Create replaces an existing file)
+#   Username, Password - FTP credentials
+#   Buffersize         - size in bytes of the copy buffer
+function PullFtpFile {
+    param (
+        [String] $RemoteFile,
+        [String] $OutputPath,
+        [String] $Username,
+        [String] $Password,
+        [int] $Buffersize = 1024
+    )
+
+    $FTPRequest = [System.Net.FtpWebRequest]::Create($RemoteFile)
+    $FTPRequest.Credentials = New-Object System.Net.NetworkCredential($Username,$Password)
+    $FTPRequest.Method = [System.Net.WebRequestMethods+Ftp]::DownloadFile
+    $FTPRequest.UseBinary = $true
+    $FTPRequest.KeepAlive = $false
+
+    # .NET resolves relative paths against the process working directory, which can
+    # differ from the PowerShell location, so let PowerShell expand the path first
+    $OutputPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputPath)
+
+    $FTPResponse = $null
+    $ResponseStream = $null
+    $LocalFileStream = $null
+    try {
+        $FTPResponse = $FTPRequest.GetResponse()
+        $ResponseStream = $FTPResponse.GetResponseStream()
+
+        try {
+            $LocalFileStream = New-Object IO.FileStream ($OutputPath,[IO.FileMode]::Create)
+        }
+        catch {
+            Write-Host "Write failed to file $OutputPath"
+            return
+        }
+        [byte[]]$ReadBuffer = New-Object byte[] $Buffersize
+
+        # Copy until Read reports end of stream by returning 0
+        while (($ReadLength = $ResponseStream.Read($ReadBuffer,0,$Buffersize)) -gt 0) {
+            $LocalFileStream.Write($ReadBuffer,0,$ReadLength)
+        }
+    }
+    finally {
+        # Release the file handle and the FTP connection even when the transfer fails
+        if ($null -ne $LocalFileStream) { $LocalFileStream.Dispose() }
+        if ($null -ne $ResponseStream) { $ResponseStream.Dispose() }
+        if ($null -ne $FTPResponse) { $FTPResponse.Close() }
+    }
+}
+
+
+# Returns the entry names of a remote FTP directory as strings; blank lines are dropped.
+#   RemoteFile         - ftp:// URL of the directory to list
+#   Username, Password - FTP credentials
+#   Buffersize         - buffer size in bytes used when reading the listing
+function ListFtpDirectory {
+    param (
+        [String] $RemoteFile,
+        [String] $Username,
+        [String] $Password,
+        [int] $Buffersize = 1024
+    )
+
+    $FTPRequest = [System.Net.FtpWebRequest]::Create($RemoteFile)
+    $FTPRequest.Credentials = New-Object System.Net.NetworkCredential($Username,$Password)
+    $FTPRequest.Method = [System.Net.WebRequestMethods+Ftp]::ListDirectory
+    $FTPRequest.UseBinary = $false
+    $FTPRequest.KeepAlive = $false
+
+    $FTPResponse = $null
+    $Reader = $null
+    try {
+        $FTPResponse = $FTPRequest.GetResponse()
+        $Reader = New-Object System.IO.StreamReader ($FTPResponse.GetResponseStream(), [System.Text.Encoding]::ASCII, $false, $Buffersize)
+        $ListBuffer = $Reader.ReadToEnd()
+    }
+    finally {
+        if ($null -ne $Reader) { $Reader.Dispose() }
+        if ($null -ne $FTPResponse) { $FTPResponse.Close() }
+    }
+
+    # Split on LF or CRLF so the result does not depend on the local
+    # [Environment]::NewLine, and drop the empty entries the split leaves behind
+    $list = @($ListBuffer -split "\r?\n" | Where-Object { $_ -ne "" })
+    return $list
+}
+
+$remoteDirectory = "ftp://bsb.hostedftp.com/~auspaynetftp/BSB/"
+# Change local directory
+$localDirectory = "./test/powershell/results/data"
+$localDirectory = "./test"
+
+Write-Host "First fetch the BSB files " -ForeGroundColor Green
+
+$fileList = ListFtpDirectory -Username "anonymous" -Password "anon@merebox.com" -RemoteFile $remoteDirectory
+Write-Host "File list size: $($fileList.count)"
+
+$oMarti = New-MartiDefinition
+ForEach ($item in $fileList) {
+    if ($item -ne "" -and $item.startswith("BSBDirectory")) {
+        #Write-Host "Pulling file: $item"
+        PullFtpFile -Username "anonymous" -Password "anon@merebox.com" -RemoteFile ($remoteDirectory + $item) -OutputPath (Join-Path -Path $localDirectory -ChildPath $item)
+        Write-Host "Add BSB $item file to Remote marti metadata sample " -ForeGroundColor Yellow
+        $oResource = Add-MartiItem -SourcePath (Join-Path -Path $localDirectory -ChildPath $item) -UrlPath $remoteDirectory -LogPath ".\test\Logs" -ExtendAttributes
+        $oMarti.resources += $oResource
+    }
+}
+
+$fileJson = Join-Path -Path $localDirectory -ChildPath "MartiBSBRemote.mri.json"
+$oMarti | ConvertTo-Json -depth 100 | Out-File $fileJson
+Write-Host "Remote marti definition file is $fileJson " -ForeGroundColor Green
+
+Write-Host "Now iterate through the local files and build ZIP " -ForeGroundColor Green
+# Build the ZIP only when the FTP listing returned at least one non-empty entry
+if (@($fileList | Where-Object { $_ }).Count -gt 0) {
+    $zipFile = Join-Path -Path $localDirectory -ChildPath "BSBDirectory.zip"
+    if (Test-Path -Path $zipFile) {
+        Remove-Item -Path $zipFile
+    }
+    foreach($file in Get-ChildItem $localDirectory)
+    {
+        if ($file.Name.startswith("BSBDirectory")) {
+            Write-Host "Add BSB file $file to Local marti metadata sample " -ForeGroundColor Yellow
+            Compress-Archive -Path $file.FullName -DestinationPath $zipFile -Update
+        }
+    }
+}
+
+$oMarti = New-MartiChildItem -SourceFolder $localDirectory -UrlPath "./test" -Filter "BSBDirectory*" -LogPath ".\test\Logs" -ExtendAttributes
+$oMarti.title = "Local_BSB_data"
+$oMarti.description = "This definition covers the local BSB data files `r downloaded from the Australian Payment Network"
+$oMarti.contactPoint = "meerkat@merebox.com"
+$oMarti.landingPage = "https://github.com/meerkat-manor/marti/blob/main/docs/samples/asic_ckan_api.json"
+$oMarti.theme = "payment"
+
+$fileJson = Join-Path -Path $localDirectory -ChildPath "MartiBSBLocal.mri.json"
+$oMarti | ConvertTo-Json -depth 100 | Out-File $fileJson
+Write-Host "Local marti definition file is $fileJson " -ForeGroundColor Green
+
+Write-Host "Sample execution completed" -ForeGroundColor Green
diff --git a/source/powershell/Add-MartiItem.ps1 b/source/powershell/Add-MartiItem.ps1
new file mode 100644
index 0000000..59273f2
--- /dev/null
+++ b/source/powershell/Add-MartiItem.ps1
@@ -0,0 +1,95 @@
+
+# Builds the marti resource entry that describes a single file.
+#   SourcePath       - file to describe; must exist and must not be a folder
+#   UrlPath          - optional base location; the file name is appended to form the url
+#   ExcludeHash      - when set, no file hash is calculated and hash/hashAlgo are empty
+#   ExtendAttributes - forwarded to Get-MartiFileAttributes as -ExtendedAttributes
+#   LogPath          - copied to $script:LogPathName before Open-Log is called
+# Returns the resource as a PSCustomObject. Throws, after setting
+# $Global:MartiErrorId to "MRI2001", when SourcePath is not an existing file.
+function Add-MartiItem
+{
+Param(
+    [Parameter(Mandatory)][String] $SourcePath,
+    [String] $UrlPath = "",
+    [switch] $ExcludeHash,
+    [switch] $ExtendAttributes,
+    [String] $LogPath
+
+)
+    $Global:MartiErrorId = ""
+    $script:LogPathName = $LogPath
+
+    Write-Debug "Parameter: LogPath Value: $LogPath "
+    Open-Log
+    Write-Log "Function 'Add-MartiItem' parameters follow"
+    Write-Log "Parameter: SourcePath Value: $SourcePath "
+    Write-Log "Parameter: ExcludeHash Value: $ExcludeHash "
+    Write-Log ""
+
+    # Guard clause: reject missing paths and folders before doing any work
+    if (-not (Test-Path -Path $SourcePath -PathType Leaf)) {
+        $Global:MartiErrorId = "MRI2001"
+        $message = "Document '$SourcePath' not found or is a folder"
+        Write-Log ($message + " " + $Global:MartiErrorId)
+        Close-Log
+        throw $message
+    }
+
+    $item = Get-Item -Path $SourcePath -Force
+    Write-Log "Define file $($item.FullName) "
+
+    if ($ExcludeHash) {
+        $hashAlgo = ""
+        $hash = ""
+    }
+    else {
+        $hashAlgo = "SHA256"
+        $hash = (Get-FileHash -Path $item.FullName -Algorithm $hashAlgo).Hash
+    }
+
+    # Extension without its leading dot. TrimStart also copes with files that have
+    # no extension, where Substring(1) on the empty string would throw.
+    $fileType = $item.Extension.TrimStart(".")
+    $lattribute = Get-MartiFileAttributes -Path $item.FullName -FileType $fileType -ExtendedAttributes:$ExtendAttributes
+
+    $oResource = [PSCustomObject]@{
+        title = $item.BaseName
+        uid = (New-Guid).ToString()
+        documentName = $item.Name
+        issuedDate = Get-Date -f "yyyy-MM-ddTHH:mm:ss"
+        modified = $item.LastWriteTime.ToString("yyyy-MM-ddTHH:mm:ss")
+        state = "active"
+        author = ""
+        length = $item.Length
+        hash = $hash
+        hashAlgo = $hashAlgo
+
+        description = ""
+        url = ""
+        # NOTE(review): $version is not assigned in this function; it resolves from an outer scope - confirm
+        version = $version
+        format = $fileType
+        compression = ""
+        encryption = ""
+
+        attributes = $lattribute
+    }
+
+    if ($null -ne $UrlPath -and $UrlPath -ne "") {
+        # The backslash is not an escape character in PowerShell strings, so a single
+        # backslash is written "\"; the previous "\\" only matched two in a row
+        $urlBase = $UrlPath.Replace("\", "/")
+        if ($urlBase.EndsWith("/")) {
+            $oResource.url = $urlBase + $item.Name
+        } else {
+            $oResource.url = $urlBase + "/" + $item.Name
+        }
+    }
+
+    Close-Log
+
+    return $oResource
+
+}
+
diff --git a/source/powershell/Get-MartiFileAttributes.ps1 b/source/powershell/Get-MartiFileAttributes.ps1
new file mode 100644
index 0000000..a694c1f
--- /dev/null
+++ b/source/powershell/Get-MartiFileAttributes.ps1
@@ -0,0 +1,225 @@
+
+# Helper functions that build the "attributes" list of a marti resource
+# and fill it in from the content of the described file.
+
+# Creates a single attribute entry (category, name, function, comparison, value).
+function New-MartiAttribute {
+    Param (
+        [String] $Category,
+        [String] $Name,
+        [String] $Function,
+        $Value,
+        [String] $Comparison = "NA"
+    )
+
+    return [PSCustomObject]@{
+        category = $Category
+        name = $Name
+        function = $Function
+        comparison = $Comparison
+        value = $Value
+    }
+}
+
+
+# Returns the default attribute entries for a delimited (CSV style) file.
+function New-DefaultCsvAttributes {
+
+    [System.Collections.ArrayList]$lattribute = @()
+
+    [void]$lattribute.Add((New-MartiAttribute -Category "dataset" -Name "header" -Function "count" -Value 1))
+    [void]$lattribute.Add((New-MartiAttribute -Category "dataset" -Name "footer" -Function "count" -Value 0))
+    [void]$lattribute.Add((New-MartiAttribute -Category "format" -Name "separator" -Function "value" -Value ","))
+    [void]$lattribute.Add((New-MartiAttribute -Category "format" -Name "columns" -Function "value" -Value ","))
+    [void]$lattribute.Add((New-MartiAttribute -Category "dataset" -Name "records" -Function "count" -Value 0))
+    [void]$lattribute.Add((New-MartiAttribute -Category "dataset" -Name "columns" -Function "count" -Value 0))
+
+    return $lattribute
+}
+
+
+# Returns the default attribute entries for a JSON file.
+function New-DefaultJsonAttributes {
+
+    [System.Collections.ArrayList]$lattribute = @()
+
+    [void]$lattribute.Add((New-MartiAttribute -Category "format" -Name "list" -Function "offset" -Value ","))
+    [void]$lattribute.Add((New-MartiAttribute -Category "format" -Name "columns" -Function "value" -Value ","))
+    [void]$lattribute.Add((New-MartiAttribute -Category "dataset" -Name "records" -Function "count" -Value 0))
+    [void]$lattribute.Add((New-MartiAttribute -Category "dataset" -Name "columns" -Function "count" -Value 0))
+
+    return $lattribute
+}
+
+# Returns the default attribute entries for a compressed archive.
+#   CompressionType - value stored in the format/compression entry
+#   Encryption      - value stored in the format/encryption entry
+function New-DefaultZipAttributes {
+    Param (
+        [String] $CompressionType = "ZIP",
+        [String] $Encryption = ""
+    )
+
+    [System.Collections.ArrayList]$lattribute = @()
+
+    [void]$lattribute.Add((New-MartiAttribute -Category "format" -Name "compression" -Function "algorithm" -Value $CompressionType))
+    [void]$lattribute.Add((New-MartiAttribute -Category "format" -Name "encryption" -Function "algorithm" -Value $Encryption))
+    [void]$lattribute.Add((New-MartiAttribute -Category "dataset" -Name "files" -Function "count" -Value 0))
+
+    return $lattribute
+}
+
+# Shared implementation of the Update-AttributeValue* functions: updates the first
+# entry that matches category, name and function and whose comparison is either
+# still "NA" or already equal to $Comparison; writes an error when nothing matches.
+function Set-MartiAttributeValue {
+    Param (
+        $Attributes,
+        [String] $Category,
+        [String] $Key,
+        [String] $Function,
+        $Value,
+        [String] $Comparison
+    )
+
+    foreach ($item in $Attributes)
+    {
+        $sameEntry = $item.category -eq $Category -and $item.name -eq $Key -and $item.function -eq $Function
+        if ($sameEntry -and ($item.comparison -eq "NA" -or $item.comparison -eq $Comparison)) {
+            $item.comparison = $Comparison
+            $item.value = $Value
+            return
+        }
+    }
+
+    Write-Error "No match found for attribute name '$Key' and category '$Category' and function '$Function' "
+}
+
+
+# Sets a string value (and the comparison) on the matching attribute entry.
+function Update-AttributeValueString {
+    Param (
+        # Attribute List
+        [Parameter(Mandatory)] [Alias("Attributes")] [System.Collections.ArrayList] $lAttribute,
+        # Attribute Category
+        [Parameter(Mandatory)] [String] $Category,
+        # Attribute Key Name
+        [Parameter(Mandatory)] [String] $Key,
+        # Attribute Function
+        [Parameter(Mandatory)] [String] $Function,
+        # Attribute Value
+        [Parameter(Mandatory)] [AllowEmptyString()] [String] $Value,
+        # Attribute Comparison
+        [String] $Comparison = "EQ"
+    )
+
+    Set-MartiAttributeValue -Attributes $lAttribute -Category $Category -Key $Key -Function $Function -Value $Value -Comparison $Comparison
+}
+
+
+# Sets a numeric value (and the comparison) on the matching attribute entry.
+function Update-AttributeValueNumber {
+    Param (
+        # Attribute List
+        [Parameter(Mandatory)] [System.Collections.ArrayList] $Attributes,
+        # Attribute Category
+        [Parameter(Mandatory)] [String] $Category,
+        # Attribute Key Name
+        [Parameter(Mandatory)] [String] $Key,
+        # Attribute Function
+        [Parameter(Mandatory)] [String] $Function,
+        # Attribute Value
+        [Parameter(Mandatory)] [Decimal] $Value,
+        # Attribute Comparison
+        [String] $Comparison = "EQ"
+    )
+
+    Set-MartiAttributeValue -Attributes $Attributes -Category $Category -Key $Key -Function $Function -Value $Value -Comparison $Comparison
+}
+
+
+# Returns the CSV default attributes for a delimited text file and, when
+# ExtendedAttributes is set, fills in the record and column counts by reading
+# the file at Path with Import-Csv using the given Delimiter.
+function New-DelimitedFileAttributes {
+    Param (
+        [String] $Path,
+        [String] $Delimiter,
+        [Switch] $ExtendedAttributes
+    )
+
+    $lattribute = New-DefaultCsvAttributes
+
+    if ($ExtendedAttributes) {
+        $rowCount = 0
+        $colCount = 0
+        foreach ($datum in (Import-Csv -LiteralPath $Path -Delimiter $Delimiter)) {
+            # Import-Csv gives every row the properties named by the header row, so the
+            # column count only needs to be measured once, on the first row
+            if ($rowCount -eq 0) {
+                $colCount = @(Get-Member -InputObject $datum -MemberType NoteProperty).Count
+            }
+            $rowCount += 1
+        }
+        Update-AttributeValueNumber -Attributes $lattribute -Key "records" -Category "dataset" -Function "count" -Value $rowCount
+        Update-AttributeValueNumber -Attributes $lattribute -Key "columns" -Category "dataset" -Function "count" -Value $colCount
+    }
+
+    return $lattribute
+}
+
+
+# Returns the attribute list that describes a file of the given type.
+#   Path               - file to inspect (only read when ExtendedAttributes is set)
+#   FileType           - extension without the dot: CSV, TXT, JSON, ZIP or 7Z; any other value gives no attributes
+#   ExtendedAttributes - also measure record/column counts (CSV, TXT) or the item count (ZIP)
+function Get-MartiFileAttributes {
+    Param (
+        # File path
+        [Parameter(Mandatory)] [String] $Path,
+        # File type
+        [Parameter(Mandatory)] [AllowEmptyString()] [String] $FileType,
+        # Process the file for attributes
+        [Switch] $ExtendedAttributes
+    )
+
+    $lattribute = $null
+
+    switch ($FileType) {
+        "CSV" {
+            $lattribute = New-DelimitedFileAttributes -Path $Path -Delimiter "," -ExtendedAttributes:$ExtendedAttributes
+        }
+        "TXT" {
+            # TXT files are read as tab-delimited
+            $lattribute = New-DelimitedFileAttributes -Path $Path -Delimiter "`t" -ExtendedAttributes:$ExtendedAttributes
+        }
+        "JSON" {
+            $lattribute = New-DefaultJsonAttributes
+        }
+        "ZIP" {
+            $lattribute = New-DefaultZipAttributes -CompressionType "ZIP"
+            if ($ExtendedAttributes) {
+                # Shell.Application is a Windows COM object; guard against NameSpace not returning a folder for the path
+                $shell = New-Object -Com Shell.Application
+                $zipFile = $shell.NameSpace($Path)
+                if ($null -eq $zipFile) {
+                    Write-Warning "Unable to open archive '$Path' to count its files"
+                }
+                else {
+                    $items = $zipFile.Items()
+                    Update-AttributeValueNumber -Attributes $lattribute -Key "files" -Category "dataset" -Function "count" -Value $items.Count
+                }
+            }
+        }
+        "7Z" {
+            $lattribute = New-DefaultZipAttributes -CompressionType "7Z"
+        }
+    }
+
+    if ($null -eq $lattribute) {
+        [System.Collections.ArrayList]$lattribute = @()
+    }
+
+    return $lattribute
+}
+
diff --git a/source/powershell/New-Marti.ps1 b/source/powershell/New-Marti.ps1
index b6463f4..f94ee8c 100644
--- a/source/powershell/New-Marti.ps1
+++ b/source/powershell/New-Marti.ps1
@@ -115,6 +115,7 @@ Param(
     [String] $Filter ="*",
     [String] $UrlPath,
     [switch] $Recurse,
+    [switch] $ExtendAttributes,
     [switch] $ExcludeHash,
     [String] $LogPath
 
@@ -145,17 +146,14 @@ Param(
     Get-ChildItem $SourceFolder -Filter $Filter -Recurse:$Recurse -Force|
     Where-Object {!$_.PSIsContainer} |
     ForEach-Object {
-        Write-Log "Define file $_.FullName "
+        Write-Log "Define file $($_.FullName) "
 
         if ($ExcludeHash) {
             $hash = ""
         } else {
             $hash = (Get-FileHash -Path $_.FullName -Algorithm $hashAlgo).Hash
         }
-        [System.Collections.ArrayList]$lattribute = @()
-        if ($item.Extension.Substring(1) -eq "CSV") {
-            $lattribute = New-DefaultAttributes
-        }
+        $lattribute = Get-MartiFileAttributes -Path $_.FullName -FileType $_.Extension.Substring(1) -ExtendedAttributes:$ExtendAttributes
 
         $oResource = [PSCustomObject]@{
             title = $_.Name.Replace($_.Extension, "")
@@ -189,6 +187,7 @@ Param(
     }
     Write-Log "Captured $($lresource.Count) items"
     $oMarti.resources = $lresource
+    Close-Log
 
     return $oMarti
 
@@ -234,118 +233,13 @@ Param(
     return $Attributes
 }
 
-function New-DefaultCsvAttributes {
-
-    [System.Collections.ArrayList]$lattribute = @()
-
-    $oAttribute = [PSCustomObject]@{
-        category = "dataset"
-        name = "header"
-        function = "count"
-        comparison = "NA"
-        value = 0
-    }
-    $lattribute += $oAttribute
-
-    $oAttribute = [PSCustomObject]@{
-        category = "dataset"
-        name = "footer"
-        function = "count"
-        comparison = "NA"
-        value = 0
-    }
-    $lattribute += $oAttribute
-
-    $oAttribute = [PSCustomObject]@{
-        category = "format"
-        name = "separator"
-        function = "value"
-        comparison = "NA"
-        value = ","
-    }
-    $lattribute += $oAttribute
-
-    $oAttribute = [PSCustomObject]@{
-        category = "format"
-        name = "columns"
-        function = "value"
-        comparison = "NA"
-        value = ","
-    }
-    $lattribute += $oAttribute
-
-    $oAttribute = [PSCustomObject]@{
-        category = "dataset"
-        name = "records"
-        function = "count"
-        comparison = "NA"
-        value = 0
-    }
-    $lattribute += $oAttribute
-
-    $oAttribute = [PSCustomObject]@{
-        category = "dataset"
-        name = "columns"
-        function = "count"
-        comparison = "NA"
-        value = 0
-    }
-    $lattribute += $oAttribute
-
-    return $lattribute
-}
-
-
-function New-DefaultJsonAttributes {
-
-    [System.Collections.ArrayList]$lattribute = @()
-
-    $oAttribute = [PSCustomObject]@{
-        category = "format"
-        name = "list"
-        function = "offset"
-        comparison = "NA"
-        value = ","
-    }
-    $lattribute += $oAttribute
-
-    $oAttribute = [PSCustomObject]@{
-        category = "format"
-        name = "columns"
-        function = "value"
-        comparison = "NA"
-        value = ","
-    }
-    $lattribute += $oAttribute
-
-    $oAttribute = [PSCustomObject]@{
-        category = "dataset"
-        name = "records"
-        function = "count"
-        comparison = "NA"
-        value = 0
-    }
-    $lattribute += $oAttribute
-
-    $oAttribute = [PSCustomObject]@{
-        category = "dataset"
-        name = "columns"
-        function = "count"
-        comparison = "NA"
-        value = 0
-    }
-    $lattribute += $oAttribute
-
-    return $lattribute
-}
-
-
 function New-MartiItem
 {
 Param(
     [Parameter(Mandatory)][String] $SourcePath,
     [String] $UrlPath = "",
     [switch] $ExcludeHash,
+    [switch] $ExtendAttributes,
     [String] $LogPath
 
 )
@@ -355,7 +249,7 @@ Param(
     Write-Debug "Parameter: LogPath Value: $LogPath "
     Open-Log
     Write-Log "Function 'New-MartiItem' parameters follow"
-    Write-Log "Parameter: SourceFolder Value: $SourceFolder "
+    Write-Log "Parameter: SourcePath Value: $SourcePath "
     Write-Log "Parameter: ExcludeHash Value: $ExcludeHash "
     Write-Log ""
 
@@ -375,20 +269,14 @@ Param(
 
     $item = Get-Item -Path $SourcePath -Force
 
-    Write-Log "Define file $item.FullName "
+    Write-Log "Define file $($item.FullName) "
 
     if ($ExcludeHash) {
         $hash = ""
     } else {
         $hash = (Get-FileHash -Path $item.FullName -Algorithm $hashAlgo).Hash
     }
-    [System.Collections.ArrayList]$lattribute = @()
-    if ($item.Extension.Substring(1) -eq "CSV") {
-        $lattribute = New-DefaultCsvAttributes
-    }
-    if ($item.Extension.Substring(1) -eq "JSON") {
-        $lattribute = New-DefaultJsonAttributes
-    }
+    $lattribute = Get-MartiFileAttributes -Path $item.FullName -FileType $item.Extension.Substring(1) -ExtendedAttributes:$ExtendAttributes
 
     $oResource = [PSCustomObject]@{
         title = $item.Name.Replace($item.Extension, "")
@@ -413,7 +301,11 @@ Param(
     }
 
     if ($null -ne $UrlPath -and $UrlPath -ne "") {
-        $oResource.url = Join-Path -Path $UrlPath -ChildPath $_.Name
+        if ($UrlPath[$UrlPath.Length-1] -eq "/" -or $UrlPath[$UrlPath.Length-1] -eq "\") {
+            $oResource.url = $UrlPath.Replace("\", "/") + $item.Name
+        } else {
+            $oResource.url = $UrlPath.Replace("\", "/") + "/" + $item.Name
+        }
     }
 
     $lresource += $oResource