martiLQ/source/python/client/martiLQ.py

495 lines
18 KiB
Python
Raw Normal View History

2021-10-13 20:29:30 +00:00
import ftplib
from genericpath import getsize
import os
import uuid
import json
import datetime
import getpass
import hashlib
import glob
2021-11-10 08:55:01 +00:00
import argparse
from configparser import ConfigParser
import requests
import mimetypes
2021-10-13 20:29:30 +00:00
2021-11-10 11:54:01 +00:00
from mconfiguration import mConfiguration
from mlogging import mLogging
from mresource import mResource
2021-10-13 20:29:30 +00:00
class martiLQ:
    """Build, persist, fetch and reconcile martiLQ definition documents.

    The definition is held in memory as a plain ``dict`` (created by
    ``NewMartiDefinition`` or read from JSON by ``Load``) whose
    ``"resources"`` list describes the individual documents.  Settings are
    read through an ``mConfiguration`` instance and progress messages are
    written through an ``mLogging`` instance; both are created in
    ``__init__``.
    """

    _SoftwareVersion = "0.0.1"
    _default_metaFile = "##marti##.json"
    # Entry placed in the "custom" list of every new definition.
    _oSoftware = {
        "extension": "software",
        "softwareName": "MARTILQREFERENCE",
        "author": "Meerkat@merebox.com",
        "version": "0.0.1"
    }
    _MartiErrorId = ""
    _oMartiDefinition = None
    _oConfiguration = None
    _Log = None

    def GetSoftwareName(self):
        """Return the fixed software identifier of this implementation."""
        return "MARTILQREFERENCE"

    def __init__(self):
        """Create the configuration and logging helpers and wire them up."""
        self._oConfiguration = mConfiguration()
        self._Log = mLogging()
        self._Log.SetConfig(self._oConfiguration.GetConfig("logPath"), self._oConfiguration.GetSoftwareName())

    def LoadConfig(self, ConfigPath):
        """Load configuration settings from ``ConfigPath``."""
        self._oConfiguration.LoadConfig(ConfigPath)

    def SaveConfig(self, ConfigPath):
        """Write the current configuration to ``ConfigPath``.

        Returns whatever the configuration object's ``SaveConfig`` returns.
        """
        return self._oConfiguration.SaveConfig(ConfigPath)

    def Set(self, MartiLQ):
        """Replace the in-memory definition with ``MartiLQ``."""
        self._oMartiDefinition = MartiLQ

    def SetTitle(self, Title):
        """Set the title of the in-memory definition.

        Definitions produced by this class are dicts, so the title is stored
        under the ``"title"`` key; attribute assignment on a dict would raise
        ``AttributeError``.  Objects supplied through ``Set`` that are not
        dicts still receive a ``title`` attribute as before.
        """
        if isinstance(self._oMartiDefinition, dict):
            self._oMartiDefinition["title"] = Title
        else:
            self._oMartiDefinition.title = Title

    def Get(self):
        """Return the in-memory definition (``None`` if nothing is loaded)."""
        return self._oMartiDefinition

    def Save(self, JsonPath):
        """Serialise the in-memory definition as JSON into ``JsonPath``."""
        # Serialise before opening so a serialisation failure does not leave
        # a truncated file behind.
        payload = json.dumps(self._oMartiDefinition, indent=5)
        with open(JsonPath, "w") as jsonFile:
            jsonFile.write(payload)

    def Load(self, JsonPath):
        """Read a definition from the JSON file at ``JsonPath``.

        Any definition already held in memory is replaced.

        Raises:
            Exception: when ``JsonPath`` does not exist.
        """
        self._MartiErrorId = ""
        self._Log.OpenLog()
        self._Log.WriteLog("Function 'Load' parameters follow")
        self._Log.WriteLog("Parameter: SourcePath Value: {}".format(JsonPath))
        self._Log.WriteLog("")

        if not os.path.exists(JsonPath):
            self._Log.WriteLog("martiLQ document file '"+ JsonPath +"' does not exist")
            raise Exception("martiLQ document file '{}' does not exist".format(JsonPath))

        if self._oMartiDefinition is not None:
            self._Log.WriteLog("Existing definition overwritten in memory")

        with open(JsonPath, "r") as jsonFile:
            self._oMartiDefinition = json.load(jsonFile)

    def Close(self):
        """Close the log."""
        self._Log.CloseLog()

    def NewMartiDefinition(self):
        """Create, store and return a fresh definition dict.

        Descriptive fields are taken from configuration; the publisher falls
        back to the current OS user when the configuration supplies none.
        """
        dateToday = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%S")

        publisher = self._oConfiguration.GetConfig("publisher")
        if not publisher:
            publisher = getpass.getuser()

        config = self._oConfiguration.GetConfig
        self._oMartiDefinition = {
            "content-type": "application/vnd.martilq.json",
            "title": "",
            "uid": str(uuid.uuid4()),
            "description": "",
            "modified": dateToday,
            "publisher": publisher,
            "contactPoint": config("contactPoint"),
            "accessLevel": config("accessLevel"),
            "rights": config("rights"),
            "tags": config("tags"),
            "license": config("license"),
            "state": config("state"),
            "batch": config("batch"),
            "describedBy": config("describedBy"),
            "landingPage": config("landingPage"),
            "theme": config("theme"),
            "resources": [],
            # Copy so edits to one definition never leak into the shared
            # class-level template.
            "custom": [dict(self._oSoftware)]
        }
        return self._oMartiDefinition

    def Temporal(self):
        """Return a new, disabled "temporal" extension template."""
        return {
            "enabled": False,
            "extension": "temporal",
            "businessDate": "",
            "runDate": ""
        }

    def Spatial(self):
        """Return a new, disabled spatial template."""
        return {
            "enabled": False,
            "country": "",
            "region": "",
            "town": "",
        }

    def NewMartiChildItem(self, SourceFolder, UrlPath=None, Recurse=True, ExtendAttributes=True, ExcludeHash=False, Filter ="*"):
        """Append one resource entry per file matched under ``SourceFolder``.

        ``SourceFolder`` is used as the glob pattern itself when it already
        ends with ``*``; otherwise ``Filter`` is appended to it.

        NOTE(review): ``glob`` only descends into sub-directories when the
        pattern contains ``**``, so ``Recurse`` has no effect with the
        default ``Filter`` of ``*``.
        """
        SourceFullName = os.path.abspath(SourceFolder)
        if not SourceFolder.endswith("*"):
            SourceFullName = os.path.join(SourceFullName, Filter)

        for fullName in glob.iglob(SourceFullName, recursive=Recurse):
            if not os.path.isfile(fullName):
                continue
            oResource = self.NewMartiLQResource(SourcePath=fullName, UrlPath=UrlPath, ExtendAttributes=ExtendAttributes, ExcludeHash=ExcludeHash)
            if self._oMartiDefinition.get("resources") is None:
                print("MartiLQ defintion resources not initialised")
                self._Log.WriteLog("MartiLQ defintion resources not initialised")
                # Start an empty list instead of failing on None.append().
                self._oMartiDefinition["resources"] = []
            self._oMartiDefinition["resources"].append(oResource)

    def NewMartiLQResource(self, SourcePath, UrlPath, ExcludeHash, ExtendAttributes):
        """Build and return the resource entry for the file ``SourcePath``.

        The work is delegated to a new ``mResource`` that shares this
        object's configuration.
        """
        self._MartiErrorId = ""
        oRes = mResource()
        oRes.SetConfig(self._oConfiguration)
        return oRes.NewMartiLQResource(SourcePath, UrlPath, ExcludeHash, ExtendAttributes)

    def FtpPull(self, host, file_remote, file_local):
        """Download ``file_remote`` from ``host`` over anonymous FTP.

        A partial or failed download is removed from disk.

        Returns:
            bool: True when the transfer completed, False otherwise.
        """
        succeeded = False
        with ftplib.FTP(host) as ftp:
            try:
                ftp.login()
                with open(file_local, 'wb') as fl:
                    res = ftp.retrbinary(f"RETR {file_remote}", fl.write)
                succeeded = res.startswith('226 Transfer complete')
                if not succeeded:
                    print('Download failed for: '+file_remote)
                    self._Log.WriteLog('Download failed for: '+file_remote)
            except ftplib.all_errors as e:
                # Every other WriteLog call passes one message string, so
                # the error is formatted into the text.
                self._Log.WriteLog('FTP error: {}'.format(e))
                succeeded = False
            # Clean up only after the local file handle has been closed.
            if not succeeded and os.path.isfile(file_local):
                os.remove(file_local)
        return succeeded

    def Fetch(self, TargetPath):
        """Download every resource of the loaded definition into ``TargetPath``.

        ``ftp``, ``http`` and ``https`` URLs are downloaded.  ``file`` URLs
        are skipped without being reported.  Resources with no URL or an
        unsupported scheme are reported by document name; failed downloads
        are reported by URL.

        Returns:
            tuple: ``(fetched_files, fetch_error)`` - local paths written and
            the identifiers of resources that could not be fetched.

        Raises:
            Exception: when ``TargetPath`` is empty, no definition is loaded
                or the definition has no resources.
        """
        if TargetPath is None or TargetPath == "":
            self._Log.WriteLog("Target path is missing from fetch")
            raise Exception("Target path is missing from fetch")
        if self._oMartiDefinition is None:
            self._Log.WriteLog("No defintion loaded")
            raise Exception("No defintion loaded")
        if len(self._oMartiDefinition["resources"]) < 1:
            self._Log.WriteLog("No resources in defintion")
            raise Exception("No resources in defintion")

        if not os.path.exists(TargetPath):
            os.makedirs(TargetPath, exist_ok=True)

        fetched_files = []
        fetch_error = []
        for resource in self._oMartiDefinition["resources"]:
            url = resource["url"]
            if url is None or url == "":
                fetch_error.append(resource["documentName"])
                continue

            method = str(url.split(":", 2)[0]).lower()
            if method == "ftp":
                # "ftp://host/path" -> ["ftp:", "", "host", "path"]
                parts = url.split("/", 3)
                if len(parts) < 4:
                    self._Log.WriteLog("FTP url '{}' has no remote file path".format(url))
                    fetch_error.append(url)
                    continue
                localPath = os.path.join(TargetPath, resource["documentName"])
                if self.FtpPull(parts[2], parts[3], localPath):
                    fetched_files.append(localPath)
                else:
                    fetch_error.append(url)
            elif method == "http" or method == "https":
                response = requests.get(url)
                if not response.status_code == 200:
                    self._Log.WriteLog("HTTP fetch failed with code {} for '{}'".format(response.status_code, url))
                    print("HTTP fetch failed with code {} for '{}'".format(response.status_code, url))
                    fetch_error.append(url)
                else:
                    localPath = os.path.join(TargetPath, resource["documentName"])
                    with open(localPath, 'wb') as fh:
                        fh.write(response.content)
                    fetched_files.append(localPath)
            elif method == "file":
                # Local file URLs are neither copied nor reported.
                pass
            else:
                fetch_error.append(resource["documentName"])

        return fetched_files, fetch_error

    def TestAttributeDefinition(self, oTestResults, documentName, localR, remoteR):
        """Compare the attribute lists of one resource.

        For every local attribute whose ``comparison`` is not ``"NA"`` the
        first remote attribute with the same category, name and function is
        looked up and a result row is appended to ``oTestResults``.  Only
        the ``"EQ"`` comparison can match.  A local attribute without a
        remote counterpart produces no row.

        Returns:
            int: number of attributes that mismatched or could not be
            compared.
        """
        errorCount = 0
        for attrL in localR:
            if attrL["comparison"] == "NA":
                continue
            try:
                for attrR in remoteR:
                    if attrL["category"] == attrR["category"] and attrL["name"] == attrR["name"] and attrL["function"] == attrR["function"]:
                        match = False
                        if attrL["comparison"] == "EQ":
                            match = attrL["value"] == attrR["value"]
                        oTestResults.append([documentName, "Attribute", (attrL["category"]+" " + attrL["name"]+" " + attrL["function"]), match, attrL["value"], attrR["value"] ])
                        if not match:
                            errorCount = errorCount + 1
                        break
            except Exception as e:
                # Python 3 exceptions have no ".message"; print the exception.
                print(e)
                print("ERROR with: {}".format(attrL.get("name")))
                oTestResults.append([documentName, "Attribute", attrL.get("name"), False, "N/F", "N/F" ])
                errorCount = errorCount + 1
        return errorCount

    def _VerifySignedHash(self, resource, retarget, Sign):
        """Return True when the signed hash of ``retarget`` verifies.

        ``Sign`` is the path of the public key file.  An unreadable key file
        raises; every other problem (pyOpenSSL missing, unparsable key,
        failed verification) is logged and reported as False.
        """
        try:
            from OpenSSL import crypto
        except ImportError:
            self._Log.WriteLog("Import error in signed verification")
            return False

        with open(Sign, "r") as pub_key_file:
            pubkey = pub_key_file.read()

        try:
            # set_pubkey needs a PKey object, not the raw key text.
            # NOTE(review): assumes a PEM encoded public key - confirm.
            x509 = crypto.X509()
            x509.set_pubkey(crypto.load_publickey(crypto.FILETYPE_PEM, pubkey))
            # NOTE(review): the signature is passed exactly as stored in the
            # document; confirm whether it needs decoding (e.g. base64).
            crypto.verify(x509, retarget["hash"]["value"], resource["hash"]["value"], retarget["hash"]["algo"])
        except Exception:
            self._Log.WriteLog("Error in verification for {}".format(resource["documentName"]))
            return False
        return True

    def TestMartiDefinition(self, SourcePath, Sign=None):
        """Reconcile the loaded definition against the document ``SourcePath``.

        Resources are paired by ``documentName``; for each pair the hash
        (signature-verified when the supplied hash is flagged as signed),
        the length and the attributes are compared.

        Args:
            SourcePath: path of the martiLQ JSON document to compare with.
            Sign: optional public key file for signed hashes; defaults to
                the ``signKey_file`` configuration value.

        Returns:
            tuple: ``(oTestResults, testError)`` - result rows (the first row
            is the column header) and the number of failed checks.

        Raises:
            Exception: when no definition is loaded.
            FileNotFoundError: when ``SourcePath`` does not exist.
        """
        self._MartiErrorId = ""
        self._Log.OpenLog()
        self._Log.WriteLog("Function 'TestMartiDefinition' parameters follow")
        self._Log.WriteLog("Parameter: SourcePath Value: {}".format(SourcePath))
        self._Log.WriteLog("")

        if self._oMartiDefinition is None:
            self._Log.WriteLog("No defintion loaded")
            raise Exception("No defintion loaded")
        if not os.path.exists(SourcePath):
            self._Log.WriteLog("martiLQ document file '{}' does not exist".format(SourcePath))
            raise FileNotFoundError("martiLQ document file '{}' does not exist".format(SourcePath))

        with open(SourcePath, "r") as jsonFile:
            lq = json.load(jsonFile)

        testError = 0
        localResources = self._oMartiDefinition["resources"]
        suppliedResources = lq["resources"]
        oTestResults = [
            ["ResourceName", "Level", "Metric", "Matches", "LocalCalculation", "SuppliedValue" ],
            ["@", "Batch", "Resource count", (len(localResources) == len(suppliedResources)), len(localResources), len(suppliedResources) ],
        ]

        for resource in localResources:
            for retarget in suppliedResources:
                if resource["documentName"] != retarget["documentName"]:
                    continue

                if retarget["hash"]["signed"]:
                    if not Sign:
                        Sign = self._oConfiguration.GetConfig("signKey_file")
                    if not Sign:
                        self._Log.WriteLog("No Sign Key specified so Hash check cannot be performed for signed content")
                    else:
                        verified = self._VerifySignedHash(resource, retarget, Sign)
                        if not verified:
                            testError = testError + 1
                        oTestResults.append([resource["documentName"], "Resource", "Hash", verified, resource["hash"]["value"], retarget["hash"]["value"] ])
                else:
                    hashMatches = resource["hash"]["value"] == retarget["hash"]["value"]
                    if not hashMatches:
                        testError = testError + 1
                    oTestResults.append([resource["documentName"], "Resource", "Hash", hashMatches, resource["hash"]["value"], retarget["hash"]["value"] ])

                lengthMatches = resource["length"] == retarget["length"]
                if not lengthMatches:
                    testError = testError + 1
                oTestResults.append([resource["documentName"], "Resource", "Length", lengthMatches, resource["length"], retarget["length"] ])

                testError = testError + self.TestAttributeDefinition(oTestResults, resource["documentName"], resource["attributes"], retarget["attributes"])
                # Only the first supplied resource with this name is used.
                break

        self._Log.WriteLog("TestMartiDefinition function completed with {} errors".format(testError))
        return oTestResults, testError
2021-11-10 08:55:01 +00:00
def Make(ConfigPath, SourcePath, Filter, Recursive, UrlPrefix, DefinitionPath):
    """Create a martiLQ definition describing the files under ``SourcePath``.

    Args:
        ConfigPath: configuration file to load; skipped when empty or None.
        SourcePath: folder (or glob pattern ending in ``*``) to scan.
        Filter: file name pattern appended to ``SourcePath``.
        Recursive: passed through as the glob ``recursive`` flag.
        UrlPrefix: URL prefix handed to each resource entry.
        DefinitionPath: where to save the definition; skipped when empty or
            None.

    Returns:
        The ``martiLQ`` object holding the new definition.
    """
    oMarti = martiLQ()
    # Truthiness covers both "" and None; the command line passes None for
    # options that were not supplied.
    if ConfigPath:
        oMarti.LoadConfig(ConfigPath)
    oMarti.NewMartiDefinition()
    oMarti.NewMartiChildItem(SourceFolder=SourcePath, UrlPath=UrlPrefix , ExcludeHash=False, Filter=Filter, Recurse=Recursive, ExtendAttributes=True)
    if DefinitionPath:
        oMarti.Save(DefinitionPath)
    return oMarti
def GetResources(ConfigPath, OutputPath, DefinitionPath, Proxy=None, ProxyUser=None,ProxyCredential=None):
    """Fetch every resource listed in a martiLQ document into ``OutputPath``.

    Args:
        ConfigPath: configuration file to load; skipped when empty or None.
        OutputPath: directory that receives the downloaded files.
        DefinitionPath: martiLQ JSON document to read.
        Proxy: optional proxy setting; overrides the configured value only
            when supplied.
        ProxyUser: accepted for interface compatibility; currently unused.
        ProxyCredential: accepted for interface compatibility; currently
            unused.

    Returns:
        tuple: ``(fetched_files, fetch_error)`` as returned by ``Fetch``.
    """
    oMarti = martiLQ()
    if ConfigPath:
        oMarti.LoadConfig(ConfigPath)
    oMarti.Load(DefinitionPath)
    try:
        # Do not wipe a proxy that came from the configuration file when the
        # caller did not pass one.
        if Proxy is not None:
            oMarti._oConfiguration.SetConfig("proxy", Proxy)

        fetched_files, fetch_error = oMarti.Fetch(OutputPath)
    finally:
        # Load() opened the log; close it even when the fetch raises.
        oMarti.Close()

    if len(fetch_error) > 0:
        print("Fetch file error")
    else:
        print("Fetched files")
    return fetched_files, fetch_error
def main(argv=None):
    """Command line entry point for martiLQ processing.

    Args:
        argv: optional list of command line arguments; ``None`` (the
            default) makes argparse read ``sys.argv``.

    Raises:
        Exception: when a parameter required by the selected task is
            missing.
    """
    parser = argparse.ArgumentParser(description='Processing for MartiLQ')
    parser.add_argument("-t", "--task", dest="task", type=str,
                        choices=["INIT", "MAKE", "GET", "RECON"],
                        help='task to execute')
    parser.add_argument("-s", "--source", dest="sourcePath",
                        help='path to source documents')
    parser.add_argument("-c", "--config", dest="configPath",
                        help='path to configuration file')
    parser.add_argument("-m", "--martilq", dest="definitionPath",
                        help='martiLQ document path')
    parser.add_argument("-o", "--output", dest="outputPath",
                        help="output file path")
    parser.add_argument("-u", "--url", dest="urlPrefix",
                        help="URL prefix for documents")
    # NOTE(review): store_false means the value defaults to True and passing
    # -R switches it OFF, which reads opposite to the help text.  Left as is
    # because changing it would alter the default for existing callers.
    parser.add_argument("-R", "--recursive", action="store_false",
                        help="recursive processing for source")
    # "--udpate" is the original (misspelled) flag, kept as an alias.
    parser.add_argument("--update", "--udpate", dest="update", action="store_false",
                        help="allow update of existing martiLQ document")
    parser.add_argument("--title", dest="title",
                        help="title for martiLQ document")
    parser.add_argument("--filter", dest="filter",
                        default="*",
                        help="filter for source documents")
    parser.add_argument("--description", dest="description",
                        help="decription for document")
    # NOTE(review): --landing is parsed but not applied by any task.
    parser.add_argument("--landing", dest="landing",
                        help="landing detail for martiLQ document")

    args = parser.parse_args(argv)

    if args.task == "INIT":
        if not args.configPath:
            raise Exception("Configuration path parameter required")
        m = martiLQ()
        if m.SaveConfig(args.configPath):
            print("Saved martiLQ configuration: " + args.configPath)
        else:
            print("Error in saving configuration file")
        m.Close()

    elif args.task == "MAKE":
        if not args.sourcePath:
            raise Exception("Source path parameter required")
        if not args.definitionPath:
            raise Exception("martiLQ document (json) path and name parameter required")
        m = Make(ConfigPath=args.configPath, SourcePath=args.sourcePath, Filter=args.filter, Recursive=args.recursive, UrlPrefix=args.urlPrefix, DefinitionPath=args.definitionPath)
        # Options that were not given are None; only overwrite the document
        # fields when a value was actually supplied.
        if args.title:
            m.Get()["title"] = args.title
        if args.description:
            m.Get()["description"] = args.description
        m.Save(args.definitionPath)
        m.Close()
        print("Saved martiLQ document: " + args.definitionPath)

    elif args.task == "GET":
        if not args.outputPath:
            raise Exception("Output path parameter required")
        if not args.definitionPath:
            raise Exception("martiLQ document (json) path and name parameter required")
        fetched_files, fetch_error = GetResources(ConfigPath=args.configPath, OutputPath=args.outputPath, DefinitionPath=args.definitionPath)
        for item in fetched_files:
            print("\t"+item)
        print("GET Feature done")

    elif args.task == "RECON":
        print("RECON Feature not imlemented yet")

    else:
        # No task selected: show usage instead of exiting silently.
        parser.print_help()


if __name__ == "__main__":
    main()