# Template
class ModelHandler(BaseHandler):
    """
    A custom model handler implementation for TorchServe.

    Lifecycle: TorchServe calls ``initialize`` once at model-load time, then
    ``handle`` per prediction request, which chains
    preprocess -> inference -> postprocess.
    """

    def __init__(self):
        self._context = None
        self.initialized = False

    def initialize(self, context):
        """
        Initialize model. This will be called during model loading time.

        :param context: Initial context contains model server system properties.
        :return: None
        :raises RuntimeError: if the serialized model file is missing.
        """
        self._context = context
        properties = context.system_properties
        # Load the model
        self.manifest = context.manifest
        model_dir = properties.get("model_dir")
        self.device = torch.device(
            "cuda:" + str(properties.get("gpu_id"))
            if torch.cuda.is_available()
            else "cpu"
        )
        # Read model serialize/pt file
        serialized_file = self.manifest['model']['serializedFile']
        model_pt_path = os.path.join(model_dir, serialized_file)
        if not os.path.isfile(model_pt_path):
            raise RuntimeError("Missing the model.pt file")
        self.model = torch.jit.load(model_pt_path)
        self.model.to(self.device)
        # Mark initialized only after the model has loaded successfully;
        # the original template set this flag before loading as well, which
        # could leave a half-initialized handler marked ready.
        self.initialized = True

    def preprocess(self, data: bytes):
        """
        Transform raw input into model input data.

        :param data: list of raw requests, should match batch size
        :return: decoded request text (tokenization/batching is left as a
                 template stub — extend here for the concrete model)
        :raises Exception: if the request carries no payload.
        """
        # Take the input data and make it inference ready
        text = data[0].get("data") or data[0].get("body")
        # Error check
        if text is None:
            warnings.warn("data params is none")
            raise Exception("no data")
        # Preprocess, build the batch
        text = text.decode()
        return text

    def inference(self, texts_tokens_, positional_enc=None):
        """
        Internal inference method.

        :param texts_tokens_: transformed model input data
        :param positional_enc: optional positional encoding tensor
            (default ``None`` keeps the template chainable; pass a real
            encoding for models that require one)
        :return: inference output
        """
        # Do some inference call to engine here and return output
        predictions = self.model.forward(texts_tokens_, positional_enc)
        return predictions

    def postprocess(self, inference_output):
        """
        Return inference result.

        :param inference_output: list of inference output
        :return: list of predict results
        """
        # Take output from network and post-process to desired format
        postprocess_output = inference_output
        return postprocess_output

    def handle(self, data, context):
        """
        Invoked by TorchServe for a prediction request.

        Do pre-processing of data, prediction using model and
        post-processing of prediction output.

        :param data: Input data for prediction
        :param context: Initial context contains model server system properties.
        :return: prediction output
        """
        # Chain the stages and propagate the result back to TorchServe;
        # the original template dropped every intermediate value and
        # referenced an undefined ``model_output``.
        model_input = self.preprocess(data)
        model_output = self.inference(model_input)
        return self.postprocess(model_output)
service = ModelHandler()


def handle(data, context):
    """
    Module-level entry point called by TorchServe for each request.

    Lazily initializes the shared handler on first use, then delegates
    the request to it. A ``None`` payload short-circuits to ``None``.

    :param data: Input data for prediction (or None).
    :param context: Context with model server system properties.
    :return: prediction output, or None when no data was supplied.
    """
    if not service.initialized:
        service.initialize(context)
    return None if data is None else service.handle(data, context)