Is there a way to convert a tvm.runtime.module.Module into a tvm.IRModule?

I'm new to TVM. I have an optimized TVM model packaged as a tar file, and I'm trying to benchmark it by running inference on it. I based my code on the benchmarking code and on this file.
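
For context, the tar unpacks into mod.so, mod.json and mod.params (see the output further down), which to my understanding is the layout that the tvmc flow produces. My guess, and it is only an assumption since I didn't build the package myself, is that it was created roughly like this, with "my_model.onnx" standing in for whatever the original model was:

from tvm.driver import tvmc

# Assumption: model.tar came from the tvmc flow; "my_model.onnx" is a
# placeholder for the source model that was actually compiled.
model = tvmc.load("my_model.onnx")
tvmc.compile(model, target="llvm", package_path="model.tar")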

Here is my code:

import numpy as np
import os
import tvm
import tvm.testing  # needed for tvm.testing.assert_allclose below
from tvm.contrib import graph_executor
from tvm import relay
from tvm.runtime import vm as vm_rt
from tvm.relay import testing
from tvm.relay import vm


def benchmark_execution(
    mod,
    params,
    measure=True,
    data_shape=(1, 3, 224, 224),
    out_shape=(1, 1000),
    dtype="float32",
    model="unknown",
):
    def get_graph_executor_output(
        mod, data, params, target, dev, dtype="float32", number=2, repeat=20
    ):
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod, target, params=params)

        m = graph_executor.GraphModule(lib["default"](dev))
        # set inputs
        m.set_input("data", data)
        m.run()
        out = m.get_output(0, tvm.nd.empty(out_shape, dtype))

        if measure:
            print("Evaluate graph executor inference cost of {} on {}".format(model, repr(dev)))
            ftimer = m.module.time_evaluator("run", dev, number=number, repeat=repeat)
            # Measure in milliseconds.
            prof_res = np.array(ftimer().results) * 1000
            print(
                "Mean graph executor inference time (std dev): %.2f ms (%.2f ms)"
                % (np.mean(prof_res), np.std(prof_res))
            )

        return out.numpy()

    def get_vm_output(mod, data, params, target, dev, dtype="float32", number=2, repeat=20):
        with tvm.transform.PassContext(opt_level=3):
            exe = vm.compile(mod, target, params=params)
            rly_vm = vm_rt.VirtualMachine(exe, dev)
            result = rly_vm.run(data)

        if measure:
            print("Evaluate vm inference cost of {} on {}".format(model, repr(dev)))
            ftimer = rly_vm.module.time_evaluator("invoke", dev, number=number, repeat=repeat)
            # Measure in milliseconds.
            prof_res = np.array(ftimer("main", data).results) * 1000
            print(
                "Mean vm inference time (std dev): %.2f ms (%.2f ms)"
                % (np.mean(prof_res), np.std(prof_res))
            )

        return result.numpy().astype(dtype)

    # random input
    data = np.random.uniform(size=data_shape).astype(dtype)

    for target, dev in testing.enabled_targets():
        tvm_out = get_graph_executor_output(
            mod, tvm.nd.array(data.astype(dtype)), params, target, dev, dtype
        )
        vm_out = get_vm_output(mod, tvm.nd.array(data.astype(dtype)), params, target, dev, dtype)
        tvm.testing.assert_allclose(vm_out, tvm_out, rtol=1e-5, atol=1e-5)

def run(model_path, image_shape=(1, 3, 224, 224), out_shape=(1, 1000)):
    from tvm.contrib import utils as _utils, tar as _tar
    tar_temp = _utils.tempdir(custom_path=model_path.replace(".tar", ""))
    _tar.untar(model_path, tar_temp.temp_dir)
    files = [tar_temp.relpath(x) for x in tar_temp.listdir()]
    # assumes the archive lists mod.so, mod.json, mod.params in this order
    model_lib, json_file, params_file = [os.path.basename(x) for x in files]
    print(model_lib, json_file, params_file)
    loaded_json = open(tar_temp.relpath(json_file)).read()
    loaded_mod = tvm.runtime.load_module(tar_temp.relpath(model_lib))
    print(type(loaded_mod))
    loaded_params = bytearray(open(tar_temp.relpath(params_file), "rb").read())
    benchmark_execution(loaded_mod, loaded_params, model="optimized")

if __name__ == "__main__":
    run("model.tar")

This is what I get when I run it:

mod.so mod.json mod.params
<class 'tvm.runtime.module.Module'>
[10:59:08] /workspace/tvm/src/target/target_kind.cc:164: Warning: Unable to detect CUDA version, default to "-arch=sm_20" instead
Traceback (most recent call last):
  File "tvm_benchmark.py", line 87, in <module>
    run("model.tar")
  File "tvm_benchmark.py", line 84, in run
    benchmark_execution(loaded_mod, loaded_params, model="optimized")
  File "tvm_benchmark.py", line 68, in benchmark_execution
    mod, tvm.nd.array(data.astype(dtype)), params, target, dev, dtype
  File "tvm_benchmark.py", line 25, in get_graph_executor_output
    lib = relay.build(mod, target, params=params)
  File "/opt/conda/envs/env_orig/lib/python3.7/site-packages/tvm/relay/build_module.py", line 339, in build
    raise ValueError("Type of input parameter mod must be tvm.IRModule")
ValueError: Type of input parameter mod must be tvm.IRModule

My environment: CUDA 11.5, TVM 0.10.0.

I also tried saving the module with tvm.runtime.Module.save so that I could later load it back as a tvm.IRModule via tvm.ir.load_json, but that didn't work either.
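
As I understand it, relay.build expects the Relay IR from before compilation (a tvm.IRModule), while tvm.runtime.load_module returns an already compiled runtime module, which is why the call fails. What I would expect to work instead, based on the graph executor API, is to run the compiled artifact directly and skip relay.build entirely. A minimal sketch of what I mean, assuming the input tensor is named "data" and a CPU build (tvm.cuda(0) for a CUDA build):

import numpy as np
import tvm
from tvm.contrib import graph_executor

dev = tvm.cpu(0)  # assumption: CPU build; use tvm.cuda(0) for CUDA

# The three files extracted from model.tar
lib = tvm.runtime.load_module("mod.so")                   # compiled operators
graph_json = open("mod.json").read()                      # serialized execution graph
params_blob = bytearray(open("mod.params", "rb").read())  # trained weights

m = graph_executor.create(graph_json, lib, dev)
m.load_params(params_blob)
m.set_input("data", tvm.nd.array(np.random.uniform(size=(1, 3, 224, 224)).astype("float32")))
m.run()
print(m.get_output(0).numpy().shape)

But this only sidesteps the error; I still don't know whether a tvm.IRModule can be recovered from the compiled tar at all, which is what the title asks.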
