Mtl.evaluate报出 AssertionError: No samples loaded

运行报错
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/common/dataset.py”, line 129, in init
assert data, ‘No samples loaded’
AssertionError: No samples loaded

完整代码

import os
from hanlp.common.dataset import SortingSamplerBuilder
from hanlp.common.transform import NormalizeCharacter
from hanlp.components.mtl.multi_task_learning import MultiTaskLearning
from hanlp.components.mtl.tasks.ner.tag_ner import TaggingNamedEntityRecognition
from hanlp.components.mtl.tasks.pos import TransformerTagging
from hanlp.components.mtl.tasks.tok.tag_tok import TaggingTokenization
from hanlp.layers.embeddings.contextual_word_embedding import ContextualWordEmbedding
from hanlp.layers.transformers.relative_transformer import RelativeTransformerEncoder
from hanlp.utils.lang.zh.char_table import HANLP_CHAR_TABLE_JSON
from hanlp.utils.log_util import cprint


# Project root: the parent of the directory that contains this script.
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def cdroot():
    """Switch the current working directory to the project root."""
    os.chdir(root)


# Training hyper-parameters.
F_LEARN_RATE = 1e-3
N_EPOCH = 1
N_BATCH_SIZE = 16
N_MAX_SEQ_LEN = 510

# Local copies of the CTB8 word-segmentation corpus.
CTB8_CWS_TRAIN = r"/home/_tmp/data/ctb8_cn/tasks/cws/train.txt"
CTB8_CWS_DEV = r"/home/_tmp/data/ctb8_cn/tasks/cws/dev.txt"
CTB8_CWS_TEST = r"/home/_tmp/data/ctb8_cn/tasks/cws/test.txt"

# Local copies of the CTB8 part-of-speech corpus.
CTB8_POS_TRAIN = r"/home/_tmp/data/ctb8_cn/tasks/pos/train.txt"
CTB8_POS_DEV = r"/home/_tmp/data/ctb8_cn/tasks/pos/dev.txt"
CTB8_POS_TEST = r"/home/_tmp/data/ctb8_cn/tasks/pos/test.txt"

# Local copy of the MSRA token-level NER corpus (short IOBES variant).
S_NER_DATA_DIR = r"/home/_tmp/data/msra_ner_token_level_cn/"
MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_TRAIN = os.path.join(S_NER_DATA_DIR, "word_level.train.short.tsv")
MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_DEV = os.path.join(S_NER_DATA_DIR, "word_level.dev.short.tsv")
MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_TEST = os.path.join(S_NER_DATA_DIR, "word_level.test.short.tsv")

# Pretrained MTL model directory and the backbone transformer checkpoint.
S_SAV_MODEL_DIR = '/home/_tmp/data/model/mtl/open_tok_pos_ner_srl_dep_sdp_con_electra_small'
S_PERTRAINED_DIR = r"/home/data_sync/pretrain_model/hfl__chinese-electra-180g-small-discriminator"

# Echo every corpus path (blank line between groups) so a missing or
# misnamed file is easy to spot in the log before training/evaluation starts.
_PATH_GROUPS = (
    (CTB8_CWS_TRAIN, CTB8_CWS_DEV, CTB8_CWS_TEST),
    (CTB8_POS_TRAIN, CTB8_POS_DEV, CTB8_POS_TEST),
    (MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_TRAIN,
     MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_DEV,
     MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_TEST),
)
for _i, _group in enumerate(_PATH_GROUPS):
    if _i:
        print()
    for _path in _group:
        print(_path)

# Task definitions keyed by task name. 'ner' declares a dependency on 'tok',
# so the MTL component feeds segmentation output into the NER task.
tasks = {}

tasks['tok'] = TaggingTokenization(  # Chinese word segmentation
    CTB8_CWS_TRAIN,
    CTB8_CWS_DEV,
    CTB8_CWS_TEST,
    SortingSamplerBuilder(batch_size=N_BATCH_SIZE),
    max_seq_len=N_MAX_SEQ_LEN,
    hard_constraint=True,
    char_level=True,
    tagging_scheme='BMES',
    lr=F_LEARN_RATE,
    transform=NormalizeCharacter(HANLP_CHAR_TABLE_JSON, 'token'),
)

tasks['ner'] = TaggingNamedEntityRecognition(  # named entities
    MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_TRAIN,
    MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_DEV,
    MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_TEST,
    SortingSamplerBuilder(batch_size=N_BATCH_SIZE),
    lr=F_LEARN_RATE,
    # secondary_encoder=RelativeTransformerEncoder(768, k_as_x=True),  # for a base-size backbone
    secondary_encoder=RelativeTransformerEncoder(256, k_as_x=True),   # small backbone
    dependencies='tok',
)

# Load the pretrained MTL component and sanity-check it on one sentence.
mtl = MultiTaskLearning()
mtl.load(S_SAV_MODEL_DIR)
print(mtl('华纳音乐旗下的新垣结衣在12月21日于日本武道馆举办歌手出道活动'))

# BUG FIX: the original loop did `v.trn = tasks[k].trn` — but `v` IS
# `tasks[k]`, so every assignment was a self-assignment no-op. The task
# objects inside the LOADED component never learned about the local corpus
# files, so `mtl.evaluate()` built its tst dataset from the model's original
# (absent) paths and failed with "AssertionError: No samples loaded".
# Redirect the loaded model's tasks to the local files instead.
# NOTE(review): assumes the loaded component exposes its Task objects as the
# dict `mtl.tasks` keyed by task name, and that any extra tasks bundled with
# the model (pos/srl/dep/...) either have reachable corpora or are removed
# before evaluation — confirm against the installed HanLP version.
for k, v in tasks.items():
    mtl.tasks[k].trn = v.trn
    mtl.tasks[k].dev = v.dev
    mtl.tasks[k].tst = v.tst

metric, *_ = mtl.evaluate(S_SAV_MODEL_DIR)
for k in tasks:  # only the key is needed; the value was unused originally
    print(metric[k], end=' ')
print()

报错信息
/home/_tmp/data/ctb8_cn/tasks/cws/train.txt
/home/_tmp/data/ctb8_cn/tasks/cws/dev.txt
/home/_tmp/data/ctb8_cn/tasks/cws/test.txt

/home/_tmp/data/ctb8_cn/tasks/pos/train.txt
/home/_tmp/data/ctb8_cn/tasks/pos/dev.txt
/home/_tmp/data/ctb8_cn/tasks/pos/test.txt

/home/_tmp/data/msra_ner_token_level_cn/word_level.train.tsv
/home/_tmp/data/msra_ner_token_level_cn/word_level.dev.tsv
/home/_tmp/data/msra_ner_token_level_cn/word_level.test.tsv
{
“tok”: [
“华纳”,
“音乐”,
“旗下”,
“的”,
“新垣结衣”,
“在”,
“12月”,
“21日”,
“于”,
“日本”,
“武道馆”,
“举办”,
“歌手”,
“出道”,
“活动”
],
“ner”: [
[“华纳音乐”, “ORGANIZATION”, 0, 2],
[“新垣结衣”, “PERSON”, 4, 5],
[“12月”, “DATE”, 6, 7],
[“21日”, “DATE”, 7, 8],
[“日本”, “LOCATION”, 9, 10],
[“武道馆”, “LOCATION”, 10, 11]
]
}
1 / 2 Building tst dataset for ner …
Traceback (most recent call last):
File “hanlp_train_cn.py”, line 134, in <module>
metric, *_ = mtl.evaluate(S_SAV_MODEL_DIR)
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/components/mtl/multi_task_learning.py”, line 753, in evaluate
rets = super().evaluate(‘tst’, save_dir, logger, batch_size, output, **kwargs)
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/common/torch_component.py”, line 469, in evaluate
device=self.devices[0], logger=logger, overwrite=True))
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/components/mtl/multi_task_learning.py”, line 156, in build_dataloader
cache=isinstance(data, str), **config)
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/components/mtl/tasks/ner/tag_ner.py”, line 123, in build_dataloader
dataset = self.build_dataset(data, cache=cache, transform=transform, **args)
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/components/ner/transformer_ner.py”, line 216, in build_dataset
dataset = super().build_dataset(data, transform, **kwargs)
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/components/taggers/transformers/transformer_tagger.py”, line 170, in build_dataset
return TSVTaggingDataset(data, transform=transform, **kwargs)
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/datasets/ner/tsv.py”, line 45, in init
super().init(data, transform, cache, generate_idx)
File “/home/xy/miniconda3/envs/py364_xy/lib/python3.6/site-packages/hanlp/common/dataset.py”, line 129, in init
assert data, ‘No samples loaded’
AssertionError: No samples loaded

其他说明

/home/_tmp/data/ctb8_cn/tasks/cws/train.txt
/home/_tmp/data/ctb8_cn/tasks/pos/train.txt
/home/_tmp/data/msra_ner_token_level_cn/word_level.train.tsv
这几个数据集文件是从
from hanlp.datasets.ner.msra import MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_TRAIN
from hanlp.datasets.parsing.ctb8 import CTB8_POS_TRAIN
中MSRA_NER_TOKEN_LEVEL_SHORT_IOBES_TRAIN和CTB8_POS_TRAIN对应的URL
https://wakespace.lib.wfu.edu/bitstream/handle/10339/39379/LDC2013T21.tgz#data/tasks/cws/train.txt
http://file.hankcs.com/corpus/msra_ner_token_level.zip#word_level.train.short.tsv
里下载并解压后得到的数据,是hanlp_demo里的数据
现在使用github的hanlp_demo里的数据运行出现上述问题