Creating a ModelData object without torchtext splits?

mmr · December 6, 2017, 7:25am

Just checking whether anybody has managed to modify this part of the code from the nlp-arxiv notebook to take a DataFrame of sentences with labels.

# class ArxivDataset(torchtext.data.Dataset):
#     def __init__(self, path, text_field, label_field, **kwargs):
#         fields = [('text', text_field), ('label', label_field)]
#         examples = []
#         for label in ['yes', 'no']:
#             for fname in iglob(os.path.join(path, label, '*.txt')):
#                 with open(fname, 'r') as f: text = f.readline()
#                 examples.append(data.Example.fromlist([text, label], fields))
#         super().__init__(examples, fields, **kwargs)

#     @staticmethod
#     def sort_key(ex): return len(ex.text)
    
#     @classmethod
#     def splits(cls, text_field, label_field, root='.data',
#                train='train', test='test', **kwargs):
#         return super().splits(
#             root, text_field=text_field, label_field=label_field,
#             train=train, validation=None, test=test, **kwargs)