def refine_context(self, context):
    """Normalize a prompt string before tokenization.

    When ``self.charMode`` is truthy, the text is cleaned line by line:
    every line is stripped of surrounding whitespace, empty lines are
    dropped, and the surviving lines are re-joined with ``'\\n'`` behind a
    single leading ``'\\n'``.  Input with no non-blank lines therefore
    yields just ``'\\n'``.

    When ``self.charMode`` is falsy, the text is returned unchanged.

    Args:
        context: The raw prompt text.

    Returns:
        The normalized text (char mode) or ``context`` itself (otherwise).
    """
    if not self.charMode:
        return context

    # A bare str.strip() removes all Unicode whitespace, which already
    # covers the ideographic space (U+3000) and '\r', so no extra
    # per-character strips are needed.
    stripped = (line.strip() for line in context.split('\n'))

    # The result always begins with '\n', so it can never be empty and no
    # separate empty-string fallback is required.
    return '\n' + '\n'.join(line for line in stripped if line)