Hi!
I'm trying to deploy a ULMFiT model in production (on AWS). The language is French, so the tokenizer is French as well:
```python
tokenizer = TokenizerForProduction(lang='fr', n_cpus=5)
data_lm = (TextList.from_df(df=df,
                            processor=[TokenizeProcessor(tokenizer=tokenizer),
                                       NumericalizeProcessor(max_vocab=600000)])
           .split_by_rand_pct(valid_pct=0.2)
           .label_for_lm()
           .databunch(bs=32))
data_lm.show_batch()
```
I've noticed that performance is really poor when calling learner.predict:
```python
from functools import lru_cache

@lru_cache(maxsize=2048)
def predict(sentence):
    _, _, predictions = learner.predict(sentence.lower())
    result = sorted(zip(predictions.tolist(), learner.data.classes),
                    key=lambda tup: tup[0], reverse=True)
    return result
```
I've tracked it down to the code in https://github.com/fastai/fastai/blob/master/fastai/text/transform.py#L87, at line 112:
```python
def _process_all_1(self, texts:Collection[str]) -> List[List[str]]:
    "Process a list of `texts` in one process."
    tok = self.tok_func(self.lang)
    if self.special_cases: tok.add_special_cases(self.special_cases)
    return [self.process_text(str(t), tok) for t in texts]
```
In this function, a spaCy tokenizer is re-created for each request, which is quite time-consuming, especially for French.
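Here is a quick way to see that overhead in isolation (a minimal sketch, not the exact test I ran; the sample texts and counts are placeholders, and timings will vary by machine):

```python
# Sketch: cost of creating the spaCy tokenizer per call (as in
# _process_all_1) vs. creating it once and reusing it.
import time
from fastai.text import SpacyTokenizer

texts = ["Bonjour tout le monde, comment allez-vous ?"] * 20

start = time.perf_counter()
for t in texts:
    tok = SpacyTokenizer('fr')   # new tokenizer per text, like _process_all_1
    tok.tokenizer(t)
print(f"re-created per call: {time.perf_counter() - start:.2f}s")

tok = SpacyTokenizer('fr')       # created once, like the class below
start = time.perf_counter()
for t in texts:
    tok.tokenizer(t)
print(f"created once:        {time.perf_counter() - start:.2f}s")
```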
Just to try, I wrote my own Tokenizer class in which the spaCy tokenizer is created only once and stored on the class instance. I got 10 times more requests per second on a small AWS instance. (The code is at the end of this message.)
My question is: is there a design reason I'm not seeing for re-creating the spaCy tokenizer on each call to learner.predict()? Am I missing something? Is learner.predict not meant to be used in production?
Cheers,
Daniel
```python
class TokenizerForProduction():
    "Put together rules and a tokenizer function to tokenize text with multiprocessing."
    def __init__(self, tok_func:Callable=SpacyTokenizer, lang:str='en',
                 pre_rules:ListRules=None, post_rules:ListRules=None,
                 special_cases:Collection[str]=None, n_cpus:int=None):
        self.tok_func,self.lang,self.special_cases = tok_func,lang,special_cases
        self.pre_rules  = ifnone(pre_rules,  defaults.text_pre_rules)
        self.post_rules = ifnone(post_rules, defaults.text_post_rules)
        self.special_cases = special_cases if special_cases is not None else defaults.text_spec_tok
        self.n_cpus = ifnone(n_cpus, defaults.cpus)
        # Key change: the spaCy tokenizer is created once here and reused
        # for every request, instead of once per _process_all_1 call.
        self.tok = self.tok_func(self.lang)

    def __repr__(self) -> str:
        res = f'Tokenizer {self.tok_func.__name__} in {self.lang} with the following rules:\n'
        for rule in self.pre_rules:  res += f' - {rule.__name__}\n'
        for rule in self.post_rules: res += f' - {rule.__name__}\n'
        return res

    def process_text(self, t:str, tok:BaseTokenizer) -> List[str]:
        "Process one text `t` with tokenizer `tok`."
        # `tok` is kept for signature compatibility; the shared instance is used.
        for rule in self.pre_rules: t = rule(t)
        toks = self.tok.tokenizer(t)
        for rule in self.post_rules: toks = rule(toks)
        return toks

    def _process_all_1(self, texts:Collection[str]) -> List[List[str]]:
        "Process a list of `texts` in one process."
        if self.special_cases: self.tok.add_special_cases(self.special_cases)
        return [self.process_text(str(t), self.tok) for t in texts]

    def process_all(self, texts:Collection[str]) -> List[List[str]]:
        "Process a list of `texts`."
        if self.n_cpus <= 1: return self._process_all_1(texts)
        with ProcessPoolExecutor(self.n_cpus) as e:
            return sum(e.map(self._process_all_1,
                             partition_by_cores(texts, self.n_cpus)), [])
```
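For reference, this is roughly how I wire it into the loaded learner before serving (a sketch: the export file name and the processor attribute path are assumptions and may differ between fastai versions):

```python
from fastai.text import load_learner

learner = load_learner('.', 'export.pkl')  # 'export.pkl' is a placeholder name

# Assumption: the TokenizeProcessor is the first processor on the x items;
# point it at a tokenizer that is created only once, before any request.
prod_tokenizer = TokenizerForProduction(lang='fr', n_cpus=1)
learner.data.single_ds.x.processor[0].tokenizer = prod_tokenizer
```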