Fix race condition in predict method.

Fix a race condition here:

  with self.learn.no_bar(), self.learn.no_logging():
    dl = self.learn.dls.test_dl(files, bs=bs)
    batch, _ = self.learn.get_preds(dl=dl)

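The calls to `no_bar()` and possibly `no_logging()` were unsafe in a
multithreaded environment because they modified the shared `self.learn`
object on every request. This led to random exceptions and deadlocks when
gunicorn was under load in production. The fix is to remove the
`ProgressCallback` and set the logger to `noop` once, when the model is
loaded, so that the predict method no longer needs those context managers.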
This commit is contained in:
evazion
2022-06-30 00:13:35 -05:00
parent 31d4f7a22f
commit b975102633
+11 -8
View File
@@ -1,5 +1,7 @@
from fastbook import *
from pandas import DataFrame, read_csv
from fastai.imports import noop
from fastai.callback.progress import ProgressCallback
import timm
import sys
@@ -24,6 +26,8 @@ class Autotagger:
learn = vision_learner(dls, "resnet152", pretrained=False)
model_file = open(model_path, "rb")
learn.load(model_file, with_opt=False)
learn.remove_cb(ProgressCallback)
learn.logger = noop
return learn
@@ -31,12 +35,11 @@ class Autotagger:
if not files:
return
with self.learn.no_bar(), self.learn.no_logging():
dl = self.learn.dls.test_dl(files, bs=bs)
batch, _ = self.learn.get_preds(dl=dl)
dl = self.learn.dls.test_dl(files, bs=bs)
batch, _ = self.learn.get_preds(dl=dl)
for scores in batch:
df = DataFrame({ "tag": self.learn.dls.vocab, "score": scores })
df = df[df.score >= threshold].sort_values("score", ascending=False).head(limit)
tags = dict(zip(df.tag, df.score))
yield tags
for scores in batch:
df = DataFrame({ "tag": self.learn.dls.vocab, "score": scores })
df = df[df.score >= threshold].sort_values("score", ascending=False).head(limit)
tags = dict(zip(df.tag, df.score))
yield tags