Page MenuHomePhabricator (Chris)

No OneTemporary

Authored By
Unknown
Size
15 KB
Referenced Files
None
Subscribers
None
diff --git a/.gitignore b/.gitignore
index 94e5acb..0e65655 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,105 +1,106 @@
# Custom
logs/
*.csv
.DS_Store
datasets/
*.tar.gz
models/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
+eval/dataset
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
# We dont want images
*.jpg
*.png
*.jpeg
*.zip
*.gz
*.xz
*.tar
diff --git a/classify_nsfw.py b/classify_nsfw.py
index 0acbe9b..d609775 100644
--- a/classify_nsfw.py
+++ b/classify_nsfw.py
@@ -1,68 +1,69 @@
#!/usr/bin/env python
import sys
import argparse
import tensorflow as tf
from model import OpenNsfwModel, InputType
from image_utils import create_tensorflow_image_loader
from image_utils import create_yahoo_image_loader
import numpy as np
IMAGE_LOADER_TENSORFLOW = "tensorflow"
IMAGE_LOADER_YAHOO = "yahoo"
def main(argv):
parser = argparse.ArgumentParser()
parser.add_argument("input_file", help="Path to the input image.\
Only jpeg images are supported.")
+
parser.add_argument("-m", "--model_weights", required=True,
help="Path to trained model weights file")
parser.add_argument("-l", "--image_loader",
default=IMAGE_LOADER_YAHOO,
help="image loading mechanism",
choices=[IMAGE_LOADER_YAHOO, IMAGE_LOADER_TENSORFLOW])
parser.add_argument("-t", "--input_type",
default=InputType.TENSOR.name.lower(),
help="input type",
choices=[InputType.TENSOR.name.lower(),
InputType.BASE64_JPEG.name.lower()])
args = parser.parse_args()
model = OpenNsfwModel()
with tf.Session() as sess:
input_type = InputType[args.input_type.upper()]
model.build(weights_path=args.model_weights, input_type=input_type)
fn_load_image = None
if input_type == InputType.TENSOR:
if args.image_loader == IMAGE_LOADER_TENSORFLOW:
- fn_load_image = create_tensorflow_image_loader(sess)
+ fn_load_image = create_tensorflow_image_loader(tf.Session(graph=tf.Graph()))
else:
fn_load_image = create_yahoo_image_loader()
elif input_type == InputType.BASE64_JPEG:
import base64
fn_load_image = lambda filename: np.array([base64.urlsafe_b64encode(open(filename, "rb").read())])
sess.run(tf.global_variables_initializer())
image = fn_load_image(args.input_file)
predictions = \
sess.run(model.predictions,
feed_dict={model.input: image})
print("Results for '{}'".format(args.input_file))
print("\tSFW score:\t{}\n\tNSFW score:\t{}".format(*predictions[0]))
if __name__ == "__main__":
main(sys.argv)
diff --git a/eval/batch_classify.py b/eval/batch_classify.py
new file mode 100644
index 0000000..b768dda
--- /dev/null
+++ b/eval/batch_classify.py
@@ -0,0 +1,112 @@
+
+import os
+import sys
+
+sys.path.append((os.path.normpath(
+ os.path.join(os.path.dirname(os.path.realpath(__file__)),
+ '..'))))
+
+import argparse
+import glob
+import tensorflow as tf
+from tqdm import tqdm
+
+from model import OpenNsfwModel, InputType
+from image_utils import create_tensorflow_image_loader
+from image_utils import create_yahoo_image_loader
+
+
+IMAGE_LOADER_TENSORFLOW = "tensorflow"  # accepted --image_loader value
+IMAGE_LOADER_YAHOO = "yahoo"  # accepted --image_loader value (the default)
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # presumably mutes native TF logs
+tf.logging.set_verbosity(tf.logging.ERROR)  # Python-side TF logs: errors only
+
+
+def create_batch_iterator(filenames, batch_size, fn_load_image):
+    for start in range(0, len(filenames), batch_size):  # one slice per batch
+        yield [fn_load_image(path) for path in filenames[start:start + batch_size]]
+
+
+def create_tf_batch_iterator(filenames, batch_size):
+    """Yield lists of images, each batch loaded in its own session and graph."""
+    for start in range(0, len(filenames), batch_size):
+        with tf.Session(graph=tf.Graph()) as batch_session:
+            load = create_tensorflow_image_loader(batch_session,
+                                                  expand_dims=False)
+            yield [load(path) for path in filenames[start:start + batch_size]]
+
+
+def main(argv):
+    """Classify every ``*.jpg`` in a folder and write the scores as TSV."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-s", "--source", required=True,
+                        help="Folder containing the images to classify")
+
+    parser.add_argument("-o", "--output_file", required=True,
+                        help="Output file path")
+
+    parser.add_argument("-m", "--model_weights", required=True,
+                        help="Path to trained model weights file")
+
+    parser.add_argument("-b", "--batch_size", help="Number of images to "
+                        "classify simultaneously.", type=int, default=64)
+
+    parser.add_argument("-l", "--image_loader",
+                        default=IMAGE_LOADER_YAHOO,
+                        help="image loading mechanism",
+                        choices=[IMAGE_LOADER_YAHOO, IMAGE_LOADER_TENSORFLOW])
+
+    args = parser.parse_args()
+    batch_size = args.batch_size
+    output_file = args.output_file
+
+    input_type = InputType.TENSOR
+    model = OpenNsfwModel()
+
+    filenames = glob.glob(args.source + "/*.jpg")
+    num_files = len(filenames)
+
+    # Ceiling division: a trailing partial batch is still a batch.
+    num_batches = -(-num_files // batch_size)
+
+    print("Found", num_files, " files")
+    print("Split into", num_batches, " batches")
+
+    config = tf.ConfigProto()
+    config.gpu_options.allow_growth = True
+
+    # Pick the batch generator that matches the requested image loader.
+
+    if args.image_loader == IMAGE_LOADER_TENSORFLOW:
+        batch_iterator = create_tf_batch_iterator(filenames, batch_size)
+    else:
+        fn_load_image = create_yahoo_image_loader(expand_dims=False)
+        batch_iterator = create_batch_iterator(filenames, batch_size,
+                                               fn_load_image)
+
+    with tf.Session(graph=tf.Graph(), config=config) as session:
+        model.build(weights_path=args.model_weights,
+                    input_type=input_type)
+
+        session.run(tf.global_variables_initializer())
+
+        with tqdm(total=num_files) as progress_bar:
+            with open(output_file, 'w') as o:
+                o.write('File\tSFW Score\tNSFW Score\n')
+
+                for batch_num, images in enumerate(batch_iterator):
+                    predictions = \
+                        session.run(model.predictions,
+                                    feed_dict={model.input: images})
+
+                    # Offset of this batch's first image within `filenames`.
+                    fi = (batch_num * batch_size)
+                    for i, prediction in enumerate(predictions):
+                        filename = os.path.basename(filenames[fi + i])
+                        o.write('{}\t{}\t{}\n'.format(filename,
+                                                      prediction[0],
+                                                      prediction[1]))
+                    progress_bar.update(len(images))
+
+if __name__ == "__main__":
+ main(sys.argv)
diff --git a/eval/eval.py b/eval/eval.py
new file mode 100644
index 0000000..856d7a6
--- /dev/null
+++ b/eval/eval.py
@@ -0,0 +1,90 @@
+import sys
+import operator
+import argparse
+import numpy as np
+from scipy import stats
+
+
+def load_classifications(filename):
+    """Read a tab-separated classification file into a dict.
+
+    The first line is treated as a header and skipped; blank lines are
+    ignored.  Each remaining line holds a name, an SFW score and an NSFW
+    score, in that order.
+
+    Returns a dict mapping each name to ``(sfw_score, nsfw_score)`` floats.
+    """
+    results = {}
+
+    with open(filename, 'r') as f:
+        next(f, None)  # skip the header row (no-op on an empty file)
+        for line in f:
+            if not line.strip():
+                continue
+            parts = line.split('\t')
+            results[parts[0]] = (float(parts[1]), float(parts[2]))
+
+    return results
+
+
+def classification_matrix(classifications):
+    """Collect item[1] of every entry as one row of an (n, 2) array."""
+    matrix = np.zeros(shape=(len(classifications), 2))
+    for row, item in enumerate(classifications):
+        scores = item[1]
+        matrix[row] = np.array(scores)
+    return matrix
+
+
+def test(first, second):
+    """Summarise how far two score vectors lie apart.
+
+    Returns a dict with statistics of ``|first - second|`` plus, under
+    ``'t-prob'``, element 1 of ``stats.ttest_ind(..., equal_var=True)``.
+    """
+    gap = np.abs(first - second)
+    reducers = (('min', np.amin), ('max', np.amax), ('median', np.median),
+                ('mean', np.mean), ('std', np.std), ('var', np.var))
+    summary = {label: fn(gap) for label, fn in reducers}
+    # Element 1 of the t-test result is the p-value.
+    summary['t-prob'] = stats.ttest_ind(first, second, equal_var=True)[1]
+
+    return summary
+
+
+def main(argv):
+    """Compare two classification files and print per-class statistics."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("original",
+                        help="File containing base classifications")
+
+    parser.add_argument("other",
+                        help="File containing classifications to compare to "
+                        "base results")
+
+    args = parser.parse_args()
+    filename_original = args.original
+    filename_other = args.other
+
+    original = load_classifications(filename_original)
+    other = load_classifications(filename_other)
+
+    if len(original) != len(other):
+        parser.error("both files must contain the same number of entries")
+    original = sorted(original.items(), key=operator.itemgetter(0))
+    other = sorted(other.items(), key=operator.itemgetter(0))
+
+    print("Found", len(original), "entries")
+
+    original_classifications = classification_matrix(original)
+    other_classifications = classification_matrix(other)
+
+    print('SFW:')
+    print(test(original_classifications[:, 0], other_classifications[:, 0]))
+
+    print()
+    print('NSFW:')
+    print(test(original_classifications[:, 1], other_classifications[:, 1]))
+
+if __name__ == "__main__":
+ main(sys.argv)
diff --git a/image_utils.py b/image_utils.py
index 96150af..b4b77db 100644
--- a/image_utils.py
+++ b/image_utils.py
@@ -1,128 +1,143 @@
VGG_MEAN = [104, 117, 123]
-def create_yahoo_image_loader():
+def create_yahoo_image_loader(expand_dims=True):
"""Yahoo open_nsfw image loading mechanism
Approximation of the image loading mechanism defined in
https://github.com/yahoo/open_nsfw/blob/79f77bcd45076b000df71742a59d726aa4a36ad1/classify_nsfw.py#L40
"""
import numpy as np
import skimage
import skimage.io
from PIL import Image
from io import BytesIO
def load_image(image_path):
pimg = open(image_path, 'rb').read()
img_data = pimg
im = Image.open(BytesIO(img_data))
if im.mode != "RGB":
im = im.convert('RGB')
imr = im.resize((256, 256), resample=Image.BILINEAR)
fh_im = BytesIO()
imr.save(fh_im, format='JPEG')
fh_im.seek(0)
image = (skimage.img_as_float(skimage.io.imread(fh_im, as_grey=False))
.astype(np.float32))
H, W, _ = image.shape
h, w = (224, 224)
h_off = max((H - h) // 2, 0)
w_off = max((W - w) // 2, 0)
image = image[h_off:h_off + h, w_off:w_off + w, :]
# RGB to BGR
image = image[:, :, :: -1]
image = image.astype(np.float32, copy=False)
image = image * 255.0
image -= np.array(VGG_MEAN, dtype=np.float32)
- image = np.expand_dims(image, axis=0)
+ if expand_dims:
+ image = np.expand_dims(image, axis=0)
+
return image
return load_image
-def create_tensorflow_image_loader(session):
+def create_tensorflow_image_loader(session, expand_dims=True,
+ options=None,
+ run_metadata=None):
"""Tensorflow image loader
- Results seem to deviate a bit from yahoo image loader due to different
- jpeg encoders/decoders and different image resize implementations between
- PIL, skimage and tensorflow
+ Results seem to deviate quite a bit from yahoo image loader due to
+ different jpeg encoders/decoders and different image resize
+ implementations between PIL, skimage and tensorflow
Only supports jpeg images.
+
+ Relevant tensorflow issues:
+ * https://github.com/tensorflow/tensorflow/issues/6720
+ * https://github.com/tensorflow/tensorflow/issues/12753
"""
import tensorflow as tf
def load_image(image_path):
image = tf.read_file(image_path)
image = __tf_jpeg_process(image)
- image_batch = tf.expand_dims(image, axis=0)
+ if expand_dims:
+ image_batch = tf.expand_dims(image, axis=0)
+ return session.run(image_batch,
+ options=options,
+ run_metadata=run_metadata)
- return session.run(image_batch)
+ return session.run(image,
+ options=options,
+ run_metadata=run_metadata)
return load_image
def load_base64_tensor(_input):
import tensorflow as tf
def decode_and_process(base64):
_bytes = tf.decode_base64(base64)
_image = __tf_jpeg_process(_bytes)
return _image
# we have to do some preprocessing with map_fn, since functions like
# decode_*, resize_images and crop_to_bounding_box do not support
# processing of batches
image = tf.map_fn(decode_and_process, _input,
back_prop=False, dtype=tf.float32)
return image
def __tf_jpeg_process(data):
import tensorflow as tf
# The whole jpeg encode/decode dance is neccessary to generate a result
# that matches the original model's (caffe) preprocessing
+ # (as closely as possible)
image = tf.image.decode_jpeg(data, channels=3,
fancy_upscaling=True,
dct_method="INTEGER_FAST")
image = tf.image.convert_image_dtype(image, tf.float32, saturate=True)
image = tf.image.resize_images(image, (256, 256),
method=tf.image.ResizeMethod.BILINEAR,
align_corners=True)
image = tf.image.convert_image_dtype(image, tf.uint8, saturate=True)
image = tf.image.encode_jpeg(image, format='', quality=75,
progressive=False, optimize_size=False,
chroma_downsampling=True,
density_unit=None,
x_density=None, y_density=None,
xmp_metadata=None)
image = tf.image.decode_jpeg(image, channels=3,
fancy_upscaling=False,
dct_method="INTEGER_ACCURATE")
image = tf.cast(image, dtype=tf.float32)
image = tf.image.crop_to_bounding_box(image, 16, 16, 224, 224)
image = tf.reverse(image, axis=[2])
image -= VGG_MEAN
return image

File Metadata

Mime Type
text/x-diff
Expires
Fri, May 15, 8:04 AM (13 h, 28 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
63874
Default Alt Text
(15 KB)

Event Timeline