Page Menu
Home
Phabricator (Chris)
Search
Configure Global Search
Log In
Files
F118074
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Authored By
Unknown
Size
15 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/.gitignore b/.gitignore
index 94e5acb..0e65655 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,105 +1,106 @@
# Custom
logs/
*.csv
.DS_Store
datasets/
*.tar.gz
models/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
+eval/dataset
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
# We don't want images
*.jpg
*.png
*.jpeg
*.zip
*.gz
*.xz
*.tar
diff --git a/classify_nsfw.py b/classify_nsfw.py
index 0acbe9b..d609775 100644
--- a/classify_nsfw.py
+++ b/classify_nsfw.py
@@ -1,68 +1,69 @@
#!/usr/bin/env python
import sys
import argparse
import tensorflow as tf
from model import OpenNsfwModel, InputType
from image_utils import create_tensorflow_image_loader
from image_utils import create_yahoo_image_loader
import numpy as np
IMAGE_LOADER_TENSORFLOW = "tensorflow"
IMAGE_LOADER_YAHOO = "yahoo"
def main(argv):
    """Classify a single jpeg image as SFW/NSFW and print both scores.

    Command line: input_file -m WEIGHTS [-l yahoo|tensorflow]
    [-t tensor|base64_jpeg]
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("input_file", help="Path to the input image.\
                        Only jpeg images are supported.")

    parser.add_argument("-m", "--model_weights", required=True,
                        help="Path to trained model weights file")

    parser.add_argument("-l", "--image_loader",
                        default=IMAGE_LOADER_YAHOO,
                        help="image loading mechanism",
                        choices=[IMAGE_LOADER_YAHOO, IMAGE_LOADER_TENSORFLOW])

    parser.add_argument("-t", "--input_type",
                        default=InputType.TENSOR.name.lower(),
                        help="input type",
                        choices=[InputType.TENSOR.name.lower(),
                                 InputType.BASE64_JPEG.name.lower()])

    # Fix: parse the argv that was passed in instead of silently ignoring
    # it (parse_args() with no arguments always reads sys.argv).
    args = parser.parse_args(argv[1:])

    model = OpenNsfwModel()

    with tf.Session() as sess:
        input_type = InputType[args.input_type.upper()]
        model.build(weights_path=args.model_weights, input_type=input_type)

        fn_load_image = None

        if input_type == InputType.TENSOR:
            if args.image_loader == IMAGE_LOADER_TENSORFLOW:
                # The loader gets its own session/graph — presumably so the
                # image decoding ops stay out of the model graph (matches
                # the per-batch sessions in eval/batch_classify.py).
                fn_load_image = create_tensorflow_image_loader(
                    tf.Session(graph=tf.Graph()))
            else:
                fn_load_image = create_yahoo_image_loader()
        elif input_type == InputType.BASE64_JPEG:
            import base64

            def fn_load_image(filename):
                # Fix: close the file handle deterministically instead of
                # leaving it to garbage collection.
                with open(filename, "rb") as f:
                    return np.array([base64.urlsafe_b64encode(f.read())])

        sess.run(tf.global_variables_initializer())

        image = fn_load_image(args.input_file)

        predictions = \
            sess.run(model.predictions,
                     feed_dict={model.input: image})

        print("Results for '{}'".format(args.input_file))
        print("\tSFW score:\t{}\n\tNSFW score:\t{}".format(*predictions[0]))


if __name__ == "__main__":
    main(sys.argv)
diff --git a/eval/batch_classify.py b/eval/batch_classify.py
new file mode 100644
index 0000000..b768dda
--- /dev/null
+++ b/eval/batch_classify.py
@@ -0,0 +1,112 @@
+
+import os
+import sys
+
+sys.path.append((os.path.normpath(
+ os.path.join(os.path.dirname(os.path.realpath(__file__)),
+ '..'))))
+
+import argparse
+import glob
+import tensorflow as tf
+from tqdm import tqdm
+
+from model import OpenNsfwModel, InputType
+from image_utils import create_tensorflow_image_loader
+from image_utils import create_yahoo_image_loader
+
+
+IMAGE_LOADER_TENSORFLOW = "tensorflow"
+IMAGE_LOADER_YAHOO = "yahoo"
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+tf.logging.set_verbosity(tf.logging.ERROR)
+
+
def create_batch_iterator(filenames, batch_size, fn_load_image):
    """Yield successive batches of loaded images.

    Each yielded value is a list of at most *batch_size* items, produced
    by applying *fn_load_image* to each filename in order.
    """
    total = len(filenames)
    start = 0
    while start < total:
        chunk = filenames[start:start + batch_size]
        yield [fn_load_image(name) for name in chunk]
        start += batch_size
+
+
def create_tf_batch_iterator(filenames, batch_size):
    """Yield batches of images decoded through the tensorflow loader.

    A fresh tf.Session with its own graph is opened for every batch, so
    the ops created by the loader are discarded after each batch.
    """
    for offset in range(0, len(filenames), batch_size):
        batch_names = filenames[offset:offset + batch_size]
        with tf.Session(graph=tf.Graph()) as session:
            load = create_tensorflow_image_loader(session,
                                                  expand_dims=False)
            yield [load(name) for name in batch_names]
+
+
def main(argv):
    """Classify every *.jpg in a folder and write a TSV of scores.

    Output columns: File, SFW Score, NSFW Score (tab separated, with one
    header line), written to --output_file.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("-s", "--source", required=True,
                        help="Folder containing the images to classify")

    parser.add_argument("-o", "--output_file", required=True,
                        help="Output file path")

    parser.add_argument("-m", "--model_weights", required=True,
                        help="Path to trained model weights file")

    parser.add_argument("-b", "--batch_size", help="Number of images to \
                        classify simultaneously.", type=int, default=64)

    parser.add_argument("-l", "--image_loader",
                        default=IMAGE_LOADER_YAHOO,
                        help="image loading mechanism",
                        choices=[IMAGE_LOADER_YAHOO, IMAGE_LOADER_TENSORFLOW])

    # Fix: honour the argv parameter instead of always reading sys.argv.
    args = parser.parse_args(argv[1:])
    batch_size = args.batch_size
    output_file = args.output_file

    input_type = InputType.TENSOR
    model = OpenNsfwModel()

    filenames = glob.glob(args.source + "/*.jpg")
    num_files = len(filenames)

    # Fix: round up so a trailing partial batch is counted;
    # int(num_files / batch_size) silently floored the batch count.
    num_batches = -(-num_files // batch_size)

    print("Found", num_files, "files")
    print("Split into", num_batches, "batches")

    config = tf.ConfigProto()
    # Do not grab all GPU memory up front.
    config.gpu_options.allow_growth = True

    if args.image_loader == IMAGE_LOADER_TENSORFLOW:
        batch_iterator = create_tf_batch_iterator(filenames, batch_size)
    else:
        fn_load_image = create_yahoo_image_loader(expand_dims=False)
        batch_iterator = create_batch_iterator(filenames, batch_size,
                                               fn_load_image)

    with tf.Session(graph=tf.Graph(), config=config) as session:
        model.build(weights_path=args.model_weights,
                    input_type=input_type)

        session.run(tf.global_variables_initializer())

        with tqdm(total=num_files) as progress_bar:
            with open(output_file, 'w') as o:
                o.write('File\tSFW Score\tNSFW Score\n')

                for batch_num, images in enumerate(batch_iterator):
                    predictions = \
                        session.run(model.predictions,
                                    feed_dict={model.input: images})

                    # Index of the first file of this batch in filenames.
                    fi = (batch_num * batch_size)
                    for i, prediction in enumerate(predictions):
                        filename = os.path.basename(filenames[fi + i])
                        o.write('{}\t{}\t{}\n'.format(filename,
                                                      prediction[0],
                                                      prediction[1]))

                    progress_bar.update(len(images))


if __name__ == "__main__":
    main(sys.argv)
diff --git a/eval/eval.py b/eval/eval.py
new file mode 100644
index 0000000..856d7a6
--- /dev/null
+++ b/eval/eval.py
@@ -0,0 +1,90 @@
+import sys
+import operator
+import argparse
+import numpy as np
+from scipy import stats
+
+
def load_classifications(filename):
    """Read a tab-separated classification file into a dict.

    The first line is treated as a header and skipped.  Returns a
    mapping of image filename -> (sfw_score, nsfw_score).
    """
    results = {}

    with open(filename, 'r') as f:
        rows = iter(f)
        next(rows, None)  # drop the header line

        for row in rows:
            parts = row.split('\t')
            # float() tolerates the trailing newline on the last column.
            results[parts[0]] = (float(parts[1]), float(parts[2]))

    return results
+
+
def classification_matrix(classifications):
    """Stack the score pairs of (name, scores) entries into an (n, 2)
    numpy matrix, one row per entry, in input order."""
    matrix = np.zeros(shape=(len(classifications), 2))

    for row, entry in enumerate(classifications):
        matrix[row, :] = entry[1]

    return matrix
+
+
+def test(first, second):
+ delta = np.abs(first - second)
+
+ result = {
+ 'min': np.amin(delta),
+ 'max': np.amax(delta),
+ 'median': np.median(delta),
+ 'mean': np.mean(delta),
+ 'std': np.std(delta),
+ 'var': np.var(delta),
+ 't-prob': stats.ttest_ind(first, second, equal_var=True)[1]
+ }
+
+ return result
+
+
def main(argv):
    """Compare two classification result files and print delta statistics.

    Takes two TSV files as produced by eval/batch_classify.py and prints,
    separately for the SFW and NSFW columns, summary statistics of the
    per-file score differences.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("original",
                        help="File containing base classifications")

    parser.add_argument("other",
                        help="File containing classifications to compare to\
                        base results")

    # Fix: honour the argv parameter instead of always reading sys.argv.
    args = parser.parse_args(argv[1:])
    filename_original = args.original
    filename_other = args.other

    original = load_classifications(filename_original)
    other = load_classifications(filename_other)

    # Fix: this was a bare `len(original) == len(other)` whose result was
    # discarded — actually enforce that both files have the same entries.
    if len(original) != len(other):
        raise ValueError("Classification files contain a different "
                         "number of entries")

    # Sort both result sets by filename so rows line up pairwise.
    original = sorted(original.items(), key=operator.itemgetter(0))
    other = sorted(other.items(), key=operator.itemgetter(0))

    print("Found", len(original), "entries")

    original_classifications = classification_matrix(original)
    other_classifications = classification_matrix(other)

    print('SFW:')
    print(test(original_classifications[:, 0], other_classifications[:, 0]))

    print()
    print('NSFW:')
    print(test(original_classifications[:, 1], other_classifications[:, 1]))


if __name__ == "__main__":
    main(sys.argv)
diff --git a/image_utils.py b/image_utils.py
index 96150af..b4b77db 100644
--- a/image_utils.py
+++ b/image_utils.py
@@ -1,128 +1,143 @@
VGG_MEAN = [104, 117, 123]
def create_yahoo_image_loader(expand_dims=True):
    """Yahoo open_nsfw image loading mechanism

    Approximation of the image loading mechanism defined in
    https://github.com/yahoo/open_nsfw/blob/79f77bcd45076b000df71742a59d726aa4a36ad1/classify_nsfw.py#L40

    When *expand_dims* is True the loaded image gets a leading batch axis
    of size 1; pass False when batching is done externally.
    """
    import numpy as np
    import skimage
    import skimage.io
    from PIL import Image
    from io import BytesIO

    def load_image(image_path):
        # Fix: close the file handle deterministically instead of
        # leaving the anonymous handle to garbage collection.
        with open(image_path, 'rb') as f:
            img_data = f.read()

        im = Image.open(BytesIO(img_data))

        if im.mode != "RGB":
            im = im.convert('RGB')

        # Resize to 256x256 and round-trip through JPEG, approximating
        # the reference implementation's preprocessing.
        imr = im.resize((256, 256), resample=Image.BILINEAR)

        fh_im = BytesIO()
        imr.save(fh_im, format='JPEG')
        fh_im.seek(0)

        image = (skimage.img_as_float(skimage.io.imread(fh_im, as_grey=False))
                 .astype(np.float32))

        # Central 224x224 crop.
        H, W, _ = image.shape
        h, w = (224, 224)

        h_off = max((H - h) // 2, 0)
        w_off = max((W - w) // 2, 0)
        image = image[h_off:h_off + h, w_off:w_off + w, :]

        # RGB to BGR
        image = image[:, :, :: -1]

        # Scale to [0, 255] and subtract the per-channel VGG means.
        image = image.astype(np.float32, copy=False)
        image = image * 255.0
        image -= np.array(VGG_MEAN, dtype=np.float32)

        if expand_dims:
            image = np.expand_dims(image, axis=0)

        return image

    return load_image
def create_tensorflow_image_loader(session, expand_dims=True,
                                   options=None,
                                   run_metadata=None):
    """Tensorflow image loader

    Results seem to deviate quite a bit from yahoo image loader due to
    different jpeg encoders/decoders and different image resize
    implementations between PIL, skimage and tensorflow

    Only supports jpeg images.

    Relevant tensorflow issues:
        * https://github.com/tensorflow/tensorflow/issues/6720
        * https://github.com/tensorflow/tensorflow/issues/12753
    """
    import tensorflow as tf

    def load_image(image_path):
        raw = tf.read_file(image_path)
        processed = __tf_jpeg_process(raw)

        # Optionally add a leading batch axis before evaluating the graph.
        fetch = tf.expand_dims(processed, axis=0) if expand_dims \
            else processed

        return session.run(fetch,
                           options=options,
                           run_metadata=run_metadata)

    return load_image
def load_base64_tensor(_input):
    """Build a graph node that decodes a batch of url-safe base64 jpeg
    strings into preprocessed image tensors."""
    import tensorflow as tf

    def _decode_single(encoded):
        # Decode one base64 string and run the jpeg preprocessing on it.
        return __tf_jpeg_process(tf.decode_base64(encoded))

    # map_fn is needed because decode_*, resize_images and
    # crop_to_bounding_box operate on single images, not on batches.
    return tf.map_fn(_decode_single, _input,
                     back_prop=False, dtype=tf.float32)
def __tf_jpeg_process(data):
    """Turn raw jpeg bytes into a 224x224 BGR float32 tensor with the
    VGG channel means subtracted.

    The decode -> resize -> re-encode -> decode round trip is necessary
    to generate a result that matches the original model's (caffe)
    preprocessing (as good as possible).
    """
    import tensorflow as tf

    img = tf.image.decode_jpeg(data, channels=3,
                               fancy_upscaling=True,
                               dct_method="INTEGER_FAST")

    img = tf.image.convert_image_dtype(img, tf.float32, saturate=True)

    img = tf.image.resize_images(img, (256, 256),
                                 method=tf.image.ResizeMethod.BILINEAR,
                                 align_corners=True)

    img = tf.image.convert_image_dtype(img, tf.uint8, saturate=True)

    img = tf.image.encode_jpeg(img, format='', quality=75,
                               progressive=False, optimize_size=False,
                               chroma_downsampling=True,
                               density_unit=None,
                               x_density=None, y_density=None,
                               xmp_metadata=None)

    img = tf.image.decode_jpeg(img, channels=3,
                               fancy_upscaling=False,
                               dct_method="INTEGER_ACCURATE")

    img = tf.cast(img, dtype=tf.float32)

    # Central 224x224 crop of the 256x256 image.
    img = tf.image.crop_to_bounding_box(img, 16, 16, 224, 224)

    # RGB -> BGR, then subtract the per-channel means.
    img = tf.reverse(img, axis=[2])
    img -= VGG_MEAN

    return img
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, May 15, 8:04 AM (12 h, 17 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
63874
Default Alt Text
(15 KB)
Attached To
Mode
R62 open_nsfw
Attached
Detach File
Event Timeline