{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import numpy as np \n", "import pandas as pd \n", "import tensorflow as tf\n", "import keras\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Data info \n", "rootDir = '..your_path'\n", "\n", "Dataset = 'NCLT'\n", "TrainOrTest = '/Test/'\n", "SequenceDate = '2013-04-05'\n", "\n", "SCImiddlePath = '/4. SCI_jet0to15/'\n", "\n", "GridCellSize = '10'\n", "\n", "DataPath = rootDir + Dataset + TrainOrTest + SequenceDate + SCImiddlePath + GridCellSize + '/'\n", "print(DataPath)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "DataPath = ICRArootDir + Dataset + TrainOrTest + SequenceDate + SCImiddlePath + GridCellSize + '/'\n", "print(DataPath)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def getTestDataNCLT(DataPath, SequenceDate, lbl_enc_train): \n", "\n", " # load training label encoding information \n", " from sklearn.preprocessing import LabelEncoder\n", " ClassesTrainingSequenceHave = lbl_enc_train.classes_\n", " nClassesTrainingSequenceHave = len(ClassesTrainingSequenceHave)\n", "\n", " # info\n", " WholeData = os.listdir(DataPath)\n", " nWholeData = len(WholeData)\n", " print(str(nWholeData) + ' data exist in ' + SequenceDate)\n", " \n", " # read \n", " X_seen = []\n", " y_seen = []\n", " X_unseen = []\n", " y_unseen = []\n", " \n", " for ii in range(nWholeData):\n", " dataName = WholeData[ii]\n", " dataPath = DataPath + dataName\n", " \n", " dataTrajNodeOrder = int(dataName[0:5])\n", "\n", " SCI = plt.imread(dataPath)\n", " dataPlaceIndex = int(dataName[6:11])\n", " \n", " # if label is in the train, then save into the seen (seen is only subset to be tested)\n", " if dataPlaceIndex in ClassesTrainingSequenceHave:\n", " X_seen.append(SCI)\n", " y_seen.append(dataPlaceIndex)\n", " else:\n", " X_unseen.append(SCI)\n", " y_unseen.append(dataPlaceIndex)\n", " \n", " # progress message \n", " if ii%1000==0:\n", " print(str(format((ii/nWholeData)*100, '.1f')), '% loaded.')\n", " \n", " dataShape = SCI.shape\n", " \n", " # X\n", " nSeenData = len(X_seen)\n", " X_nd = np.zeros(shape=(nSeenData, dataShape[0], dataShape[1], dataShape[2]))\n", " for jj in range(nSeenData):\n", " X_nd[jj, :, :] = X_seen[jj]\n", " X_nd = X_nd.astype('float32')\n", " \n", " # y (one-hot encoded) \n", " y_seen = lbl_enc_train.transform(y_seen)\n", " y_nd = keras.utils.np_utils.to_categorical(y_seen, num_classes=nClassesTrainingSequenceHave)\n", "\n", " # log message \n", " print('Data size: %s' % nWholeData)\n", " print('- Seen data: %s' % len(X_seen))\n", " print('- Uneen data: %s' % len(X_unseen))\n", " print(' ')\n", " print('Data shape:', X_nd.shape)\n", " print('Label shape:', y_nd.shape)\n", " \n", " return X_nd, y_nd, X_unseen, y_unseen\n", " " ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "# load training label encoding information for discriminate seen/unseen of test \n", "import pickle\n", "TrainingDate = '2012-01-15'\n", "TrainingDataPath = 'data_pickle/Train_' + TrainingDate + '_SCI_color.pkl'\n", "\n", "with open(TrainingDataPath, 'rb') as f: # Python 3: open(..., 'rb')\n", " X_train, y_train, lbl_enc_train = pickle.load(f)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Load test data using training label encoder information \n", "[X_seen, y_seen, X_unseen, y_unseen] = getTestDataNCLT(DataPath, SequenceDate, lbl_enc_train)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/gskim/anaconda3/envs/tfkeras/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", " from ._conv import register_converters as _register_converters\n", "Using TensorFlow backend.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "input_1 (InputLayer) (None, 40, 120, 3) 0 \n", "_________________________________________________________________\n", "conv2d_1 (Conv2D) (None, 40, 120, 64) 4864 \n", "_________________________________________________________________\n", "max_pooling2d_1 (MaxPooling2 (None, 20, 60, 64) 0 \n", "_________________________________________________________________\n", "batch_normalization_1 (Batch (None, 20, 60, 64) 256 \n", "_________________________________________________________________\n", "conv2d_2 (Conv2D) (None, 20, 60, 128) 204928 \n", "_________________________________________________________________\n", "max_pooling2d_2 (MaxPooling2 (None, 10, 30, 128) 0 \n", "_________________________________________________________________\n", "batch_normalization_2 (Batch (None, 10, 30, 128) 512 \n", "_________________________________________________________________\n", "conv2d_3 (Conv2D) (None, 10, 30, 256) 819456 \n", "_________________________________________________________________\n", "max_pooling2d_3 (MaxPooling2 (None, 5, 15, 256) 0 \n", "_________________________________________________________________\n", "flatten_1 (Flatten) (None, 19200) 0 \n", "_________________________________________________________________\n", "dropout_1 (Dropout) (None, 19200) 0 \n", "_________________________________________________________________\n", "dense_1 (Dense) (None, 64) 1228864 \n", "_________________________________________________________________\n", "dropout_2 (Dropout) (None, 64) 0 \n", "_________________________________________________________________\n", "dense_2 (Dense) (None, 579) 37635 \n", "=================================================================\n", "Total params: 2,296,515\n", "Trainable params: 2,296,131\n", "Non-trainable params: 384\n", "_________________________________________________________________\n" ] } ], "source": [ "from keras.models import load_model\n", "modelName = 'pre_trained_model/base0.h5'\n", "testModel = load_model(modelName)\n", "\n", "testModel.summary()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5170/5170 [==============================] - 1s 182us/step\n", "2012-02-04\n", "- Test score: 1.008079155962518\n", "- Test accuracy: 82.76595741913904\n", " \n" ] } ], "source": [ "# Load Trained net \n", "from keras.models import load_model\n", "modelName = 'model/base0.h5'\n", "testModel = load_model(modelName)\n", "\n", "# Predict \n", "scores_TEST = testModel.evaluate(X_seen, y_seen, verbose=1, batch_size=1000)\n", "print(SequenceDate)\n", "print('- Test score:', scores_TEST[0])\n", "print('- Test accuracy:', scores_TEST[1]*100)\n", "print(' ')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# save prediction (for later top N analysis )\n", "y_seen_predicted = testModel.predict(X_seen)\n", "\n", "# save \n", "filename_y_seen_predicted = 'results_predictionvectors/base0/' + SequenceDate + '_seen_predicted'\n", "np.save(filename_y_seen_predicted, y_seen_predicted)\n", "\n", "# save prediction (for later top N analysis )\n", "X_unseen = np.array(X_unseen)\n", "y_unseen_predicted = testModel.predict(X_unseen)\n", "\n", "# save \n", "filename_y_unseen_predicted = 'results_predictionvectors/base0/' + SequenceDate + '_unseen_predicted'\n", "np.save(filename_y_unseen_predicted, y_unseen_predicted)\n" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "scrolled": true }, "outputs": [], "source": [ "### save GT also \n", "\n", "# seen \n", "filename_y_seen_GT = 'results_predictionvectors/base0/' + SequenceDate + '_seen_GT'\n", "np.save(filename_y_seen_GT, y_seen)\n", "\n", "# unseen\n", "filename_y_unseen_GT = 'results_predictionvectors/base0/' + SequenceDate + '_unseen_GT'\n", "np.save(filename_y_unseen_GT, y_unseen)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }