{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np \n",
    "import pandas as pd \n",
    "import tensorflow as tf\n",
    "import keras\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data info \n",
    "rootDir = '..your_path'\n",
    "\n",
    "Dataset = 'NCLT'\n",
    "TrainOrTest = '/Test/'\n",
    "SequenceDate = '2013-04-05'\n",
    "\n",
    "SCImiddlePath = '/4. SCI_jet0to15/'\n",
    "\n",
    "GridCellSize = '10'\n",
    "\n",
    "DataPath = rootDir + Dataset + TrainOrTest + SequenceDate + SCImiddlePath + GridCellSize + '/'\n",
    "print(DataPath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "DataPath = ICRArootDir + Dataset + TrainOrTest + SequenceDate + SCImiddlePath + GridCellSize + '/'\n",
    "print(DataPath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getTestDataNCLT(DataPath, SequenceDate, lbl_enc_train):    \n",
    "\n",
    "    # load training label encoding information \n",
    "    from sklearn.preprocessing import LabelEncoder\n",
    "    ClassesTrainingSequenceHave = lbl_enc_train.classes_\n",
    "    nClassesTrainingSequenceHave = len(ClassesTrainingSequenceHave)\n",
    "\n",
    "    # info\n",
    "    WholeData = os.listdir(DataPath)\n",
    "    nWholeData = len(WholeData)\n",
    "    print(str(nWholeData) + ' data exist in ' + SequenceDate)\n",
    "    \n",
    "    # read \n",
    "    X_seen = []\n",
    "    y_seen = []\n",
    "    X_unseen = []\n",
    "    y_unseen = []\n",
    "    \n",
    "    for ii in range(nWholeData):\n",
    "        dataName = WholeData[ii]\n",
    "        dataPath = DataPath + dataName\n",
    "        \n",
    "        dataTrajNodeOrder = int(dataName[0:5])\n",
    "\n",
    "        SCI = plt.imread(dataPath)\n",
    "        dataPlaceIndex = int(dataName[6:11])\n",
    "        \n",
    "        # if label is in the train, then save into the seen (seen is only subset to be tested)\n",
    "        if dataPlaceIndex in ClassesTrainingSequenceHave:\n",
    "            X_seen.append(SCI)\n",
    "            y_seen.append(dataPlaceIndex)\n",
    "        else:\n",
    "            X_unseen.append(SCI)\n",
    "            y_unseen.append(dataPlaceIndex)\n",
    "        \n",
    "        # progress message \n",
    "        if ii%1000==0:\n",
    "            print(str(format((ii/nWholeData)*100, '.1f')), '% loaded.')\n",
    "        \n",
    "    dataShape = SCI.shape\n",
    "    \n",
    "    # X\n",
    "    nSeenData = len(X_seen)\n",
    "    X_nd = np.zeros(shape=(nSeenData, dataShape[0], dataShape[1], dataShape[2]))\n",
    "    for jj in range(nSeenData):\n",
    "        X_nd[jj, :, :] = X_seen[jj]\n",
    "    X_nd = X_nd.astype('float32')\n",
    "    \n",
    "    # y (one-hot encoded)   \n",
    "    y_seen = lbl_enc_train.transform(y_seen)\n",
    "    y_nd = keras.utils.np_utils.to_categorical(y_seen, num_classes=nClassesTrainingSequenceHave)\n",
    "\n",
    "    # log message \n",
    "    print('Data size: %s' % nWholeData)\n",
    "    print('- Seen data: %s' % len(X_seen))\n",
    "    print('- Uneen data: %s' % len(X_unseen))\n",
    "    print(' ')\n",
    "    print('Data shape:', X_nd.shape)\n",
    "    print('Label shape:', y_nd.shape)\n",
    "    \n",
    "    return X_nd, y_nd, X_unseen, y_unseen\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load training label encoding information for discriminate seen/unseen of test \n",
    "import pickle\n",
    "TrainingDate = '2012-01-15'\n",
    "TrainingDataPath = 'data_pickle/Train_' + TrainingDate + '_SCI_color.pkl'\n",
    "\n",
    "with open(TrainingDataPath, 'rb') as f:  # Python 3: open(..., 'rb')\n",
    "    X_train, y_train, lbl_enc_train = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load test data using training label encoder information  \n",
    "[X_seen, y_seen, X_unseen, y_unseen] = getTestDataNCLT(DataPath, SequenceDate, lbl_enc_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/gskim/anaconda3/envs/tfkeras/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n",
      "Using TensorFlow backend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "input_1 (InputLayer)         (None, 40, 120, 3)        0         \n",
      "_________________________________________________________________\n",
      "conv2d_1 (Conv2D)            (None, 40, 120, 64)       4864      \n",
      "_________________________________________________________________\n",
      "max_pooling2d_1 (MaxPooling2 (None, 20, 60, 64)        0         \n",
      "_________________________________________________________________\n",
      "batch_normalization_1 (Batch (None, 20, 60, 64)        256       \n",
      "_________________________________________________________________\n",
      "conv2d_2 (Conv2D)            (None, 20, 60, 128)       204928    \n",
      "_________________________________________________________________\n",
      "max_pooling2d_2 (MaxPooling2 (None, 10, 30, 128)       0         \n",
      "_________________________________________________________________\n",
      "batch_normalization_2 (Batch (None, 10, 30, 128)       512       \n",
      "_________________________________________________________________\n",
      "conv2d_3 (Conv2D)            (None, 10, 30, 256)       819456    \n",
      "_________________________________________________________________\n",
      "max_pooling2d_3 (MaxPooling2 (None, 5, 15, 256)        0         \n",
      "_________________________________________________________________\n",
      "flatten_1 (Flatten)          (None, 19200)             0         \n",
      "_________________________________________________________________\n",
      "dropout_1 (Dropout)          (None, 19200)             0         \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 64)                1228864   \n",
      "_________________________________________________________________\n",
      "dropout_2 (Dropout)          (None, 64)                0         \n",
      "_________________________________________________________________\n",
      "dense_2 (Dense)              (None, 579)               37635     \n",
      "=================================================================\n",
      "Total params: 2,296,515\n",
      "Trainable params: 2,296,131\n",
      "Non-trainable params: 384\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "from keras.models import load_model\n",
    "modelName = 'pre_trained_model/base0.h5'\n",
    "testModel = load_model(modelName)\n",
    "\n",
    "testModel.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5170/5170 [==============================] - 1s 182us/step\n",
      "2012-02-04\n",
      "- Test score: 1.008079155962518\n",
      "- Test accuracy: 82.76595741913904\n",
      " \n"
     ]
    }
   ],
   "source": [
    "# Load Trained net \n",
    "from keras.models import load_model\n",
    "modelName = 'model/base0.h5'\n",
    "testModel = load_model(modelName)\n",
    "\n",
    "# Predict \n",
    "scores_TEST = testModel.evaluate(X_seen, y_seen, verbose=1, batch_size=1000)\n",
    "print(SequenceDate)\n",
    "print('- Test score:', scores_TEST[0])\n",
    "print('- Test accuracy:', scores_TEST[1]*100)\n",
    "print(' ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save prediction (for later top N analysis )\n",
    "y_seen_predicted = testModel.predict(X_seen)\n",
    "\n",
    "# save \n",
    "filename_y_seen_predicted = 'results_predictionvectors/base0/' + SequenceDate + '_seen_predicted'\n",
    "np.save(filename_y_seen_predicted, y_seen_predicted)\n",
    "\n",
    "# save prediction (for later top N analysis )\n",
    "X_unseen = np.array(X_unseen)\n",
    "y_unseen_predicted = testModel.predict(X_unseen)\n",
    "\n",
    "# save \n",
    "filename_y_unseen_predicted = 'results_predictionvectors/base0/' + SequenceDate + '_unseen_predicted'\n",
    "np.save(filename_y_unseen_predicted, y_unseen_predicted)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "### save GT also \n",
    "\n",
    "# seen \n",
    "filename_y_seen_GT = 'results_predictionvectors/base0/' + SequenceDate + '_seen_GT'\n",
    "np.save(filename_y_seen_GT, y_seen)\n",
    "\n",
    "# unseen\n",
    "filename_y_unseen_GT = 'results_predictionvectors/base0/' + SequenceDate + '_unseen_GT'\n",
    "np.save(filename_y_unseen_GT, y_unseen)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}