304 lines
10 KiB
Plaintext
304 lines
10 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import os\n",
|
||
|
"import numpy as np \n",
|
||
|
"import pandas as pd \n",
|
||
|
"import tensorflow as tf\n",
|
||
|
"import keras\n",
|
||
|
"import matplotlib.pyplot as plt"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Data info \n",
|
||
|
"rootDir = '..your_path'\n",
|
||
|
"\n",
|
||
|
"Dataset = 'NCLT'\n",
|
||
|
"TrainOrTest = '/Test/'\n",
|
||
|
"SequenceDate = '2013-04-05'\n",
|
||
|
"\n",
|
||
|
"SCImiddlePath = '/4. SCI_jet0to15/'\n",
|
||
|
"\n",
|
||
|
"GridCellSize = '10'\n",
|
||
|
"\n",
|
||
|
"DataPath = rootDir + Dataset + TrainOrTest + SequenceDate + SCImiddlePath + GridCellSize + '/'\n",
|
||
|
"print(DataPath)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"DataPath = ICRArootDir + Dataset + TrainOrTest + SequenceDate + SCImiddlePath + GridCellSize + '/'\n",
|
||
|
"print(DataPath)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def getTestDataNCLT(DataPath, SequenceDate, lbl_enc_train): \n",
|
||
|
"\n",
|
||
|
" # load training label encoding information \n",
|
||
|
" from sklearn.preprocessing import LabelEncoder\n",
|
||
|
" ClassesTrainingSequenceHave = lbl_enc_train.classes_\n",
|
||
|
" nClassesTrainingSequenceHave = len(ClassesTrainingSequenceHave)\n",
|
||
|
"\n",
|
||
|
" # info\n",
|
||
|
" WholeData = os.listdir(DataPath)\n",
|
||
|
" nWholeData = len(WholeData)\n",
|
||
|
" print(str(nWholeData) + ' data exist in ' + SequenceDate)\n",
|
||
|
" \n",
|
||
|
" # read \n",
|
||
|
" X_seen = []\n",
|
||
|
" y_seen = []\n",
|
||
|
" X_unseen = []\n",
|
||
|
" y_unseen = []\n",
|
||
|
" \n",
|
||
|
" for ii in range(nWholeData):\n",
|
||
|
" dataName = WholeData[ii]\n",
|
||
|
" dataPath = DataPath + dataName\n",
|
||
|
" \n",
|
||
|
" dataTrajNodeOrder = int(dataName[0:5])\n",
|
||
|
"\n",
|
||
|
" SCI = plt.imread(dataPath)\n",
|
||
|
" dataPlaceIndex = int(dataName[6:11])\n",
|
||
|
" \n",
|
||
|
" # if label is in the train, then save into the seen (seen is only subset to be tested)\n",
|
||
|
" if dataPlaceIndex in ClassesTrainingSequenceHave:\n",
|
||
|
" X_seen.append(SCI)\n",
|
||
|
" y_seen.append(dataPlaceIndex)\n",
|
||
|
" else:\n",
|
||
|
" X_unseen.append(SCI)\n",
|
||
|
" y_unseen.append(dataPlaceIndex)\n",
|
||
|
" \n",
|
||
|
" # progress message \n",
|
||
|
" if ii%1000==0:\n",
|
||
|
" print(str(format((ii/nWholeData)*100, '.1f')), '% loaded.')\n",
|
||
|
" \n",
|
||
|
" dataShape = SCI.shape\n",
|
||
|
" \n",
|
||
|
" # X\n",
|
||
|
" nSeenData = len(X_seen)\n",
|
||
|
" X_nd = np.zeros(shape=(nSeenData, dataShape[0], dataShape[1], dataShape[2]))\n",
|
||
|
" for jj in range(nSeenData):\n",
|
||
|
" X_nd[jj, :, :] = X_seen[jj]\n",
|
||
|
" X_nd = X_nd.astype('float32')\n",
|
||
|
" \n",
|
||
|
" # y (one-hot encoded) \n",
|
||
|
" y_seen = lbl_enc_train.transform(y_seen)\n",
|
||
|
" y_nd = keras.utils.np_utils.to_categorical(y_seen, num_classes=nClassesTrainingSequenceHave)\n",
|
||
|
"\n",
|
||
|
" # log message \n",
|
||
|
" print('Data size: %s' % nWholeData)\n",
|
||
|
" print('- Seen data: %s' % len(X_seen))\n",
|
||
|
"    print('- Unseen data: %s' % len(X_unseen))\n",
|
||
|
" print(' ')\n",
|
||
|
" print('Data shape:', X_nd.shape)\n",
|
||
|
" print('Label shape:', y_nd.shape)\n",
|
||
|
" \n",
|
||
|
" return X_nd, y_nd, X_unseen, y_unseen\n",
|
||
|
" "
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 63,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# load training label encoding information for discriminate seen/unseen of test \n",
|
||
|
"import pickle\n",
|
||
|
"TrainingDate = '2012-01-15'\n",
|
||
|
"TrainingDataPath = 'data_pickle/Train_' + TrainingDate + '_SCI_color.pkl'\n",
|
||
|
"\n",
|
||
|
"with open(TrainingDataPath, 'rb') as f: # Python 3: open(..., 'rb')\n",
|
||
|
" X_train, y_train, lbl_enc_train = pickle.load(f)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Load test data using training label encoder information \n",
|
||
|
"[X_seen, y_seen, X_unseen, y_unseen] = getTestDataNCLT(DataPath, SequenceDate, lbl_enc_train)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"/home/gskim/anaconda3/envs/tfkeras/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
|
||
|
" from ._conv import register_converters as _register_converters\n",
|
||
|
"Using TensorFlow backend.\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"_________________________________________________________________\n",
|
||
|
"Layer (type) Output Shape Param # \n",
|
||
|
"=================================================================\n",
|
||
|
"input_1 (InputLayer) (None, 40, 120, 3) 0 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"conv2d_1 (Conv2D) (None, 40, 120, 64) 4864 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"max_pooling2d_1 (MaxPooling2 (None, 20, 60, 64) 0 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"batch_normalization_1 (Batch (None, 20, 60, 64) 256 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"conv2d_2 (Conv2D) (None, 20, 60, 128) 204928 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"max_pooling2d_2 (MaxPooling2 (None, 10, 30, 128) 0 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"batch_normalization_2 (Batch (None, 10, 30, 128) 512 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"conv2d_3 (Conv2D) (None, 10, 30, 256) 819456 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"max_pooling2d_3 (MaxPooling2 (None, 5, 15, 256) 0 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"flatten_1 (Flatten) (None, 19200) 0 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"dropout_1 (Dropout) (None, 19200) 0 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"dense_1 (Dense) (None, 64) 1228864 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"dropout_2 (Dropout) (None, 64) 0 \n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"dense_2 (Dense) (None, 579) 37635 \n",
|
||
|
"=================================================================\n",
|
||
|
"Total params: 2,296,515\n",
|
||
|
"Trainable params: 2,296,131\n",
|
||
|
"Non-trainable params: 384\n",
|
||
|
"_________________________________________________________________\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from keras.models import load_model\n",
|
||
|
"modelName = 'pre_trained_model/base0.h5'\n",
|
||
|
"testModel = load_model(modelName)\n",
|
||
|
"\n",
|
||
|
"testModel.summary()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 13,
|
||
|
"metadata": {
|
||
|
"scrolled": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"5170/5170 [==============================] - 1s 182us/step\n",
|
||
|
"2012-02-04\n",
|
||
|
"- Test score: 1.008079155962518\n",
|
||
|
"- Test accuracy: 82.76595741913904\n",
|
||
|
" \n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Load Trained net \n",
|
||
|
"from keras.models import load_model\n",
|
||
|
"modelName = 'model/base0.h5'\n",
|
||
|
"testModel = load_model(modelName)\n",
|
||
|
"\n",
|
||
|
"# Predict \n",
|
||
|
"scores_TEST = testModel.evaluate(X_seen, y_seen, verbose=1, batch_size=1000)\n",
|
||
|
"print(SequenceDate)\n",
|
||
|
"print('- Test score:', scores_TEST[0])\n",
|
||
|
"print('- Test accuracy:', scores_TEST[1]*100)\n",
|
||
|
"print(' ')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 21,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# save prediction (for later top N analysis )\n",
|
||
|
"y_seen_predicted = testModel.predict(X_seen)\n",
|
||
|
"\n",
|
||
|
"# save \n",
|
||
|
"filename_y_seen_predicted = 'results_predictionvectors/base0/' + SequenceDate + '_seen_predicted'\n",
|
||
|
"np.save(filename_y_seen_predicted, y_seen_predicted)\n",
|
||
|
"\n",
|
||
|
"# save prediction (for later top N analysis )\n",
|
||
|
"X_unseen = np.array(X_unseen)\n",
|
||
|
"y_unseen_predicted = testModel.predict(X_unseen)\n",
|
||
|
"\n",
|
||
|
"# save \n",
|
||
|
"filename_y_unseen_predicted = 'results_predictionvectors/base0/' + SequenceDate + '_unseen_predicted'\n",
|
||
|
"np.save(filename_y_unseen_predicted, y_unseen_predicted)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 72,
|
||
|
"metadata": {
|
||
|
"scrolled": true
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"### save GT also \n",
|
||
|
"\n",
|
||
|
"# seen \n",
|
||
|
"filename_y_seen_GT = 'results_predictionvectors/base0/' + SequenceDate + '_seen_GT'\n",
|
||
|
"np.save(filename_y_seen_GT, y_seen)\n",
|
||
|
"\n",
|
||
|
"# unseen\n",
|
||
|
"filename_y_unseen_GT = 'results_predictionvectors/base0/' + SequenceDate + '_unseen_GT'\n",
|
||
|
"np.save(filename_y_unseen_GT, y_unseen)\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.5.2"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|