#!/usr/bin/env python
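"""
Classify a single piece of text as Hate, Offensive, or Neutral with a
pre-trained 2-D CNN Keras model and the word-index vocabulary saved at
training time.

The text to classify is passed as the first command-line argument.
"""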
from keras.models import load_model
import numpy as np
import re
import sys

def clean_str(string):
    """
    Tokenization/string cleaning for datasets.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    NOTE: this must stay identical to the cleaning used when the model and
    vocabulary were built, otherwise the word-index lookup below will not match.
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " \( ", string)
    string = re.sub(r"\)", " \) ", string)
    string = re.sub(r"\?", " \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()
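# Example: clean_str("Don't, stop!") -> "do n't , stop !"
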
def pad_sentences(sentences, padding_word="<PAD/>", sequence_length=85):
    """
    Pads (or truncates) every sentence to sequence_length tokens, the fixed
    input width the CNN was trained with.
    Returns the padded sentences.
    """
    padded_sentences = []
    for sentence in sentences:
        sentence = sentence[:sequence_length]  # truncate inputs longer than the model expects
        num_padding = sequence_length - len(sentence)
        padded_sentences.append(sentence + [padding_word] * num_padding)
    return padded_sentences
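# Example: pad_sentences([["hate", "speech"]]) returns a single sentence of 85
# tokens: ["hate", "speech"] followed by 83 "<PAD/>" tokens.
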
# The text to classify is taken from the first command-line argument.
if len(sys.argv) < 2:
    sys.exit("usage: python %s \"text to classify\"" % sys.argv[0])
x = sys.argv[1]
# Clean and tokenise the input, then pad it to the model's input width.
x_text = [clean_str(x)]
x_text = [s.split(" ") for s in x_text]
sentences_padded = pad_sentences(x_text)
# The word -> index vocabulary was pickled inside a NumPy file at training
# time; allow_pickle=True is required to load object arrays with recent NumPy.
vocabulary = np.load('data123-vocab-servertest2.npy', allow_pickle=True).item()
# Map each token to its vocabulary index. Unknown words fall back to the
# padding token's index (assumed present, since padded sentences were used to
# build the vocabulary) rather than raising a KeyError.
x2 = np.array([[vocabulary.get(word, vocabulary["<PAD/>"]) for word in sentence]
               for sentence in sentences_padded])
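# For one input sentence, x2 has shape (1, 85): a batch of a single row of 85
# integer word indices, matching the fixed input width used by pad_sentences.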
# Load the trained CNN and score the input; the model outputs one probability
# per class (Hate, Offensive, Neutral) for each input sentence.
model = load_model('cnn2D-data123-multi-servertest2.hdf5')
y_pred = model.predict(x2)
class_names = ['Hate', 'Offensive', 'Neutral']
# Print each class probability as a percentage.
for probs in y_pred:
    for name, prob in zip(class_names, probs):
        print(name + " " + format(prob * 100, '.2f') + "%")
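# Example output (the probabilities are illustrative only):
#   Hate 7.41%
#   Offensive 88.03%
#   Neutral 4.56%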
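# Example invocation (the script name is a placeholder; the .npy vocabulary
# and .hdf5 model files are assumed to sit in the working directory):
#   python predict_cnn.py "some text to classify"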