Достался мне небольшой гавнопроект по нейронкам. Чтобы не терять времени, быстренько запилю тут основные моменты.
Входные данные имеют примерно такой вид и структуру:
ID, DateTime, Data1, Data2
Данные снимаются каждые пять минут, и в результате выходит файл, в котором куча разных идентификаторов с графиками двух параметров переменной длины.
Естественно, в таком виде данные на вход нейронки мы подать не можем, поэтому используем интерполяцию. Я использую 10 точек интерполяции и для каждой точки передаю: время от начала эксперимента, среднее значение параметра 1, а также среднее, минимальное и максимальное значения параметра 2 — преобразуя «сырые» (raw) данные в датасет следующего вида. Вообще, можно использовать не только минимальные и максимальные значения, но и всякого рода дисперсии и отклонения — в зависимости от задачи.
TargetClassNum, Data1_point1 .. Data1_point10, Data2_point1 .. Data2_point10, Data2min_point1 .. Data2min_point10, Data2max_point1 .. Data2max_point10, Time1 .. Time10
Кусок гавнокода, решающий задачу создания датасета из «сырых» (raw) данных:
import csv
import re
import datetime


def writeToFile(filename, fieldnames, records, interpolation_points):
    """Write interpolated dataset records to a CSV file.

    Each record is laid out as
    [label, p1 x N, p2 x N, p2min x N, p2max x N, t1 x N]
    with N == interpolation_points; columns are named via `fieldnames`
    ('label', 'p1_0'..'t1_{N-1}').
    """
    prefixes = ('p1_', 'p2_', 'p2l_', 'p2h_', 't1_')
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for record in records:
            row = {'label': record[0]}
            for g, prefix in enumerate(prefixes):
                base = 1 + g * interpolation_points
                for i in range(interpolation_points):
                    row[prefix + str(i)] = record[base + i]
            writer.writerow(row)


def writeTestDataFile(filename, fieldnames, records, interpolation_points):
    """Identical to writeToFile (the original duplicated its body verbatim);
    kept as a separate name for interface compatibility."""
    writeToFile(filename, fieldnames, records, interpolation_points)


def num(s):
    """Parse *s* as float; anything unparsable falls back to 0.0
    (empty fields in the raw data become 0.0 this way)."""
    try:
        return float(s)
    except ValueError:
        return 0.0


def zer(s):
    """Left-pad a short string with '0' to width 2 ('' -> '00').
    Unused below; kept for compatibility with external callers."""
    if s == '':
        return '00'
    if len(s) < 2:
        return '0' + str(s)
    return s


def calculate_times(param1, param2, times, intervals, sucess):
    """Resample one experiment into `intervals` equal time buckets.

    Returns [label] + p1 means + p2 means + p2 mins + p2 maxes + bucket
    mid-times (minutes from experiment start): 1 + 5*intervals values.

    Fixes over the original:
    - per-bucket sums are reset between buckets (s1/s2 used to accumulate
      forever and were divided by the global loop index, not the count);
    - the sample that crossed a bucket boundary is no longer dropped;
    - the final bucket is emitted (the original never flushed it), so the
      output always has exactly `intervals` points per group.
    """
    begintime = times[0]
    endtime = times[-1]
    total = endtime - begintime
    span = total / intervals

    # Assign every sample to a bucket; the very last sample falls into
    # the last bucket instead of being lost.
    buckets = [[] for _ in range(intervals)]
    for t, v1, v2 in zip(times, param1, param2):
        if total.total_seconds() == 0:
            k = 0  # degenerate single-timestamp experiment
        else:
            k = min(int((t - begintime) / total * intervals), intervals - 1)
        buckets[k].append((v1, v2))

    p1list, p2list, p2l_list, p2h_list, t1list = [], [], [], [], []
    for k, samples in enumerate(buckets):
        if samples:
            v2s = [v2 for _, v2 in samples]
            p1list.append(sum(v1 for v1, _ in samples) / len(samples))
            p2list.append(sum(v2s) / len(v2s))
            p2l_list.append(min(v2s))
            p2h_list.append(max(v2s))
        else:
            # No samples landed in this time slice.
            p1list.append(0.0)
            p2list.append(0.0)
            p2l_list.append(0.0)
            p2h_list.append(0.0)
        # Bucket midpoint in minutes since experiment start
        # (same formula as the original: currtime + span/2 - begintime).
        t1list.append((span * k + span / 2).total_seconds() / 60.0)

    outlist = [1 if sucess else 0]
    outlist.extend(p1list)
    outlist.extend(p2list)
    outlist.extend(p2l_list)
    outlist.extend(p2h_list)
    outlist.extend(t1list)
    return outlist


# 'YYYY-MM-DD HH:MM:SS' timestamps in the raw data (raw string: the
# original used unescaped '\d' sequences in a normal string).
_DATE_RE = re.compile(r'(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})')


def readFromFile(rise, data1, data2, times, interpolation_points, sucess):
    """Convert a raw capture file into a list of interpolated dataset records.

    rise                 -- path to a ';'-separated file: ID;DateTime;Data1;Data2
    data1, data2, times  -- scratch lists (mutated in place; kept in the
                            signature for compatibility with the original)
    interpolation_points -- number of interpolation buckets per record
    sucess               -- class label applied to every record in this file

    Fixes over the original: rows with fewer than 4 fields are skipped
    (the old `len(row) < 3` check crashed on row[3]); the duplicated
    new-seed / seed-body branches are merged; the last seed in the file
    is flushed too (it used to be silently dropped).
    """
    final_items = []

    def flush():
        # Close out the current seed, if it accumulated any timestamps.
        if times:
            final_items.append(
                calculate_times(data1, data2, times, interpolation_points, sucess))
        times.clear()
        data1.clear()
        data2.clear()

    with open(rise, newline='') as f:
        reader = csv.reader(f, delimiter=';', quoting=csv.QUOTE_NONE)
        seed_id = None
        for row in reader:
            if len(row) < 4:
                continue
            if seed_id is not None and row[0] != seed_id:
                flush()  # a new seed id starts a new record
            seed_id = row[0]
            match = _DATE_RE.search(row[1])
            if match:
                times.append(datetime.datetime(*(int(g) for g in match.groups())))
            else:
                print("something wrong with data " + row[1])
            # Decimal comma -> decimal point; empty/garbage -> 0.0 via num().
            data1.append(num(row[2].replace(',', '.')))
            data2.append(num(row[3].replace(',', '.')))
    flush()
    return final_items


def main():
    """Build train.csv from the success/failure raw captures."""
    interpolation_points = 10

    records = readFromFile('sucess.csv', [], [], [], interpolation_points, True)
    records.extend(readFromFile('failed.csv', [], [], [], interpolation_points, False))

    labels = ['label']
    for prefix in ('p1_', 'p2_', 'p2l_', 'p2h_', 't1_'):
        labels.extend(prefix + str(i) for i in range(interpolation_points))

    writeToFile('train.csv', labels, records, interpolation_points)


if __name__ == '__main__':
    main()
Имея достаточное количество записей, конвертированных в датасет (первое поле — метка класса), можем приступать к обучению сети:
"""Train a simple dense classifier on the dataset produced by the converter.

Usage: python <script> <batch_size> <epochs>
Reads train.csv, trains, saves weights to seeds.hdf5 and reports the
accuracy on the training data itself (no test split yet).
"""
from keras.layers.core import Dense
from keras.models import Sequential
from keras.utils import np_utils

import numpy as np
import pandas as pd

import os
import sys


def main():
    pbatch_size = int(sys.argv[1])
    pepochs = int(sys.argv[2])

    np.random.seed(42)

    train = pd.read_csv('train.csv')
    # .ix was removed in pandas 1.0 — use positional indexing instead.
    labels = train.iloc[:, 0].values.astype('int32')
    x_train = train.iloc[:, 1:].values.astype('float32')
    y_train = np_utils.to_categorical(labels)
    nb_classes = y_train.shape[1]

    # Standardize features; constant columns would give std == 0 and
    # produce NaNs in the original code.
    mean = x_train.mean(axis=0)
    std = x_train.std(axis=0)
    std[std == 0] = 1.0
    x_train = (x_train - mean) / std

    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(x_train.shape[1],)))
    model.add(Dense(128))
    model.add(Dense(128))
    model.add(Dense(nb_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='SGD',
                  metrics=['accuracy'])
    print(model.summary())

    model.fit(x_train, y_train, batch_size=pbatch_size, epochs=pepochs, verbose=1)
    model.save_weights('seeds.hdf5')

    # Evaluate on the training data itself.
    pred = model.predict(x_train)
    suc = 0
    print('learned data result')
    for i, row in enumerate(pred):
        # Original decision rule assumes exactly two classes:
        # predicted class is 1 unless row[0] dominates.
        v = 0 if row[0] > row[1] else 1
        label = int(labels[i])
        if v == label:
            stat = 'success'
            suc += 1
        else:
            stat = 'failed'
        print(stat + ' ' + str(row) + '>>' + str(v) + '===' + str(label))

    print('=' * 69)
    if len(pred):
        print('success rate ' + str(suc / len(pred)))
    os.system('pause')  # Windows-only pause; harmless no-op elsewhere


if __name__ == '__main__':
    main()
Дальнейшие действия сводятся к созданию датасетов из тестовых данных (аналогично обучающим) и передаче их в сеть с загрузкой весов из .hdf5-файла.