Processing sample data. Finding interval estimates of distribution parameters. Testing a statistical hypothesis
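Written out explicitly, the interval estimates and the test statistic computed by the code below (in calc_conf_interval_of_exp_v, calc_conf_interval_of_sd and check_pirson_test_normal_law) are the standard ones. Here n denotes the sample size (Something.v in the code), s the corrected standard deviation (fixed_sd), k the number of intervals (Something.k), and m_i, m_i' the empirical and theoretical interval frequencies; this notation is introduced only for readability:

\[ \bar{x} - t_\gamma \frac{s}{\sqrt{n}} \;<\; a \;<\; \bar{x} + t_\gamma \frac{s}{\sqrt{n}}, \qquad t_\gamma = t\!\left(\tfrac{1+\gamma}{2},\; n-1\right), \]
\[ s(1-q) \;<\; \sigma \;<\; s(1+q) \ \ (q < 1), \qquad 0 \;<\; \sigma \;<\; s(1+q) \ \ (q \ge 1), \]
\[ \chi^2_{\mathrm{obs}} = \sum_{i=1}^{k} \frac{(m_i - m_i')^2}{m_i'}, \qquad m_i' = n\,p_i, \]

and \(\chi^2_{\mathrm{obs}}\) is compared with the critical value \(\chi^2_{\mathrm{cr}}(\alpha,\, k-3)\). The listing that follows is the part of the program (classes Something and Somebody) that implements these computations together with the related plots and CSV reports.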
# The module this excerpt was taken from relies on the following imports:
#   import csv
#   import math
#   import numpy as np
#   import pandas as pd
#   import matplotlib.pyplot as plt
#   import scipy.stats as sts
#   from scipy.stats import chi2
# The beginning of class Something (its constructor and the signature of the
# frequency-counting method below) precedes this excerpt and is not shown.

        # Count how many sample values fall into each of the Something.k
        # equal-width intervals of the ranged (sorted) sequence.
        xk = self.ranged_seq[0]
        xk1 = xk + self.h
        for i in range(Something.k):
            ni = 0
            for j in self.ranged_seq:
                if i == 0:
                    # the first interval is closed on both ends
                    if xk <= j <= xk1:
                        ni += 1
                else:
                    # the remaining intervals are half-open: (xk, xk1]
                    if xk < j <= xk1:
                        ni += 1
            self.freq[i] = ni
            xk = xk1
            xk1 += self.h

    def calc_rel_freq(self):
        # relative frequency = absolute frequency / sample size
        for i in range(Something.k):
            self.rel_freq[i] = self.freq[i] / Something.v

    def calc_cum_freq(self):
        # accumulated (cumulative) absolute frequencies
        for i in range(Something.k):
            if i == 0:
                self.cum_freq[i] = self.freq[i]
            else:
                self.cum_freq[i] = self.cum_freq[i - 1] + self.freq[i]

    def calc_cum_rel_freq(self):
        # accumulated (cumulative) relative frequencies
        for i in range(Something.k):
            if i == 0:
                self.cum_rel_freq[i] = self.rel_freq[i]
            else:
                self.cum_rel_freq[i] = self.cum_rel_freq[i - 1] + self.rel_freq[i]

    def calc_conf_interval_of_exp_v(self, conf_accuracy):
        # Confidence interval for the expected value with unknown variance:
        # x_mean +/- t_gamma(n - 1) * s / sqrt(n), t_gamma being the Student quantile.
        eps = sts.t.ppf((1 + conf_accuracy) / 2, Something.v - 1) * self.fixed_sd / math.sqrt(Something.v)
        conf_interval = '(' + str(self.x_avr_mean - eps) + '-' + str(self.x_avr_mean + eps) + ')'
        self.conf_interval_accuracy.update({conf_interval: conf_accuracy})

    def calc_conf_interval_of_sd(self, conf_accuracy, q):
        # Confidence interval for the standard deviation via the tabulated q = q(gamma, n):
        # (s(1 - q); s(1 + q)) when q < 1, otherwise (0; s(1 + q)).
        if q < 1:
            conf_interval = '(' + str(self.fixed_sd * (1 - q)) + ';' + str(self.fixed_sd * (1 + q)) + ')'
        else:  # q >= 1; the original `elif q > 1` left q == 1 unhandled
            conf_interval = '(' + str(0) + '-' + str(self.fixed_sd * (1 + q)) + ')'
        self.conf_interval_accuracy_sd.update({conf_interval: conf_accuracy})

    def dict_to_csv(self):
        pd.DataFrame({'Confident interval': list(self.conf_interval_accuracy.keys()),
                      'Confident accuracy': list(self.conf_interval_accuracy.values())}
                     ).to_csv('resources/conf_interval_exp_value_' + self.name + '.csv',
                              index=False, encoding='utf-8')

    def dict_sd_to_csv(self):
        pd.DataFrame({'Confident interval': list(self.conf_interval_accuracy_sd.keys()),
                      'Confident accuracy': list(self.conf_interval_accuracy_sd.values())}
                     ).to_csv('resources/conf_interval_sd_' + self.name + '.csv',
                              index=False, encoding='utf-8')

    def ranged_to_csv(self):
        pd.DataFrame({self.name: self.ranged_seq}).to_csv('resources/rs_' + self.name + '.csv',
                                                          index=False, encoding='utf-8')

    def lab2_p1(self):
        pd.DataFrame({self.name: self.var_srs,
                      'middle of intervals': self.xi_plus_half_of_h,
                      'Accumulated frequency': self.cum_freq,
                      'Frequency': self.freq}).to_csv('resources/l2_p1_' + self.name + '.csv',
                                                      index=False, encoding='utf-8')

    def lab2_p2(self):
        pd.DataFrame({'Conditional variants': self.ui,
                      'Frequency': self.freq}).to_csv('resources/l2_ui_' + self.name + '.csv',
                                                      index=False, encoding='utf-8')

    def lab2_p3(self):
        pd.DataFrame({'Conditional empirical moments': self.vi,
                      'Central empirical moments': self.mu_i}).to_csv('resources/l2_mu_v_' + self.name + '.csv',
                                                                      index=False, encoding='utf-8')

    def ivs_to_csv(self):
        # 'Частота' = absolute frequency, 'Частость' = relative frequency
        pd.DataFrame({self.name: self.var_srs,
                      'Частота': self.freq,
                      'Частость': self.rel_freq}).to_csv('resources/ivs_' + self.name + '.csv',
                                                         index=False, encoding='utf-8')

    def draw_polygon(self, param):
        # Frequency polygon: interval midpoints vs. (relative) frequencies.
        fig, ax = plt.subplots()
        ax.set_xlabel(self.name)
        if param == self.freq:
            ax.set_ylabel('Absolute frequency')
            ax.set_title('Frequency polygon')
            ax.plot(self.xi_plus_half_of_h, self.freq, 'o-')
            fig.savefig('resources/freq_poly_' + self.name + '.png')
        elif param == self.rel_freq:
            ax.set_ylabel('Relative frequency')
            ax.set_title('Relative Frequency polygon')
            ax.plot(self.xi_plus_half_of_h, self.rel_freq, 'o-')
            fig.savefig('resources/rel_freq_poly_' + self.name + '.png')

    def draw_hist(self, param):
        # Histograms of frequency density: bar heights are n_i / h and n_i / (n * h).
        fig, ax = plt.subplots()
        ax.set_xlabel(self.name)
        if param == self.freq:
            ax.set_ylabel('Absolute frequency')
            ax.set_title('Absolute Frequency hist')
            ax.hist(self.ranged_seq, bins=Something.k,
                    weights=np.zeros_like(self.ranged_seq) + 1. / self.h,
                    edgecolor='black')
            fig.savefig('resources/freq_hist_' + self.name + '.png')
        elif param == self.rel_freq:
            ax.set_ylabel('Relative frequency')
            ax.set_title('Relative Frequency hist')
            ax.hist(self.ranged_seq, bins=Something.k,
                    weights=np.zeros_like(self.ranged_seq) + 1. / (len(self.ranged_seq) * self.h),
                    edgecolor='black')
            fig.savefig('resources/rel_freq_hist_' + self.name + '.png')

    def draw_cumulates(self, param):
        # Cumulate: accumulated frequencies plotted over the interval boundaries,
        # padded on both sides so the curve starts at zero.
        xi = [min(self.xi) - self.h / 2] + self.xi + [max(self.xi) + self.h]
        fig, ax = plt.subplots()
        ax.set_xlabel(self.name)
        ax.set_xlim([min(xi) + self.h / 5, max(xi) - self.h / 5])
        if param == self.freq:
            yi = [0.0] * 2 + self.cum_freq + [max(self.cum_freq)]
            ax.set_ylabel('Absolute accumulated frequency')
            ax.set_title('Cumulates of acc_abs_freq')
            ax.plot(xi, yi, 'o-')
            fig.savefig('resources/cumulates_freq_' + self.name + '.png')
        elif param == self.rel_freq:
            yi = [0.0] * 2 + self.cum_rel_freq + [max(self.cum_rel_freq)]
            ax.set_ylabel('Relative accumulated frequency')
            ax.set_title('Cumulates of acc_rel_freq')
            ax.plot(xi, yi, 'o-')
            fig.savefig('resources/cumulates_rel_freq_' + self.name + '.png')

    def draw_emp_func(self):
        # Empirical distribution function of the interval variation series.
        xi = [min(self.xi) - self.h / 2] + self.xi + [max(self.xi) + self.h]
        fig, ax = plt.subplots()
        ax.set_xlabel(self.name)
        ax.set_xlim([min(xi) + self.h / 5, max(xi) - self.h / 5])
        yi = [0.0] * 2 + self.cum_rel_freq + [max(self.cum_rel_freq)]
        ax.set_ylabel('Relative accumulated frequency')
        ax.set_title('Empirical function of the interval variation series')
        ax.plot(xi, yi, 'o-')
        fig.savefig('resources/empirical_function_' + self.name + '.png')

    def check_pirson_test_normal_law(self, sign_level):
        # Pearson's chi-square goodness-of-fit test for the normal law:
        # H0 - the variable is normally distributed with a = x_avr_mean, sigma = fixed_sd;
        # H1 - it is not normally distributed.
        file = open('C:/Users/sidor/PycharmProjects/math_stat_labs/resources/Pirson_checkNormal_' + self.name,
                    "w+", encoding='utf-8')
        file.write("H0 : Случайная величина {} распределена нормально "
                   "с параметрами a = {}, disp = {}\n"
                   "H1 : Случайная величина {} не распределена по нормальному закону".format(
                       self.name, self.x_avr_mean, self.fixed_sd, self.name))
        l = Something.k - 3  # degrees of freedom: k - r - 1 with r = 2 estimated parameters
        var_sq = self.calc_new_vs()
        pi = [0] * Something.k
        align_mi = [0] * Something.k
        diff_mi = [0] * Something.k
        diff_div_mi = [0] * Something.k
        for i in range(Something.k):
            # theoretical probabilities of falling into each interval
            # (the original referred to the global instance `sp`; self.fixed_sd is this sample's own sd)
            if i == 0:
                pi[i] = sts.norm.cdf((self.xi[i + 1] - self.x_avr_mean) / self.fixed_sd)
            elif i == Something.k - 1:
                pi[i] = 1 - sts.norm.cdf((self.xi[i] - self.x_avr_mean) / self.fixed_sd)
            else:
                pi[i] = (sts.norm.cdf((self.xi[i + 1] - self.x_avr_mean) / self.fixed_sd)
                         - sts.norm.cdf((self.xi[i] - self.x_avr_mean) / self.fixed_sd))
            align_mi[i] = pi[i] * Something.v  # theoretical (aligned) frequencies m_i'
            diff_mi[i] = math.pow(self.freq[i] - align_mi[i], 2)
            diff_div_mi[i] = diff_mi[i] / align_mi[i]
        pd.DataFrame({'Intervals': var_sq,
                      'mi': self.freq,
                      'pi': pi,
                      'mi_aligned': align_mi,
                      "(mi - mi')^2": diff_mi,
                      "(mi - mi')^2 / mi'": diff_div_mi}).to_csv('resources/l3_p3_' + self.name + '.csv',
                                                                 index=False, encoding='utf-8')
        ksi_observed = sum(diff_div_mi)
        ksi_critical = chi2.ppf(1 - sign_level, l)
        file.write("\n Результат проверки гипотезы:")
        if ksi_observed < ksi_critical:
            file.write("\nksi_observed < ksi_critical => нет оснований отвергнуть H0 ")
        else:
            file.write("\nksi_observed > ksi_critical => случайная величина {} "
                       "не распределена по нормальному закону ".format(self.name))
        file.close()

    def make_cool_dict(self):
        # collect the point estimates of the sample characteristics in one dictionary
        self.cool_dict.update({'expected value': self.x_avr_mean,
                               'dispersion': self.dispersion,
                               'sd': self.sd,
                               'asm cef': self.asm_cef,
                               'kurtosis cef': self.kurtosis_cef,
                               'mode': self.mode,
                               'median': self.median,
                               'variation cef': self.variation_cef})


class Somebody:
    def __init__(self):
        # two-dimensional (correlation) table of frequencies m_ij and related statistics
        self.mij = [[0] * Something.k for i in range(Something.k)]
        self.ig_mean = 0
        self.ig_mean_exp = 0
        self.ig_dispersion_x = 0
        self.ig_dispersion_y = 0
        self.cme = None
        self.param = None
        self.cor_cef = None
        self.conf_interval_cor_cef = {}
        self.pxy = None
        self.pyx = None
        self.nyx = None
        self.nxy = None
        self.x_avr_y = Something.k * [0]
        self.y_avr_x = Something.k * [0]

    def lab4_p1(self, dict1, dict2):
        # write the point estimates of both samples side by side into one CSV file
        self.param = [dict1, dict2]
        with open('C:/Users/sidor/PycharmProjects/math_stat_labs/resources/sp_sfm_params.csv',
                  "w+", newline='', encoding='utf-8') as csvfile:
            header_key = ['', 'sp', 'sfm']
            new_val = csv.DictWriter(csvfile, fieldnames=header_key)
            new_val.writeheader()
            for new_k in dict1:
                new_val.writerow({'': new_k, 'sp': dict1[new_k], 'sfm': dict2[new_k]})

    def calc_mij(self, data1, data2, xi, yi):
        # fill the correlation table: mij[j][l] counts pairs that fall into
        # the j-th interval of X and the l-th interval of Y
        for i in range(Something.v):
            for j in range(Something.k):
                for l in range(Something.k):
                    if j == 0:
                        if l == 0:
                            if xi[j] <= data1[i] <= xi[j + 1] and yi[l] <= data2[i] <= yi[l + 1]:
                                self.mij[j][l] += 1
                        else:
                            if xi[j] <= data1[i] <= xi[j + 1] and yi[l] < data2[i] <= yi[l + 1]:
                                self.mij[j][l] += 1
                    else:
                        if l == 0:
                            if xi[j] < data1[i] <= xi[j + 1] and yi[l] <= data2[i] <= yi[l + 1]:
                                self.mij[j][l] += 1
                        else:
                            if xi[j] < data1[i] <= xi[j + 1] and yi[l] < data2[i] <= yi[l + 1]:
                                self.mij[j][l] += 1

    def calc_intergroup_mean(self, xi, yi):
        # mean of the product XY over the correlation table: sum(x_i * y_j * m_ij) / n
        for i in range(Something.k):
            for j in range(Something.k):
                self.ig_mean += xi[i] * yi[j] * self.mij[i][j]
        self.ig_mean /= Something.v

    def calc_intergroup_mean_exp(self, xi, yi):
        # same as above, but with ln(y_j) (used for the exponential regression model)
        for i in range(Something.k):
            for j in range(Something.k):
                self.ig_mean_exp += xi[i] * math.log(yi[j]) * self.mij[i][j]
        self.ig_mean_exp /= Something.v

    def calc_cme(self, x_avr, y_avr):
        # correlation moment (sample covariance): mean(XY) - mean(X) * mean(Y)
        self.cme = self.ig_mean - x_avr * y_avr

    def calc_condit_mean(self, data_x, data_y):
        # conditional means of X for fixed Y (and vice versa); the excerpt breaks off here
        s_y = 0
        m_sum_y = 0
        m_sum_x = 0
        s_x = 0
        for i in range(Something.k):
            for j in range(Something.k):
                s_y += data_x[j] * self.mij[j][i]
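The excerpt breaks off inside calc_condit_mean, before the correlation coefficient stored in Somebody.cor_cef is filled in. Below is a minimal sketch of the conventional final step; the helper name correlation_coefficient and the usage line are illustrative assumptions and not part of the original code, while cme, fixed_sd, sp and sfm are names taken from the listing above.

# Illustrative sketch (not from the original listing): the sample correlation
# coefficient is the correlation moment (covariance) divided by the product
# of the two standard deviations, r_xy = cme / (sd_x * sd_y).
def correlation_coefficient(pair, sample_x, sample_y):
    # `pair` is assumed to be a Somebody instance whose cme has already been
    # computed via calc_intergroup_mean() and calc_cme(); sample_x and sample_y
    # are assumed to be the two Something instances (sp and sfm in the listing).
    return pair.cme / (sample_x.fixed_sd * sample_y.fixed_sd)

# possible usage (assumed, since the original driver code is not shown):
# pair.cor_cef = correlation_coefficient(pair, sp, sfm)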