Обработка выборочных данных (файл «обработка_выборочных_данных»). Нахождение интервальных оценок параметров распределения. Проверка статистической гипотезы.
Скачать 41.76 Kb.
|
s_x += data_y[j] * self.mij[i][j] m_sum_y += self.mij[j][i] m_sum_x += self.mij[i][j] self.x_avr_y[i] = s_y / m_sum_y self.y_avr_x[i] = s_x / m_sum_x s_y = 0 s_x = 0 m_sum_x = 0 m_sum_y = 0 def calc_intergroup_dispersion(self, x_avr, y_avr, freq_x, freq_y): for i in range(Something.k): self.ig_dispersion_x += math.pow((self.x_avr_y[i] - x_avr), 2) * freq_y[i] self.ig_dispersion_y += math.pow((self.y_avr_x[i] - y_avr), 2) * freq_x[i] self.ig_dispersion_x /= Something.v self.ig_dispersion_y /= Something.v def calc_cor_cef(self, sd_x, sd_y): self.cor_cef = self.cme / (sd_x * sd_y) def calc_trust_int(self, s): z = 0.5 * math.log((1 + self.cor_cef) / (1 - self.cor_cef)) se = 1 / math.sqrt(Something.v) zl = math.tanh(z - sts.t.ppf((1 + s) / 2, 1000000) * se) zu = math.tanh(z + sts.t.ppf((1 + s) / 2, 1000000) * se) interval = '(' + str(zl) + '-' + str(zu) + ')' self.conf_interval_cor_cef.update({interval: s}) def calc_p(self, disp_x, disp_y): self.pxy = self.cme / disp_y self.pyx = self.cme / disp_x def calc_n(self, sd_x, sd_y): self.nxy = math.sqrt(self.ig_dispersion_x) / sd_x self.nyx = math.sqrt(self.ig_dispersion_y ) / sd_y def dict_to_csv(self): pd.DataFrame({'Confident interval': self.conf_interval_cor_cef.keys(), 'Confident accuracy': self.conf_interval_cor_cef.values()}).to_csv('resources/conf_interval_cor_cef_' + '.csv', index=False, encoding='utf-8') def lab4_p2(self, ivs1, ivs2): d = ['sp\\sfm'] + ivs2 d2 = [0] * Something.k for i in range(Something.k): d2[i] = [ivs1[i]] + self.mij[i] with open('C:/Users/sidor/PycharmProjects/math_stat_labs/resources/2dim_ivs.csv', "w+", newline='', encoding='utf-8') as csvfile: f = csv.writer(csvfile) f.writerow(d) for i in range(Something.k): f.writerow(d2[i]) csvfile.close() def lab4_p3(self, xi_plus_half, yi_plus_half, x_freq, y_freq): d = [''] + yi_plus_half + ['mi_sp'] d2 = [0] * (Something.k + 1) for i in range(Something.k): d2[i] = [xi_plus_half[i]] + self.mij[i] + [x_freq[i]] d2[Something.k] = ['mi_sfm'] + y_freq + 
['sum(mi_sp) = sum(mi_sfm) = ' + str(Something.v)] with open('C:/Users/sidor/PycharmProjects/math_stat_labs/resources/correlation_table.csv', "w+", newline='', encoding='utf-8') as csvfile: f = csv.writer(csvfile) f.writerow(d) for i in range(Something.k + 1): f.writerow(d2[i]) csvfile.close() def check_hypothesis(self, s): t_observed = self.cor_cef * math.sqrt(Something.v - 2) / math.sqrt(1 - math.pow(self.cor_cef, 2)) t_crit = sts.t.ppf((1 + s) / 2, df=Something.v - 2) print(t_observed) print(t_crit) file = open('C:/Users/sidor/PycharmProjects/math_stat_labs/resources/hypothesis_check', "w+", encoding='utf-8') file.write("H0 : r_g = 0 - линейная связь отсутствует\n" "H1 : r_g != 0 - линейная связь присутствует\n") if abs(t_observed) <= t_crit: file.write("|t_observed| <= t_crit => H0 принимаем") else: file.write("|t_observed| > t_crit => H0 отвергаем") file.close() def draw_plot(self, x_name, y_name, data_x, data_y): fig, ax = plt.subplots() ax.set_xlabel(x_name) ax.set_ylabel(y_name) ax.set_title('Двумерная выборка') ax.plot(data_x, data_y, 'o') fig.savefig('resources/bivariate_sampling.png') def draw_plot2(self, x_name, y_name, data_x, data_y, x_avr, y_avr): y_x = self.pyx * (data_x - x_avr) + y_avr x_y = self.pxy * (data_y - y_avr) + x_avr d2 = 0 s2 = 0 e_y = [0] * Something.v for i in range(Something.v): e_y[i] = data_y[i] - y_x[i] d2 += (y_x[i] - y_avr) ** 2 s2 += (data_y[i] - y_avr) ** 2 s_ey = math.sqrt((1 / (Something.v - 2)) * sum(np.array(e_y) ** 2)) a_y = 1 / Something.v * sum(np.abs(np.array(e_y) / np.array(data_y))) d2 /= Something.v s2 /= Something.v print('\nЛинейная регрессия '.format(d2)) print('R^2_y - {}'.format(d2 / s2)) print('S_ey - {}'.format(s_ey)) print('A_y - {}'.format(a_y)) fig, ax = plt.subplots() ax.set_xlabel(x_name) ax.set_ylabel(y_name) ax.set_title('Lin Regression') ax.plot(data_x, data_y, 'o') line1, = ax.plot(data_x, y_x, '-g', label='y_x') line2, = ax.plot(x_y, data_y, '-0', label='x_y') ax.legend(handles=[line1, line2]) 
fig.savefig('resources/lin_regression.png') def lab5_p4(self, data_x, freq_x, data_y, freq_y, d_x, d_y, ranged_x, ranged_y, x_avr, y_avr): cef_matrix = [[0] * 3 for i in range(3)] addit_matrix = [0] * 3 power = 4 addit_power = 2 for i in range(3): addit_matrix[i] = sum(((np.array(data_x) ** addit_power) * np.array(freq_x)) * (np.array(self.y_avr_x))) for j in range(3): cef_matrix[i][j] = sum((np.array(data_x) ** power) * np.array(freq_x)) power -= 1 power += 2 addit_power -= 1 addit_matrix[2] = sum(np.array(self.y_avr_x) * np.array(freq_x)) cef_matrix = np.array(cef_matrix) addit_matrix = np.array(addit_matrix) answer = np.linalg.solve(cef_matrix, addit_matrix) y_x = list(answer[0] * (np.array(ranged_x) ** 2) + answer[1] * np.array(ranged_x) + answer[2]) d2 = 0 s2 = 0 e_y = [0] * Something.v for i in range(Something.v): e_y[i] = ranged_y[i] - y_x[i] d2 += (y_x[i] - y_avr) ** 2 s2 += (ranged_y[i] - y_avr) ** 2 s_ey = math.sqrt((1 / (Something.v - 2)) * sum(np.array(e_y) ** 2)) a_y = 1 / Something.v * sum(np.abs(np.array(e_y) / np.array(ranged_y))) d2 /= Something.v s2 /= Something.v print('\nПараболическая регрессия') print('R^2_y - {}'.format(d2 / s2)) print('S_ey - {}'.format(s_ey)) print('A_y - {}'.format(a_y)) fig, ax = plt.subplots() ax.set_xlabel('sp') ax.set_ylabel('sfm') # ax.set_title('sss') ax.plot(ranged_x, y_x, '-') ax.plot(d_x, d_y, 'o') fig.savefig('resources/parab_regression.png') def lab5_p5(self, data_x, data_y, x_avr, y_ln_avr, disp_x, ranged_x, ranged_y, y_avr): k = (self.ig_mean_exp - x_avr * y_ln_avr) / disp_x b = math.exp(y_ln_avr - k * x_avr) y_x = list(b * np.exp(k * np.array(ranged_x))) d2 = 0 s2 = 0 e_y = [0] * Something.v for i in range(Something.v): e_y[i] = ranged_y[i] - y_x[i] d2 += (y_x[i] - y_avr) ** 2 s2 += (ranged_y[i] - y_avr) ** 2 s_ey = math.sqrt((1 / (Something.v - 2)) * sum(np.array(e_y) ** 2)) a_y = 1 / Something.v * sum(np.abs(np.array(e_y) / np.array(ranged_y))) d2 /= Something.v s2 /= Something.v print('\nПоказательная 
регрессия') print('R^2_y - {}'.format(d2 / s2)) print('S_ey - {}'.format(s_ey)) print('A_y - {}'.format(a_y)) fig, ax = plt.subplots() ax.set_xlabel('sp') ax.set_ylabel('sfm') ax.plot(data_x, data_y, 'o') ax.plot(ranged_x, y_x, '-0') fig.savefig('resources/pokazat.png') sp = Something(df1, 0) # work staff sp.calc_ivs() sp.calc_freq() sp.calc_rel_freq() sp.calc_xi_arr() sp.calc_xi_plus_half_of_h() sp.calc_cum_freq() sp.calc_cum_rel_freq() sp.calc_ui() # lab1 sp.ranged_to_csv() sp.ivs_to_csv() sp.draw_polygon(sp.freq) sp.draw_polygon(sp.rel_freq) sp.draw_hist(sp.freq) sp.draw_hist(sp.rel_freq) sp.draw_cumulates(sp.freq) sp.draw_cumulates(sp.rel_freq) sp.draw_emp_func() # lab2 sp.lab2_p1() sp.lab2_p2() sp.calc_cond_vi() sp.calc_mu_i() sp.lab2_p3() sp.calc_x_avr_mean_stand() sp.calc_dispersion_stand() sp.calc_sd() sp.calc_fixed_dispersion() sp.calc_fixed_sd() sp.calc_asm_cef() sp.calc_kurtosis_cef() sp.calc_mode() sp.calc_median() sp.calc_variation_cef() print(sp.calc_x_avr_mean_cond()) print(sp.calc_dispersion_cond()) print(sp.x_avr_mean) print(sp.dispersion) print(sp.vi) print(sp.mu_i) print(sp.dispersion) print(sp.sd) print(sp.fixed_dispersion) print(sp.fixed_sd) print(sp.asm_cef) print(sp.kurtosis_cef) print(sp.xi) print(sp.mode) print(sp.median) print(sp.variation_cef) # lab 3 sp.calc_conf_interval_of_exp_v(0.95) sp.calc_conf_interval_of_exp_v(0.99) sp.dict_to_csv() sp.calc_conf_interval_of_sd(0.95, 0.138) sp.calc_conf_interval_of_sd(0.99, 0.190) sp.dict_sd_to_csv() sp.check_pirson_test_normal_law(0.05) |