Главная страница

обработка выборочных данных, нахождение интервальных оценок параметров распределения. обработка_выборочных_данных. Обработка выборочных данных. Нахождение интервальных оценок параметров распределения. Проверка статистической гипотезы


Скачать 41.76 Kb.
Название: Обработка выборочных данных. Нахождение интервальных оценок параметров распределения. Проверка статистической гипотезы
Анкор: обработка выборочных данных, нахождение интервальных оценок параметров распределения
Дата: 05.05.2022
Размер: 41.76 Kb.
Формат файла: docx
Имя файла: обработка_выборочных_данных.docx
Тип: Отчет
#514032
страница 2 из 3
1   2   3

xk = self.ranged_seq[0]
xk1 = xk + self.h
for i in range(Something.k):
ni = 0
for j in self.ranged_seq:
if i == 0:
if xk <= j <= xk1:
ni += 1
else:
if xk < j <= xk1:
ni += 1
self.freq[i] = ni
xk = xk1
xk1 += self.h

def calc_rel_freq(self):
    """Fill ``self.rel_freq`` from ``self.freq``.

    For every index below ``Something.k`` the value in ``self.freq`` is
    divided by ``Something.v`` (presumably the sample size - confirm in
    the class definition) and stored at the same index of
    ``self.rel_freq``.
    """
    divisor = Something.v
    for idx in range(Something.k):
        self.rel_freq[idx] = self.freq[idx] / divisor

def calc_cum_freq(self):
    """Fill ``self.cum_freq`` with running totals of ``self.freq``.

    The first entry copies ``self.freq[0]``; every later entry is the
    previously stored cumulative value plus the current frequency.
    Only the first ``Something.k`` positions are written.
    """
    for idx in range(Something.k):
        count = self.freq[idx]
        # The first interval has no predecessor to add to.
        self.cum_freq[idx] = count if idx == 0 else self.cum_freq[idx - 1] + count

def calc_cum_rel_freq(self):
    """Fill ``self.cum_rel_freq`` with running totals of ``self.rel_freq``.

    The first entry copies ``self.rel_freq[0]``; every later entry is the
    previously stored cumulative value plus the current relative
    frequency. Only the first ``Something.k`` positions are written.
    """
    for idx in range(Something.k):
        share = self.rel_freq[idx]
        # The first interval has no predecessor to add to.
        self.cum_rel_freq[idx] = share if idx == 0 else self.cum_rel_freq[idx - 1] + share

def calc_conf_interval_of_exp_v(self, conf_accuracy):
    """Record an interval around ``self.x_avr_mean`` for ``conf_accuracy``.

    The half-width is the Student t quantile at ``(1 + conf_accuracy) / 2``
    with ``Something.v - 1`` degrees of freedom, multiplied by
    ``self.fixed_sd`` and divided by ``sqrt(Something.v)``.

    The interval is rendered as the string ``"(low-high)"`` and stored as
    a key of ``self.conf_interval_accuracy`` whose value is
    ``conf_accuracy``.
    """
    t_quantile = sts.t.ppf((1 + conf_accuracy) / 2, Something.v - 1)
    half_width = t_quantile * self.fixed_sd / math.sqrt(Something.v)
    lower = self.x_avr_mean - half_width
    upper = self.x_avr_mean + half_width
    # ``!s`` keeps the explicit str() conversion of both bounds.
    label = f"({lower!s}-{upper!s})"
    self.conf_interval_accuracy[label] = conf_accuracy

def calc_conf_interval_of_sd(self, conf_accuracy, q):
    """Record an interval around ``self.fixed_sd`` for ``conf_accuracy``.

    The interval is rendered as a string and stored as a key of
    ``self.conf_interval_accuracy_sd`` whose value is ``conf_accuracy``.

    Args:
        conf_accuracy: value stored against the interval string.
        q: relative half-width applied to ``self.fixed_sd``.

    For ``q < 1`` the string is ``"(sd*(1-q);sd*(1+q))"``. For ``q >= 1``
    the lower bound ``sd*(1-q)`` is not positive, so it is written as ``0``
    and the string is ``"(0-sd*(1+q))"``. The two separators (``;`` and
    ``-``) are kept exactly as they were so existing output is unchanged.
    """
    upper = self.fixed_sd * (1 + q)
    if q < 1:
        conf_interval = '(' + str(self.fixed_sd * (1 - q)) + ';' + str(upper) + ')'
    else:
        # q == 1 used to match neither branch, leaving conf_interval
        # unbound and raising UnboundLocalError; its lower bound is 0,
        # so it belongs with the q > 1 case.
        conf_interval = '(' + str(0) + '-' + str(upper) + ')'
    self.conf_interval_accuracy_sd.update({conf_interval: conf_accuracy})

def dict_to_csv(self):
    """Write ``self.conf_interval_accuracy`` to a CSV file.

    Keys go to the ``Confident interval`` column and values to the
    ``Confident accuracy`` column. The file is
    ``resources/conf_interval_exp_value_<name>.csv`` (UTF-8, no index).
    """
    intervals = self.conf_interval_accuracy
    columns = {'Confident interval': intervals.keys(),
               'Confident accuracy': intervals.values()}
    table = pd.DataFrame(columns)
    table.to_csv('resources/conf_interval_exp_value_' + self.name + '.csv',
                 index=False, encoding='utf-8')

def dict_sd_to_csv(self):
    """Write ``self.conf_interval_accuracy_sd`` to a CSV file.

    Keys go to the ``Confident interval`` column and values to the
    ``Confident accuracy`` column. The file is
    ``resources/conf_interval_sd_<name>.csv`` (UTF-8, no index).
    """
    intervals = self.conf_interval_accuracy_sd
    columns = {'Confident interval': intervals.keys(),
               'Confident accuracy': intervals.values()}
    table = pd.DataFrame(columns)
    table.to_csv('resources/conf_interval_sd_' + self.name + '.csv',
                 index=False, encoding='utf-8')

def ranged_to_csv(self):
    """Write ``self.ranged_seq`` to ``resources/rs_<name>.csv``.

    The single column is titled with ``self.name``; the file is UTF-8
    and has no index column.
    """
    table = pd.DataFrame({self.name: self.ranged_seq})
    table.to_csv('resources/rs_' + self.name + '.csv',
                 index=False, encoding='utf-8')

def lab2_p1(self):
    """Write the interval table to ``resources/l2_p1_<name>.csv``.

    Columns: ``self.var_srs`` (titled with ``self.name``),
    ``self.xi_plus_half_of_h``, ``self.cum_freq`` and ``self.freq``.
    The file is UTF-8 and has no index column.
    """
    columns = {
        self.name: self.var_srs,
        'middle of intervals': self.xi_plus_half_of_h,
        'Accumulated frequency': self.cum_freq,
        'Frequency': self.freq,
    }
    table = pd.DataFrame(columns)
    table.to_csv('resources/l2_p1_' + self.name + '.csv',
                 index=False, encoding='utf-8')

def lab2_p2(self):
    """Write ``self.ui`` and ``self.freq`` to ``resources/l2_ui_<name>.csv``.

    The columns are titled ``Conditional variants`` and ``Frequency``;
    the file is UTF-8 and has no index column.
    """
    columns = {
        'Conditional variants': self.ui,
        'Frequency': self.freq,
    }
    table = pd.DataFrame(columns)
    table.to_csv('resources/l2_ui_' + self.name + '.csv',
                 index=False, encoding='utf-8')

def lab2_p3(self):
    """Write ``self.vi`` and ``self.mu_i`` to ``resources/l2_mu_v_<name>.csv``.

    The columns are titled ``Conditional empirical moments`` and
    ``Central empirical moments``; the file is UTF-8 and has no index
    column.
    """
    columns = {
        'Conditional empirical moments': self.vi,
        'Central empirical moments': self.mu_i,
    }
    table = pd.DataFrame(columns)
    table.to_csv('resources/l2_mu_v_' + self.name + '.csv',
                 index=False, encoding='utf-8')

def ivs_to_csv(self):
    """Write the interval series to ``resources/ivs_<name>.csv``.

    Columns: ``self.var_srs`` (titled with ``self.name``), ``self.freq``
    and ``self.rel_freq``. The file is UTF-8 and has no index column.
    """
    columns = {
        self.name: self.var_srs,
        'Частота': self.freq,
        'Частость': self.rel_freq,
    }
    table = pd.DataFrame(columns)
    table.to_csv('resources/ivs_' + self.name + '.csv',
                 index=False, encoding='utf-8')

def draw_polygon(self, param):
    """Plot a frequency polygon and save it as a PNG under ``resources/``.

    ``param`` is compared by equality with ``self.freq`` and
    ``self.rel_freq`` to choose the series to draw against
    ``self.xi_plus_half_of_h``. If it equals neither, the figure is
    created with only its x label and nothing is saved.
    """
    fig, ax = plt.subplots()
    ax.set_xlabel(self.name)

    if param == self.freq:
        y_label = 'Absolute frequency'
        title = 'Frequency polygon'
        series = self.freq
        prefix = 'resources/freq_poly_'
    elif param == self.rel_freq:
        y_label = 'Relative frequency'
        title = 'Relative Frequency polygon'
        series = self.rel_freq
        prefix = 'resources/rel_freq_poly_'
    else:
        return

    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.plot(self.xi_plus_half_of_h, series, 'o-')
    fig.savefig(prefix + self.name + '.png')

def draw_hist(self, param):
    """Plot a weighted histogram of ``self.ranged_seq`` and save it as a PNG.

    ``param`` is compared by equality with ``self.freq`` and
    ``self.rel_freq``. In the first case every observation is weighted by
    ``1 / self.h``; in the second by ``1 / (len(self.ranged_seq) * self.h)``.
    The histogram uses ``Something.k`` bins. If ``param`` equals neither
    list, the figure is created with only its x label and nothing is saved.
    """
    fig, ax = plt.subplots()
    ax.set_xlabel(self.name)

    if param == self.freq:
        y_label = 'Absolute frequency'
        title = 'Absolute Frequency hist'
        unit_weight = 1. / self.h
        prefix = 'resources/freq_hist_'
    elif param == self.rel_freq:
        y_label = 'Relative frequency'
        title = 'Relative Frequency hist'
        unit_weight = 1. / (len(self.ranged_seq) * self.h)
        prefix = 'resources/rel_freq_hist_'
    else:
        return

    ax.set_ylabel(y_label)
    ax.set_title(title)
    # Every observation carries the same weight.
    weights = np.zeros_like(self.ranged_seq) + unit_weight
    ax.hist(self.ranged_seq, bins=Something.k, weights=weights, edgecolor='black')
    fig.savefig(prefix + self.name + '.png')

def draw_cumulates(self, param):
    """Plot a cumulative-frequency curve and save it as a PNG.

    The x points are ``self.xi`` extended by one point ``self.h / 2``
    below its minimum and one point ``self.h`` above its maximum. The y
    points are two zeros, the cumulative series, and its maximum repeated
    once. ``param`` is compared by equality with ``self.freq`` and
    ``self.rel_freq`` to pick ``self.cum_freq`` or ``self.cum_rel_freq``.
    If it equals neither, nothing is plotted or saved.
    """
    x_points = [min(self.xi) - self.h / 2] + self.xi + [max(self.xi) + self.h]
    fig, ax = plt.subplots()
    ax.set_xlabel(self.name)
    # Trim a fifth of a step from both ends of the padded range.
    ax.set_xlim([min(x_points) + self.h / 5, max(x_points) - self.h / 5])

    if param == self.freq:
        cumulative = self.cum_freq
        y_label = 'Absolute accumulated frequency'
        title = 'Cumulates of acc_abs_freq'
        prefix = 'resources/cumulates_freq_'
    elif param == self.rel_freq:
        cumulative = self.cum_rel_freq
        y_label = 'Relative accumulated frequency'
        title = 'Cumulates of acc_rel_freq'
        prefix = 'resources/cumulates_rel_freq_'
    else:
        return

    y_points = [0.0, 0.0] + cumulative + [max(cumulative)]
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.plot(x_points, y_points, 'o-')
    fig.savefig(prefix + self.name + '.png')

def draw_emp_func(self):
    """Plot ``self.cum_rel_freq`` over padded ``self.xi`` and save it as a PNG.

    The x points are ``self.xi`` extended by one point ``self.h / 2``
    below its minimum and one point ``self.h`` above its maximum. The y
    points are two zeros, ``self.cum_rel_freq``, and its maximum repeated
    once. The image is written to
    ``resources/empirical_function_<name>.png``.
    """
    left_pad = min(self.xi) - self.h / 2
    right_pad = max(self.xi) + self.h
    x_points = [left_pad] + self.xi + [right_pad]

    fig, ax = plt.subplots()
    ax.set_xlabel(self.name)
    # Trim a fifth of a step from both ends of the padded range.
    ax.set_xlim([min(x_points) + self.h / 5, max(x_points) - self.h / 5])

    top = max(self.cum_rel_freq)
    y_points = [0.0, 0.0] + self.cum_rel_freq + [top]
    ax.set_ylabel('Relative accumulated frequency')
    ax.set_title('Empirical function of the interval variation series')
    ax.plot(x_points, y_points, 'o-')
    fig.savefig('resources/empirical_function_' + self.name + '.png')

def check_pirson_test_normal_law(self, sign_level):
    """Run a chi-square goodness-of-fit check against a normal law.

    For each of the ``Something.k`` intervals the normal probability is
    computed from the boundaries in ``self.xi``, standardised with
    ``self.x_avr_mean`` and ``self.fixed_sd``. The first interval is
    extended to minus infinity and the last to plus infinity. Each
    probability is multiplied by ``Something.v`` and compared with
    ``self.freq``. The observed statistic is the sum of
    ``(freq - expected)^2 / expected``; the critical value is
    ``chi2.ppf(1 - sign_level, Something.k - 3)``.

    Side effects: writes a text report (hypotheses and verdict) to the
    hard-coded ``Pirson_checkNormal_<name>`` file and a per-interval table
    to ``resources/l3_p3_<name>.csv``.

    Args:
        sign_level: significance level used for the critical value.
    """
    # NOTE(review): the report path is an absolute, machine-specific
    # location while the CSV below uses a relative ``resources/`` path;
    # kept unchanged here so output locations do not move.
    with open('C:/Users/sidor/PycharmProjects/math_stat_labs/resources/Pirson_checkNormal_' + self.name, "w+",
              encoding='utf-8') as file:
        file.write("H0 : Случайная величина {} распределена нормально "
                   "с параметрами a = {}, disp = {}\n"
                   "H1 : Случайная величина {} не распределена по нормальному закону".format(self.name, self.x_avr_mean, self.fixed_sd, self.name))
        degrees_of_freedom = Something.k - 3
        var_sq = self.calc_new_vs()
        pi = [0] * Something.k
        align_mi = [0] * Something.k
        diff_mi = [0] * Something.k
        diff_div_mi = [0] * Something.k

        def norm_cdf(boundary):
            # Standardise with THIS instance's deviation. The previous code
            # read the global ``sp.fixed_sd``, which is wrong for any
            # instance other than ``sp``.
            return sts.norm.cdf((boundary - self.x_avr_mean) / self.fixed_sd)

        for i in range(Something.k):
            if i == 0:
                pi[i] = norm_cdf(self.xi[i + 1])
            elif i == Something.k - 1:
                pi[i] = 1 - norm_cdf(self.xi[i])
            else:
                pi[i] = norm_cdf(self.xi[i + 1]) - norm_cdf(self.xi[i])
            align_mi[i] = pi[i] * Something.v
            diff_mi[i] = math.pow((self.freq[i] - align_mi[i]), 2)
            diff_div_mi[i] = diff_mi[i] / align_mi[i]

        pd.DataFrame({'Intervals': var_sq,
                      'mi': self.freq,
                      'pi': pi,
                      'mi_aligned': align_mi,
                      '((mi - mi\')^2': diff_mi,
                      '((mi - mi\')^2 / mi\'': diff_div_mi}).to_csv('resources/l3_p3_' + self.name + '.csv',
                                                                    index=False, encoding='utf-8')
        ksi_observed = sum(diff_div_mi)
        ksi_critical = chi2.ppf(1 - sign_level, degrees_of_freedom)

        file.write("\n Результат проверки гипотезы:")

        if ksi_observed < ksi_critical:
            file.write("\nksi_observed < ksi_critical => нет оснований отвергнуть H0 ")
        else:
            file.write("\nksi_observed > ksi_critical => случайная величина {}"
                       "не распределена по нормальному закону ".format(self.name))

def make_cool_dict(self):
    """Copy the summary statistics of this instance into ``self.cool_dict``.

    Eight labelled entries are added (or overwritten): expected value,
    dispersion, sd, asymmetry and kurtosis coefficients, mode, median and
    variation coefficient, each read from the attribute of the matching
    name.
    """
    # Read every attribute first so a missing one fails before any update.
    summary = (
        ('expected value', self.x_avr_mean),
        ('dispersion', self.dispersion),
        ('sd', self.sd),
        ('asm cef', self.asm_cef),
        ('kurtosis cef', self.kurtosis_cef),
        ('mode', self.mode),
        ('median', self.median),
        ('variation cef', self.variation_cef),
    )
    self.cool_dict.update(summary)


class Somebody:

def __init__(self):
    """Create empty state sized by ``Something.k``.

    ``self.mij`` is a ``k x k`` table of zero counters, and
    ``self.x_avr_y`` / ``self.y_avr_x`` are zero lists of length ``k``.
    The numeric accumulators start at 0 and the remaining results start
    as ``None`` (or an empty dict) until other methods fill them.
    """
    size = Something.k
    # One fresh list per row so rows do not share storage.
    self.mij = [[0] * size for _ in range(size)]
    self.ig_mean = self.ig_mean_exp = 0
    self.ig_dispersion_x = self.ig_dispersion_y = 0
    self.cme = self.param = self.cor_cef = None
    self.conf_interval_cor_cef = {}
    self.pxy = self.pyx = None
    self.nyx = self.nxy = None
    self.x_avr_y = [0] * size
    self.y_avr_x = [0] * size

def lab4_p1(self, dict1, dict2):
    """Store both parameter dicts and write them side by side to a CSV.

    ``self.param`` becomes ``[dict1, dict2]``. The CSV has a header row
    with an unnamed first column plus ``sp`` and ``sfm``; each following
    row holds one key of ``dict1`` with the values found under that key
    in ``dict1`` and ``dict2``. A key missing from ``dict2`` raises
    ``KeyError``.
    """
    self.param = [dict1, dict2]
    with open('C:/Users/sidor/PycharmProjects/math_stat_labs/resources/sp_sfm_params.csv', "w+",
              newline='', encoding='utf-8') as out_file:
        columns = ['', 'sp', 'sfm']
        writer = csv.DictWriter(out_file, fieldnames=columns)
        writer.writeheader()
        # The generator keeps rows written one at a time, in dict1 order.
        writer.writerows(
            {'': key, 'sp': dict1[key], 'sfm': dict2[key]}
            for key in dict1
        )

def calc_mij(self, data1, data2, xi, yi):
    """Count paired observations per (x interval, y interval) cell.

    For each of the first ``Something.v`` pairs ``(data1[i], data2[i])``
    the counter ``self.mij[j][l]`` is incremented when ``data1[i]`` lies
    in the ``j``-th interval of ``xi`` and ``data2[i]`` lies in the
    ``l``-th interval of ``yi``. Interval 0 is closed on both ends;
    every other interval excludes its lower boundary and includes its
    upper one. Counts are added to whatever ``self.mij`` already holds.
    """
    def falls_in(bounds, pos, value):
        # Only the first interval includes its lower boundary.
        above_lower = bounds[pos] <= value if pos == 0 else bounds[pos] < value
        return above_lower and value <= bounds[pos + 1]

    size = Something.k
    for obs in range(Something.v):
        for row in range(size):
            # When x misses this row, no cell of the row can match.
            if not falls_in(xi, row, data1[obs]):
                continue
            for col in range(size):
                if falls_in(yi, col, data2[obs]):
                    self.mij[row][col] += 1

def calc_intergroup_mean(self, xi, yi):
    """Accumulate ``xi[i] * yi[j] * mij[i][j]`` and divide by ``Something.v``.

    The sum runs over all ``Something.k x Something.k`` cells, starts from
    the current value of ``self.ig_mean``, and the divided result is
    stored back into ``self.ig_mean``.
    """
    size = Something.k
    running = self.ig_mean
    for row in range(size):
        x_value = xi[row]
        counts = self.mij[row]
        for col in range(size):
            running += x_value * yi[col] * counts[col]
    self.ig_mean = running / Something.v

def calc_intergroup_mean_exp(self, xi, yi):
    """Accumulate ``xi[i] * log(yi[j]) * mij[i][j]`` and divide by ``Something.v``.

    The sum runs over all ``Something.k x Something.k`` cells, starts from
    the current value of ``self.ig_mean_exp``, and the divided result is
    stored back into ``self.ig_mean_exp``. ``math.log`` raises
    ``ValueError`` for non-positive ``yi`` values.
    """
    size = Something.k
    running = self.ig_mean_exp
    for row in range(size):
        x_value = xi[row]
        counts = self.mij[row]
        for col in range(size):
            running += x_value * math.log(yi[col]) * counts[col]
    self.ig_mean_exp = running / Something.v

def calc_cme(self, x_avr, y_avr):
    """Store ``self.ig_mean - x_avr * y_avr`` in ``self.cme``."""
    product_of_means = x_avr * y_avr
    self.cme = self.ig_mean - product_of_means

def calc_condit_mean(self, data_x, data_y):
s_y = 0
m_sum_y = 0
m_sum_x = 0
s_x = 0
for i in range(Something.k):
for j in range(Something.k):
s_y += data_x[j] * self.mij[j][i]
1   2   3


написать администратору сайта