728x90
선형회귀 개요
1. 패키지 임포트
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.graphics.api as smg
import patsy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
2. 데이터 생성
# Toy data set: five observations of a response (y) and two predictors.
# Each series is a run of consecutive integers, so x1 = y + 5 and
# x2 = y + 10 hold exactly.
y = np.arange(1, 6)
x1 = np.arange(6, 11)
x2 = np.arange(11, 16)
# Name -> array mapping handed to the formula parser as its data source.
data = dict(y=y, x1=x1, x2=x2)
3. 모델 생성
# Build the response/design matrices for a model with an explicit
# intercept (1), both predictors and their interaction (x1:x2).
# NOTE: y is rebound to the response DataFrame here; the original array
# is still reachable through data["y"].
y, X = patsy.dmatrices(
    "y~1+x1+x2+x1:x2",
    data,
    return_type="dataframe",
)
# Ordinary least squares fit of y on the columns of X.
model = sm.OLS(endog=y, exog=X)
result = model.fit()
# Bare expression: shown as the cell output when run in a notebook.
result.params
~":" 대신에 "*"을 써줌~
# Same model written with "*" instead of ":"; the accompanying text
# states that this gives the same result as the explicit formula.
y, X = patsy.dmatrices(
    "y~x1*x2",
    data,
    return_type="dataframe",
)
model2 = sm.OLS(endog=y, exog=X)
result2 = model2.fit()
# Bare expression: shown as the cell output when run in a notebook.
result2.params
위의 수식과 같은 결과를 나타냄..!
# Main-effects-only model: y regressed on x1 and x2, no interaction term.
y, X = patsy.dmatrices(
    "y~x1+x2",
    data,
    return_type="dataframe",
)
model3 = sm.OLS(endog=y, exog=X)
result3 = model3.fit()
# Bare expression: shown as the cell output when run in a notebook.
result3.params
~로그/삼각함수도 표현 가능~
# Formula terms may apply functions to the predictors: here log, cos and
# sin transforms. The formula refers to them as np.*, so numpy must be
# imported under the name np in this scope.
y, X = patsy.dmatrices(
    "y~np.log(x1)+np.cos(x2)+np.sin(x1+x2)",
    data,
    return_type="dataframe",
)
model4 = sm.OLS(endog=y, exog=X)
result4 = model4.fit()
# Bare expression: shown as the cell output when run in a notebook.
result4.params
범주 생성
: Patsy 패키지는 범주형 변수를 생성할 수 있다.
자동으로 숫자값을 C(x1) 이렇게 지정하면 카테고리화됨..!
# Treat the numeric x1 as a categorical variable via C(x1); "-1" drops
# the intercept from the formula. Print the resulting design matrix.
y, X = patsy.dmatrices(
    "y~-1+C(x1)",
    data,
    return_type="dataframe",
)
print(X)
1. 선형회귀 분석
# Ice cream data: ice cream consumption, customer income, ice cream price
# and daily temperature ("Icecream" data set from the R package "Ecdat").
dataset = sm.datasets.get_rdataset("Icecream", package="Ecdat")
# Regress consumption on income, price and temperature (with intercept).
model = smf.ols(formula="cons~1+income+price+temp", data=dataset.data)
result = model.fit()
print(result.summary())
~ income์ ์ ์ธํ๊ณ ์๋น๋~๊ฐ๊ฒฉ, ์จ๋์ ํ๊ท~
# Regression of consumption on price and temperature (income left out).
model = smf.ols(formula="cons~1+price+temp", data=dataset.data)
result = model.fit()
print(result.summary())
2. 이산회귀분석 : 로지스틱회귀분석
~iris 데이터 불러오기~
# Load the iris data set and keep only the two species used for the
# binary model; .copy() gives an independent frame that is safe to modify.
df = sm.datasets.get_rdataset("iris").data
df_subset = df.loc[df["Species"].isin(["versicolor", "virginica"])].copy()
# Recode the response as 0/1: versicolor -> 1, virginica -> 0.
df_subset["Species"] = df_subset["Species"].map(
    {"versicolor": 1, "virginica": 0}
)
# Replace the dots in the column names with underscores so the columns
# can be written as plain identifiers in a formula.
df_subset = df_subset.rename(
    columns={
        "Sepal.Length": "Sepal_Length",
        "Sepal.Width": "Sepal_Width",
        "Petal.Length": "Petal_Length",
        "Petal.Width": "Petal_Width",
    }
)
~로지스틱회귀 모델 생성~
# Logistic regression of the 0/1 species code on the two petal measurements.
model = smf.logit(
    formula="Species ~ Petal_Length + Petal_Width",
    data=df_subset,
)
result = model.fit()
print(result.summary())
~plot~
# Scatter plot of both classes together with the fitted decision boundary.
params = result.params
# Solve Intercept + b_len * length + b_wid * width = 0 for width:
# width = alpha0 + alpha1 * length.
alpha0 = -params['Intercept'] / params['Petal_Width']
alpha1 = -params['Petal_Length'] / params['Petal_Width']
# Petal-length range over which the boundary line is drawn.
_x = np.array([3.0, 7.0])

fig, ax = plt.subplots(1, 1, figsize=(8, 4))
# One square-marker series per class, in the order virginica (0),
# versicolor (1).
for species_code, species_label in ((0, 'virginica'), (1, 'versicolor')):
    group = df_subset[df_subset.Species == species_code]
    ax.plot(
        group.Petal_Length.values,
        group.Petal_Width.values,
        's',
        label=species_label,
    )
# Decision boundary implied by the fitted coefficients.
ax.plot(_x, alpha0 + alpha1 * _x)
ax.set_xlabel('Petal length')
ax.set_ylabel('Petal width')
ax.legend()
=> 잘 분류하고 있음을 확인..!
728x90
'데이터 분석 > 04. Data Analysis' 카테고리의 다른 글
[통계적 모델링] 시계열 분석 - 정상성(stationary)과 차분 (0) | 2022.03.19 |
---|---|
[통계적 모델링] 시계열 분석의 분해 (0) | 2022.03.18 |
[Python] 일원 분산 분석(ANOVA) (0) | 2022.03.17 |
[Python] 상관 관계 분석 (0) | 2022.03.17 |
[Python] 등분산 검정 (0) | 2022.03.16 |