# Author: tmlab / Date: 2017-03-25 17:36 / Category: Analytics
# --- Strings: concatenation, repetition and indexing ---
a, b = "hel", "lo"
a + b  # concatenation (result is only echoed in an interactive session)
a * 2  # repetition
# the whole string, its second character and its last character
print(a, a[1], a[-1], sep="\n")

# --- Slicing ---
a = "Life is too short, You eat Chiken"
a[0:4]  # the element at index 4 is not included
# from index 5 on, the first 11 characters, index 11 up to (not including)
# the last character, and a full copy
print(a[5:], a[:11], a[11:-1], a[:], sep="\n")

# --- printf-style formatting (%% yields a literal percent sign) ---
print("I eat %d apples" % 3)
print("i eat %d apples, %d peaches" % (3, 4))
print("i drink %d%% of waters" % 50)
# --- Lists: mixed element types, indexing and nesting ---
a = [1, "a", [1, 2, 3], {1, 2, 3}, (1, 2, 3)]
a
print(a[0])
print(a[2])
print(a[2][1])  # index into the nested list

# --- List concatenation and repetition ---
a = [1, 2, 3]
b = [2, 3, 4]
print(a + b)
print(a * 2)
print(str(a[2]) + "hi")  # convert the int first, then concatenate
# Adding an int to a str is not defined. Show the resulting TypeError
# instead of letting it abort the rest of the script.
try:
    print(a[2] + "hi")
except TypeError as exc:
    print("TypeError:", exc)
# --- Tuples: the different ways of writing one ---
t1 = ()
t2 = (1,)  # the trailing comma makes this a one-element tuple
t3 = (1, 2, 3)
t4 = 1, 2, 3  # parentheses are optional
t5 = ('a', 'b', ('ab', 'cd'))

# Tuples do not support item assignment. Show the TypeError for each
# attempt instead of letting it abort the rest of the script.
try:
    t2[0] = -1
except TypeError as exc:
    print("TypeError:", exc)
try:
    t2[0] = ()
except TypeError as exc:
    print("TypeError:", exc)
# --- Dictionaries ---
dic = {
    'name': 'chj',
    'age': '29',
    'gender': 'male',
}
dic

# Adding key/value pairs
a = {1: 'a'}
print(a)
a[2] = 'b'
print(a)
a["hi"] = "test"
print(a)

# Removing key/value pairs
del a[1]
print(a)
del a['hi']
print(a)

# A key repeated in a literal keeps only the last value given for it
a = {1: 'a', 1: 'b'}
print(a)
# --- Sets ---
a = set("Hello")  # duplicate characters collapse into one element
a
b = list(a)  # convert to a list so it can be indexed
print(b)
b[1]

s1 = {1, 2, 3, 4, 5, 6}
s2 = {4, 5, 6, 7, 8, 9}

# Intersection
print(s1 & s2)
print(s1.intersection(s2))

# Union
print(s1 | s2)
print(s1.union(s2))

# Difference
print(s1 - s2)
print(s1.difference(s2))
# --- if / elif / else ---
a = "hi"
if a == "oh":
    print("hello")
elif a == "hi":
    print("world")
else:
    print("!!")

# Values are compared with ==. `is` tests object identity, which only
# happens to work for some literals and is not a value comparison.
a = "oh"
if a == "oh":
    print("hello")
elif a == "hi":
    print("world")
else:
    print("!!")

# `|` is the bitwise OR operator and binds tighter than comparisons, so
# each comparison has to be parenthesised when it is combined with `|`.
a = 10
if (a == 0) | (a == 10):
    print("hello")
elif (a == 5) | (a == 1):
    print("world")
else:
    print("!!")

###############################
# The boolean operator `or` is the usual way to combine conditions.
a = 10
if a == 0 or a == 10:
    print("hello")
elif a == 5 or a == 1:
    print("world")
else:
    print("!!")

# Membership test with `in`
a = 7
if a in [1, 2, 3]:
    print("hello")
elif a in [7, 8, 9]:
    print("world")
else:
    print("!!")
# --- while loop: print 1..9 ---
a = 1
while a < 10:
    print(a)
    a += 1

# --- for loop over a list ---
test_list = ["one", "two", "three"]
for i in test_list:
    print(i)

# Each element is unpacked into three names
test_list = [(1, 2, 3), (4, 5, 6), [7, 8, 9]]
for (first, second, third) in test_list:
    print(first + second + third)

# `continue` skips the rest of the body for the value 5
test_list = [1, 2, 3, 4, 5, 6]
for i in test_list:
    if i == 5:
        continue
    print(i)

# range(start, stop, step)
print(range(5, 10, 2))
for i in range(5, 10, 2):
    print(i)
range(10)
import statistics as stat

# Generator expressions can be passed directly to aggregating functions.
print(sum(n + 1 for n in range(10)))
print(stat.mean(n for n in range(10)))
# On its own a generator expression only builds a lazy generator object.
(n for n in range(10))
import numpy
numpy.__version__
import numpy as np

# --- Creating an array and inspecting it ---
a = np.array([0, 1, 2, 3, 4, 5])
a
print(a.ndim)
print(a.shape)

# --- reshape: b shares its data with a ---
b = a.reshape((3, 2))
print(b)
print(b.ndim)
print(b.shape)
b[1][0] = 77
print(b)
print(a)  # the change made through b is visible in a

# --- copy(): c owns its data, so a is left untouched ---
c = a.reshape((3, 2)).copy()
print(c)
c[0][0] = -99
print(c)
print(a)

# --- Element-wise arithmetic on arrays versus plain lists ---
a * 2
a ** 2
[1, 2, 3, 4, 5] * 2  # on a list, * repeats the list
# ** is not defined for lists. Show the TypeError instead of letting it
# abort the rest of the script.
try:
    [1, 2, 3, 4, 5] ** 2
except TypeError as exc:
    print("TypeError:", exc)

# --- Indexing with an index array and with a boolean mask ---
print(a)
print(a[np.array([2, 3, 4])])
a > 4
a[a > 4]
a[a > 4] = 4  # cap every value above 4, in place
a
a.clip(0, 4)  # returns a clipped array; a itself is not modified
import timeit

# Time three ways of computing a sum of squares over 0..999,
# running each statement 10000 times.
normal_py_sec = timeit.Timer(
    'sum(x*x for x in range(1000))',
).timeit(number=10000)
naive_np_sec = timeit.Timer(
    'sum(na*na)',
    setup="import numpy as np; na=np.arange(1000)",
).timeit(number=10000)
good_np_sec = timeit.Timer(
    "na.dot(na)",
    setup="import numpy as np; na=np.arange(1000)",
).timeit(number=10000)

print("Normal Python: %f sec" % normal_py_sec)
print("Naive NumPy: %f sec" % naive_np_sec)
print("Good NumPy: %f sec" % good_np_sec)
# --- dtype of the array depends on the element types given ---
a = np.array([1, 2, 3])
a.dtype
# mixing an int with a str
np.array([1, "stringy"])
# mixing an int, a str and a set
np.array([1, "stringy", {1, 2, 3}])
import scipy
import numpy
scipy.__version__
# SciPy used to re-export NumPy functions at its top level; those aliases
# were deprecated in SciPy 1.4, so look the attribute up defensively.
getattr(scipy, "dot", None) is numpy.dot
import scipy as sp

# Load the tab-separated data: column 0 is used as x, column 1 as y.
data = np.genfromtxt("web_traffic.tsv", delimiter="\t")
print(data[:10])
print(data.shape)
x = data[:, 0]
y = data[:, 1]

# Count the rows whose y value is NaN, then drop them from both vectors.
missing = np.isnan(y)
np.sum(missing)
x = x[~missing]
y = y[~missing]
np.sum(np.isnan(y))  # no NaN values remain in y
import matplotlib.pyplot as plt

# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last month")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
# One tick every 7*24 x-units, labelled "week 0" .. "week 9"
plt.xticks(
    [w * 7 * 24 for w in range(10)],
    ['week %i' % w for w in range(10)],
)
plt.autoscale(tight=True)
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")
plt.show()
import matplotlib.pyplot as plt

# Select the interactive notebook backend when running under IPython.
# `%matplotlib nbagg` is IPython-only syntax; in a plain Python interpreter
# get_ipython is undefined and this step is skipped.
try:
    get_ipython().run_line_magic("matplotlib", "nbagg")
except NameError:
    pass

# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last month")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
# One tick every 7*24 x-units, labelled "week 0" .. "week 9"
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
plt.autoscale(tight=True)
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")
plt.show()
def error(f, x, y):
    """Return the sum of squared differences between f(x) and y.

    f is called once with x as a whole; the result is subtracted from y
    element-wise, squared and summed.
    """
    return np.sum((f(x) - y) ** 2)
# Fit a degree-1 polynomial; full=True also returns fit diagnostics.
fp1, residuals, rank, sv, rcond = np.polyfit(x, y, 1, full=True)
print("Model parameter: %s" % fp1)
print(residuals)
f1 = np.poly1d(fp1)
print(error(f1, x, y))

# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last month")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
plt.autoscale(tight=True)
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")

# Draw the fitted line over 1000 evenly spaced points up to the last x.
fx = np.linspace(0, x[-1], 1000)
plt.plot(fx, f1(fx), linewidth=4)
plt.legend(["d = %i" % f1.order], loc="upper left")
# Show the figure only after the fitted line and legend have been added.
plt.show()
# Fit a degree-2 polynomial and report its squared error.
f2p = np.polyfit(x, y, 2)
print(f2p)
f2 = np.poly1d(f2p)
print(error(f2, x, y))

from matplotlib.legend_handler import HandlerLine2D

# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
plt.autoscale(tight=True)
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")

# Overlay the degree-1 and degree-2 fits.
fx = np.linspace(0, x[-1], 1000)
line1, = plt.plot(fx, f1(fx), linewidth=4, color="blue", label="d = 1")
line2, = plt.plot(fx, f2(fx), linewidth=4, color="red", label="d = 2")
plt.legend(handler_map={line2: HandlerLine2D(numpoints=4)}, loc="upper left")
# Show the figure only after the fitted lines and legend have been added.
plt.show()
# Fit polynomials of degree 3, 10 and 100 and report each squared error.
f3p = np.polyfit(x, y, 3)
f3 = np.poly1d(f3p)
print(error(f3, x, y))
f10p = np.polyfit(x, y, 10)
f10 = np.poly1d(f10p)
print(error(f10, x, y))
f100p = np.polyfit(x, y, 100)
f100 = np.poly1d(f100p)
print(error(f100, x, y))

# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
plt.autoscale(tight=True)
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")

# Overlay all fitted polynomials.
fx = np.linspace(0, x[-1], 1000)
line1, = plt.plot(fx, f1(fx), linewidth=3, label="d = 1")
line2, = plt.plot(fx, f2(fx), linewidth=3, color="red", label="d = 2")
line3, = plt.plot(fx, f3(fx), linewidth=3, color="green", label="d = 3")
line4, = plt.plot(fx, f10(fx), linewidth=3, color="yellow", label="d = 10")
# The last label uses the order reported by the fitted poly1d object.
line5, = plt.plot(fx, f100(fx), linewidth=3, color="orange",
                  label="d = %i" % f100.order)
plt.legend(handler_map={line1: HandlerLine2D(numpoints=5)}, loc="upper left")
# Show the figure only after the fitted lines and legend have been added.
plt.show()
# Inflection point computed as 3.5 * 7 * 24. It is converted to int
# because it is used as a slice index, and a float index raises TypeError.
inflection = int(3.5 * 7 * 24)
xa = x[:inflection]  # data before the inflection point
ya = y[:inflection]
xb = x[inflection:]  # data after the inflection point
yb = y[inflection:]

# Fit a separate straight line to each part and add up the two errors.
fa = np.poly1d(np.polyfit(xa, ya, 1))
fb = np.poly1d(np.polyfit(xb, yb, 1))
fa_error = error(fa, xa, ya)
fb_error = error(fb, xb, yb)
print("Error inflection=%f" % (fa_error + fb_error))
from matplotlib.legend_handler import HandlerLine2D

# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
plt.autoscale(tight=True)
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")

fx = np.linspace(0, x[-1], 1000)
# fa is drawn over the whole range, fb only from sample 730 of fx onwards.
# NOTE(review): 730 is a hard-coded start index; confirm it matches the
# intended start of the second segment.
line1, = plt.plot(fx, fa(fx), linewidth=4, color="blue", label="d = 1")
line2, = plt.plot(fx[730:], fb(fx[730:]), linewidth=4, color="red",
                  label="d = 1")
plt.legend(handler_map={line2: HandlerLine2D(numpoints=4)}, loc="upper left")
# Show the figure only after the fitted lines and legend have been added.
plt.show()
# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
# Extend the x axis 300 units past the last sample and fix the y range.
axes = plt.gca()
axes.set_xlim([0, x[-1] + 300])
axes.set_ylim([0, 10000])
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")

# Evaluate every model over the extended range.
fx = np.linspace(0, x[-1] + 300, 1000)
line1, = plt.plot(fx, f1(fx), linewidth=4, label="d = 1")
line2, = plt.plot(fx, f2(fx), linewidth=4, color="red", label="d = 2")
line3, = plt.plot(fx, f3(fx), linewidth=4, color="green", label="d = 3")
line4, = plt.plot(fx, f10(fx), linewidth=4, color="yellow", label="d = 10")
line5, = plt.plot(fx, f100(fx), linewidth=4, color="orange",
                  label="d = %i" % f100.order)
plt.legend(handler_map={line1: HandlerLine2D(numpoints=5)}, loc="upper left")
# Show the figure only after the fitted lines and legend have been added.
plt.show()
# Squared error of each model fitted only to the data after the
# inflection point (xb, yb), starting with the degree-1 fit fb.
print("Error inflection=%f" % fb_error)

# Degree-2 polynomial
f2b = np.poly1d(np.polyfit(xb, yb, 2))
f2b_error = error(f2b, xb, yb)
print("Error inflection=%f" % f2b_error)

# Degree-3 polynomial
f3b = np.poly1d(np.polyfit(xb, yb, 3))
f3b_error = error(f3b, xb, yb)
print("Error inflection=%f" % f3b_error)

# Degree-10 polynomial
f10b = np.poly1d(np.polyfit(xb, yb, 10))
f10b_error = error(f10b, xb, yb)
print("Error inflection=%f" % (f10b_error))

# Degree-100 polynomial
f100b = np.poly1d(np.polyfit(xb, yb, 100))
f100b_error = error(f100b, xb, yb)
print("Error inflection=%f" % f100b_error)
# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
plt.autoscale(tight=True)
# Extend the x axis 300 units past the last sample and fix the y range.
axes = plt.gca()
axes.set_xlim([0, x[-1] + 300])
axes.set_ylim([0, 10000])
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")

# Overlay the models fitted to the data after the inflection point.
fx = np.linspace(0, x[-1] + 300, 1000)
line1, = plt.plot(fx, fb(fx), linewidth=4, label="d = 1")
line2, = plt.plot(fx, f2b(fx), linewidth=4, color="red", label="d = 2")
line3, = plt.plot(fx, f3b(fx), linewidth=4, color="green", label="d = 3")
line4, = plt.plot(fx, f10b(fx), linewidth=4, color="yellow", label="d = 10")
# Label the curve with the order of the polynomial actually plotted
# (f100b), not that of the earlier full-data fit f100.
line5, = plt.plot(fx, f100b(fx), linewidth=4, color="orange",
                  label="d = %i" % f100b.order)
plt.legend(handler_map={line1: HandlerLine2D(numpoints=5)}, loc="upper left")
# Show the figure only after the fitted lines and legend have been added.
plt.show()
# Hold out a random 30% of the post-inflection samples as a test set.
frac = 0.3
split_idx = int(frac * len(xb))
shuffled = np.random.permutation(len(xb))  # shuffled indices 0..len(xb)-1
test = sorted(shuffled[:split_idx])
train = sorted(shuffled[split_idx:])

# Fit each model on the training indices only.
fbt1 = np.poly1d(np.polyfit(xb[train], yb[train], 1))
fbt2 = np.poly1d(np.polyfit(xb[train], yb[train], 2))
print("fbt2(x)= \n%s" % fbt2)
print("fbt2(x)-100,000= \n%s" % (fbt2 - 100000))
fbt3 = np.poly1d(np.polyfit(xb[train], yb[train], 3))
fbt10 = np.poly1d(np.polyfit(xb[train], yb[train], 10))
fbt100 = np.poly1d(np.polyfit(xb[train], yb[train], 100))

# Evaluate every model on the held-out test indices.
print("Test errors for only the time after inflection point")
for f in [fbt1, fbt2, fbt3, fbt10, fbt100]:
    print("Error d=%i: %f" % (f.order, error(f, xb[test], yb[test])))
# Scatter plot of (x, y) with marker size 10
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks([w * 7 * 24 for w in range(10)],
           ['week %i' % w for w in range(10)])
plt.autoscale(tight=True)
# Extend the x axis 300 units past the last sample and fix the y range.
axes = plt.gca()
axes.set_xlim([0, x[-1] + 300])
axes.set_ylim([0, 10000])
# Light-grey grid lines
plt.grid(True, linestyle="-", color="0.75")

# Overlay the models fitted on the training subset.
fx = np.linspace(0, x[-1] + 300, 1000)
line1, = plt.plot(fx, fbt1(fx), linewidth=4, label="d = 1")
line2, = plt.plot(fx, fbt2(fx), linewidth=4, color="red", label="d = 2")
line3, = plt.plot(fx, fbt3(fx), linewidth=4, color="green", label="d = 3")
line4, = plt.plot(fx, fbt10(fx), linewidth=4, color="yellow", label="d = 10")
# Label the curve with the order of the polynomial actually plotted
# (fbt100), not that of the earlier full-data fit f100.
line5, = plt.plot(fx, fbt100(fx), linewidth=4, color="orange",
                  label="d = %i" % fbt100.order)
plt.legend(handler_map={line1: HandlerLine2D(numpoints=5)}, loc="upper left")
# Show the figure only after the fitted lines and legend have been added.
plt.show()
# Refit the degree-2 model on the training subset.
fbt2 = np.poly1d(np.polyfit(xb[train], yb[train], 2))
print("fbt2(x)=\n%s" % fbt2)
print("fbt2(x)-100,000 = \n%s" % (fbt2 - 100000))

from scipy.optimize import fsolve

# Find the root of fbt2(x) - 100000 starting from x0=800, then divide by
# 7*24 to express the result in weeks.
reached_max = fsolve(fbt2 - 100000, x0=800) / (7 * 24)
print("100,000 hits/hour expected at week %f" % reached_max[0])