Browse Source

move to opensource

tobby48 5 years ago
parent
commit
435a3308d6

+ 0
- 75
src/kr/co/swh/lecture/mathorithm/crawler.py View File

@@ -1,75 +0,0 @@
1
-import requests
2
-from bs4 import BeautifulSoup  # BeautifulSoup import
3
-
4
class Movie:
    """One scraped ranking entry: title, link, rank and reply texts."""

    def __init__(self, n, u, rank, reples):
        """Store the scraped fields.

        Args:
            n: Movie title.
            u: Link (href) of the movie.
            rank: Position of the movie in the ranking.
            reples: Reply ("reple") texts collected for the movie.
        """
        self.name = n
        self.url = u
        self.ranking = rank
        self.reples = reples

    def __str__(self):
        """Return a printable summary; the labels are kept in Korean."""
        # str.format converts every field itself, so a non-string field
        # (for example a missing link stored as None) does not raise the
        # TypeError that '+' concatenation would.
        return "{}위\t제목:{}\t주소:{}\n리플:{}".format(
            self.ranking, self.name, self.url, self.reples
        )
12
-
13
class Naver:
    """Scraper for the Naver home page and the Naver movie ranking page.

    Both pages are downloaded and parsed once, in __init__; getTitles,
    getAlls and realTimeSearchWord read from those stored parse trees.
    """

    # Basic information
    naverHomeUrl = 'https://www.naver.com/'
    movieHomeUrl = 'https://movie.naver.com'
    movieRankingHomeUrl = 'https://movie.naver.com/movie/sdb/rank/rmovie.nhn'
    soupHtmlParser = 'html.parser'

    # CSS selectors for the movie tags
    moviePattern = 'div[class=tit3]'
    replePattern = 'div[class=score_reple]'

    def __init__(self):
        """Download the home page and the ranking page and parse both."""
        # NOTE(review): requests.get is called without a timeout here and
        # in the other methods, so a stalled connection can block forever.
        naverHtml = requests.get(self.naverHomeUrl).text
        self.naverSoup = BeautifulSoup(naverHtml, self.soupHtmlParser)

        movieHtml = requests.get(self.movieRankingHomeUrl).text
        self.movieSoup = BeautifulSoup(movieHtml, self.soupHtmlParser)

    def getMovieReple(self, url):
        """Return the reply ("reple") texts found on one movie page.

        Args:
            url: Link of the movie page; it is appended to movieHomeUrl.

        Returns:
            A list with the text of the first <p> inside every element
            matching replePattern. Elements without a <p> are skipped.
        """
        repleText = requests.get(self.movieHomeUrl + url).text
        repleSoup = BeautifulSoup(repleText, self.soupHtmlParser)
        movieReples = []
        for tag in repleSoup.select(self.replePattern):
            paragraphs = tag.select('p')
            # Guard against a reply block without <p>: indexing an empty
            # result would raise IndexError and abort the whole crawl.
            if paragraphs:
                movieReples.append(paragraphs[0].text)
        return movieReples

    def getTitles(self):
        """Return the stripped, newline-free text of every ranking entry."""
        return [
            tag.text.strip().replace("\n", "")
            for tag in self.movieSoup.select(self.moviePattern)
        ]

    def getAlls(self):
        """Return a Movie for every ranking entry, including its replies.

        Each entry triggers one request for its movie page. The
        (title, link, rank, replies) tuple is printed as it is collected.

        Returns:
            A list of Movie objects; ranks start at 1 and follow the order
            in which the entries are selected.
        """
        movies = []
        entries = self.movieSoup.select(self.moviePattern)
        for ranking, tag in enumerate(entries, start=1):
            n = tag.text.strip().replace("\n", "")
            u = tag.find('a').get('href')
            # Fetch the replies once and reuse them for both the printed
            # tuple and the Movie; fetching per use doubles the requests.
            reples = self.getMovieReple(u)
            print((n, u, ranking, reples))
            movies.append(Movie(n, u, ranking, reples))
        return movies

    def realTimeSearchWord(self):
        """Return the real-time search word entries of the home page.

        Only links carrying a truthy 'data-ssl' attribute are used; each
        of them is requested to collect its related keywords.

        Returns:
            A list of tuples (ah_r text, ah_k text, href, related words),
            where the first two items are the texts of the link's first
            span[class=ah_r] and span[class=ah_k] - presumably the rank
            and the keyword; verify against the page markup.
        """
        words = []
        for tag in self.naverSoup.select('ul[class=ah_l] li[class=ah_item] a'):
            if not tag.get('data-ssl'):
                continue
            href = tag.get('href')
            relatedSoup = BeautifulSoup(requests.get(href).text, self.soupHtmlParser)
            relatedWords = [
                relatedTag.text
                for relatedTag in relatedSoup.select('ul[class=_related_keyword_ul] li a')
            ]
            words.append((
                tag.select('span[class=ah_r]')[0].text,
                tag.select('span[class=ah_k]')[0].text,
                href,
                relatedWords,
            ))
        return words

+ 0
- 65
src/kr/co/swh/lecture/mathorithm/naverMovieClass_step1.py View File

@@ -1,65 +0,0 @@
1
-import requests
2
-from bs4 import BeautifulSoup  # BeautifulSoup import
3
-
4
class Movie:
    """One scraped ranking entry: title, link, rank and reply texts."""

    def __init__(self, n, u, rank, reples):
        """Store the scraped fields.

        Args:
            n: Movie title.
            u: Link (href) of the movie.
            rank: Position of the movie in the ranking.
            reples: Reply ("reple") texts collected for the movie.
        """
        self.name = n
        self.url = u
        self.ranking = rank
        self.reples = reples

    def __str__(self):
        """Return a printable summary; the labels are kept in Korean."""
        # str.format converts every field itself, so a non-string field
        # (for example a missing link stored as None) does not raise the
        # TypeError that '+' concatenation would.
        return "{}위\t제목:{}\t주소:{}\n리플:{}".format(
            self.ranking, self.name, self.url, self.reples
        )
13
-
14
class Naver:
    """Scraper for the Naver movie ranking page.

    The ranking page is downloaded and parsed once, in __init__; getTitles
    and getAlls read from that stored parse tree.
    """

    # Basic information
    movieHomeUrl = 'https://movie.naver.com'
    movieRankingHomeUrl = 'https://movie.naver.com/movie/sdb/rank/rmovie.nhn'
    soupHtmlParser = 'html.parser'

    # CSS selectors for the movie tags
    moviePattern = 'div[class=tit3]'
    replePattern = 'div[class=score_reple]'

    def __init__(self):
        """Download the ranking page and parse it."""
        # NOTE(review): requests.get is called without a timeout here and
        # in getMovieReple, so a stalled connection can block forever.
        movieHtml = requests.get(self.movieRankingHomeUrl).text
        self.movieSoup = BeautifulSoup(movieHtml, self.soupHtmlParser)

    def getMovieReple(self, url):
        """Return the reply ("reple") texts found on one movie page.

        Args:
            url: Link of the movie page; it is appended to movieHomeUrl.

        Returns:
            A list with the text of the last <p> inside every element
            matching replePattern. Elements without a <p> are skipped.
        """
        repleText = requests.get(self.movieHomeUrl + url).text
        repleSoup = BeautifulSoup(repleText, self.soupHtmlParser)
        movieReples = []
        for tag in repleSoup.select(self.replePattern):
            paragraphs = tag.select('p')
            # Guard against a reply block without <p>: taking an item from
            # an empty result would raise IndexError and abort the crawl.
            # NOTE(review): this version reads the LAST <p>; the other
            # copies of this class read the first one - confirm which is
            # intended.
            if paragraphs:
                movieReples.append(paragraphs[-1].text)
        return movieReples

    def getTitles(self):
        """Return the stripped, newline-free text of every ranking entry."""
        return [
            tag.text.strip().replace("\n", "")
            for tag in self.movieSoup.select(self.moviePattern)
        ]

    def getAlls(self):
        """Return a Movie for every ranking entry, including its replies.

        Each entry triggers one request for its movie page. The
        (title, link, rank, replies) tuple is printed as it is collected.

        Returns:
            A list of Movie objects; ranks start at 1 and follow the order
            in which the entries are selected.
        """
        movies = []
        entries = self.movieSoup.select(self.moviePattern)
        for ranking, tag in enumerate(entries, start=1):
            n = tag.text.strip().replace("\n", "")
            u = tag.find('a').get('href')
            repless = self.getMovieReple(u)
            print((n, u, ranking, repless))
            movies.append(Movie(n, u, ranking, repless))
        return movies
58
-
59
-
60
# Using the class: print the list of titles, then every movie.
naver = Naver()
titles = naver.getTitles()
print(titles)

for movie in naver.getAlls():
    print(movie)

+ 0
- 85
src/kr/co/swh/lecture/mathorithm/naverMovieClass_step2.py View File

@@ -1,85 +0,0 @@
1
-import requests
2
-from bs4 import BeautifulSoup  # BeautifulSoup import
3
-
4
class Movie:
    """One scraped ranking entry: title, link, rank and reply texts."""

    def __init__(self, n, u, rank, reples):
        """Store the scraped fields.

        Args:
            n: Movie title.
            u: Link (href) of the movie.
            rank: Position of the movie in the ranking.
            reples: Reply ("reple") texts collected for the movie.
        """
        self.name = n
        self.url = u
        self.ranking = rank
        self.reples = reples

    def __str__(self):
        """Return a printable summary; the labels are kept in Korean."""
        # str.format converts every field itself, so a non-string field
        # (for example a missing link stored as None) does not raise the
        # TypeError that '+' concatenation would.
        return "{}위\t제목:{}\t주소:{}\n리플:{}".format(
            self.ranking, self.name, self.url, self.reples
        )
12
-
13
class Naver:
    """Scraper for the Naver home page and the Naver movie ranking page.

    Both pages are downloaded and parsed once, in __init__; getTitles,
    getAlls and realTimeSearchWord read from those stored parse trees.
    """

    # Basic information
    naverHomeUrl = 'https://www.naver.com/'
    movieHomeUrl = 'https://movie.naver.com'
    movieRankingHomeUrl = 'https://movie.naver.com/movie/sdb/rank/rmovie.nhn'
    soupHtmlParser = 'html.parser'

    # CSS selectors for the movie tags
    moviePattern = 'div[class=tit3]'
    replePattern = 'div[class=score_reple]'

    def __init__(self):
        """Download the home page and the ranking page and parse both."""
        # NOTE(review): requests.get is called without a timeout here and
        # in the other methods, so a stalled connection can block forever.
        naverHtml = requests.get(self.naverHomeUrl).text
        self.naverSoup = BeautifulSoup(naverHtml, self.soupHtmlParser)

        movieHtml = requests.get(self.movieRankingHomeUrl).text
        self.movieSoup = BeautifulSoup(movieHtml, self.soupHtmlParser)

    def getMovieReple(self, url):
        """Return the reply ("reple") texts found on one movie page.

        Args:
            url: Link of the movie page; it is appended to movieHomeUrl.

        Returns:
            A list with the text of the first <p> inside every element
            matching replePattern. Elements without a <p> are skipped.
        """
        repleText = requests.get(self.movieHomeUrl + url).text
        repleSoup = BeautifulSoup(repleText, self.soupHtmlParser)
        movieReples = []
        for tag in repleSoup.select(self.replePattern):
            paragraphs = tag.select('p')
            # Guard against a reply block without <p>: indexing an empty
            # result would raise IndexError and abort the whole crawl.
            if paragraphs:
                movieReples.append(paragraphs[0].text)
        return movieReples

    def getTitles(self):
        """Return the stripped, newline-free text of every ranking entry."""
        return [
            tag.text.strip().replace("\n", "")
            for tag in self.movieSoup.select(self.moviePattern)
        ]

    def getAlls(self):
        """Return a Movie for every ranking entry, including its replies.

        Each entry triggers one request for its movie page. The
        (title, link, rank, replies) tuple is printed as it is collected.

        Returns:
            A list of Movie objects; ranks start at 1 and follow the order
            in which the entries are selected.
        """
        movies = []
        entries = self.movieSoup.select(self.moviePattern)
        for ranking, tag in enumerate(entries, start=1):
            n = tag.text.strip().replace("\n", "")
            u = tag.find('a').get('href')
            # Fetch the replies once and reuse them for both the printed
            # tuple and the Movie; fetching per use doubles the requests.
            reples = self.getMovieReple(u)
            print((n, u, ranking, reples))
            movies.append(Movie(n, u, ranking, reples))
        return movies

    def realTimeSearchWord(self):
        """Return the real-time search word entries of the home page.

        Only links carrying a truthy 'data-ssl' attribute are used; each
        of them is requested to collect its related keywords.

        Returns:
            A list of tuples (ah_r text, ah_k text, href, related words),
            where the first two items are the texts of the link's first
            span[class=ah_r] and span[class=ah_k] - presumably the rank
            and the keyword; verify against the page markup.
        """
        words = []
        for tag in self.naverSoup.select('ul[class=ah_l] li[class=ah_item] a'):
            if not tag.get('data-ssl'):
                continue
            href = tag.get('href')
            relatedSoup = BeautifulSoup(requests.get(href).text, self.soupHtmlParser)
            relatedWords = [
                relatedTag.text
                for relatedTag in relatedSoup.select('ul[class=_related_keyword_ul] li a')
            ]
            words.append((
                tag.select('span[class=ah_r]')[0].text,
                tag.select('span[class=ah_k]')[0].text,
                href,
                relatedWords,
            ))
        return words
76
-
77
-
78
# Using the class: print the list of titles, then every real-time search
# word entry, then every movie.
naver = Naver()
titles = naver.getTitles()
print(titles)

searchWords = naver.realTimeSearchWord()
for entry in searchWords:
    print(entry)

for movie in naver.getAlls():
    print(movie)

+ 0
- 26
src/kr/co/swh/lecture/mathorithm/naverMovieClass_step3.py View File

@@ -1,26 +0,0 @@
1
-import schedule
2
-import time
3
-import requests
4
-from bs4 import BeautifulSoup  # BeautifulSoup import
5
-
6
def getTitles():
    """Fetch the Naver movie ranking page and print each title with its rank.

    Prints one line per div[class=tit3] element, numbered from 1.
    """
    response = requests.get('https://movie.naver.com/movie/sdb/rank/rmovie.nhn')
    # Parse the downloaded page with html.parser.
    soup = BeautifulSoup(response.text, 'html.parser')
    # Only the text of each entry is printed, so no link is read here.
    for ranking, tag in enumerate(soup.select('div[class=tit3]'), start=1):
        print(str(ranking) + '위 : ' + tag.text.strip().replace("\n", ""))
15
-
16
# Register the job: run getTitles once every second.
schedule.every(1).seconds.do(getTitles)

# Other ways to schedule the same job:
# schedule.every(10).minutes.do(getTitles)
# schedule.every().hour.do(getTitles)
# schedule.every().day.at("10:30").do(getTitles)
# schedule.every(5).to(10).minutes.do(getTitles)
# schedule.every().monday.do(getTitles)
# schedule.every().wednesday.at("13:15").do(getTitles)

# Poll the scheduler forever, pausing one second between polls.
while True:
    schedule.run_pending()
    time.sleep(1)

+ 0
- 13
src/kr/co/swh/lecture/mathorithm/naverMovieClass_step4.py View File

@@ -1,13 +0,0 @@
1
-import schedule
2
-import time
3
-import requests
4
-import crawler
5
-
6
# Using the class from the crawler module: print the list of titles, then
# every real-time search word entry, then every movie.
naver = crawler.Naver()
titles = naver.getTitles()
print(titles)

searchWords = naver.realTimeSearchWord()
for entry in searchWords:
    print(entry)

for movie in naver.getAlls():
    print(movie)

+ 0
- 21
src/kr/co/swh/lecture/mathorithm/naverMovieClass_step5.py View File

@@ -1,21 +0,0 @@
1
-import schedule
2
-import time
3
-import requests
4
-import crawler
5
-
6
# Using the class from the crawler module.
naver = crawler.Naver()


def result():
    """Print the titles returned by the module-level naver instance."""
    titles = naver.getTitles()
    print(titles)


# Register the job: run result once every second.
# NOTE(review): naver is built once above, so every run presumably prints
# the same titles - confirm whether a fresh crawler.Naver() per run was
# intended.
schedule.every(1).seconds.do(result)

# Other ways to schedule the same job:
# schedule.every(10).minutes.do(result)
# schedule.every().hour.do(result)
# schedule.every().day.at("10:30").do(result)
# schedule.every(5).to(10).minutes.do(result)
# schedule.every().monday.do(result)
# schedule.every().wednesday.at("13:15").do(result)

# Poll the scheduler forever, pausing one second between polls.
while True:
    schedule.run_pending()
    time.sleep(1)

+ 0
- 70
src/kr/co/swh/lecture/mathorithm/naverMovieCrawler.py View File

@@ -1,70 +0,0 @@
1
-import requests
2
-from bs4 import BeautifulSoup  # BeautifulSoup import
3
-
4
class Movie:
    """One scraped ranking entry: title, link, rank and reply texts."""

    def __init__(self, n, u, rank, reples):
        """Store the scraped fields.

        Args:
            n: Movie title.
            u: Link (href) of the movie.
            rank: Position of the movie in the ranking.
            reples: Reply ("reple") texts collected for the movie.
        """
        self.name = n
        self.url = u
        self.ranking = rank
        self.reples = reples

    def __str__(self):
        """Return a printable summary; the labels are kept in Korean."""
        # str.format converts every field itself, so a non-string field
        # (for example a missing link stored as None) does not raise the
        # TypeError that '+' concatenation would.
        return "{}위\t제목:{}\t주소:{}\n리플:{}".format(
            self.ranking, self.name, self.url, self.reples
        )
12
-
13
class Naver:
    """Scraper for the Naver home page and the Naver movie ranking page.

    Both pages are downloaded and parsed once, in __init__; getTitles and
    getAlls read from the stored ranking-page parse tree.
    """

    # Basic information
    naverHomeUrl = 'https://www.naver.com/'
    movieHomeUrl = 'https://movie.naver.com'
    movieRankingHomeUrl = 'https://movie.naver.com/movie/sdb/rank/rmovie.nhn'
    soupHtmlParser = 'html.parser'

    # CSS selectors for the movie tags
    moviePattern = 'div[class=tit3]'
    replePattern = 'div[class=score_reple]'

    def __init__(self):
        """Download the home page and the ranking page and parse both."""
        # NOTE(review): requests.get is called without a timeout here and
        # in getMovieReple, so a stalled connection can block forever.
        naverHtml = requests.get(self.naverHomeUrl).text
        self.naverSoup = BeautifulSoup(naverHtml, self.soupHtmlParser)

        movieHtml = requests.get(self.movieRankingHomeUrl).text
        self.movieSoup = BeautifulSoup(movieHtml, self.soupHtmlParser)

    def getMovieReple(self, url):
        """Return the reply ("reple") texts found on one movie page.

        Args:
            url: Link of the movie page; it is appended to movieHomeUrl.

        Returns:
            A list with the text of the first <p> inside every element
            matching replePattern. Elements without a <p> are skipped.
        """
        repleText = requests.get(self.movieHomeUrl + url).text
        repleSoup = BeautifulSoup(repleText, self.soupHtmlParser)
        movieReples = []
        for tag in repleSoup.select(self.replePattern):
            paragraphs = tag.select('p')
            # Guard against a reply block without <p>: indexing an empty
            # result would raise IndexError and abort the whole crawl.
            if paragraphs:
                movieReples.append(paragraphs[0].text)
        return movieReples

    def getTitles(self):
        """Return the stripped, newline-free text of every ranking entry."""
        return [
            tag.text.strip().replace("\n", "")
            for tag in self.movieSoup.select(self.moviePattern)
        ]

    def getAlls(self):
        """Return a Movie for every ranking entry, including its replies.

        Each entry triggers one request for its movie page. The
        (title, link, rank, replies) tuple is printed as it is collected.

        Returns:
            A list of Movie objects; ranks start at 1 and follow the order
            in which the entries are selected.
        """
        movies = []
        entries = self.movieSoup.select(self.moviePattern)
        for ranking, tag in enumerate(entries, start=1):
            n = tag.text.strip().replace("\n", "")
            u = tag.find('a').get('href')
            # Fetch the replies once and reuse them for both the printed
            # tuple and the Movie; fetching per use doubles the requests.
            reples = self.getMovieReple(u)
            print((n, u, ranking, reples))
            movies.append(Movie(n, u, ranking, reples))
        return movies
61
-
62
-
63
-
64
-
65
# Using the class: print the list of titles, then every movie.
naver = Naver()
titles = naver.getTitles()
print(titles)

for movie in naver.getAlls():
    print(movie)

+ 0
- 21
src/kr/co/swh/lecture/mathorithm/naverMovieReple.py View File

@@ -1,21 +0,0 @@
1
-import requests
2
-from bs4 import BeautifulSoup  # BeautifulSoup import
3
-
4
def reple(url):
    """Print the reply ("reple") texts of one movie page.

    Prints the '[리플]' header, then the text of the first <p> of every
    div[class=score_reple] element on the page.

    Args:
        url: Link of the movie page; appended to 'https://movie.naver.com'.
    """
    repleText = requests.get('https://movie.naver.com' + url).text
    soup = BeautifulSoup(repleText, 'html.parser')
    print('[리플]')
    for tag in soup.select('div[class=score_reple]'):
        paragraphs = tag.select('p')
        # Skip a reply block without <p> instead of failing with IndexError.
        if paragraphs:
            print(paragraphs[0].text)
11
-
12
# Download the movie ranking page and parse it with html.parser.
response = requests.get('https://movie.naver.com/movie/sdb/rank/rmovie.nhn')
html = response.text
soup = BeautifulSoup(html, 'html.parser')

# For every ranking entry print its link, its rank and title, then its
# replies.
ranking = 1
for tag in soup.select('div[class=tit3]'):
    url = tag.find('a').get('href')
    print(url)
    print("\n{}위 : {}".format(ranking, tag.text.strip()))
    reple(url)
    ranking += 1

+ 0
- 11
src/kr/co/swh/lecture/mathorithm/naverMovieTitle.py View File

@@ -1,11 +0,0 @@
1
-import requests
2
-from bs4 import BeautifulSoup  # BeautifulSoup import
3
-
4
# Download the movie ranking page and parse it with html.parser.
response = requests.get('https://movie.naver.com/movie/sdb/rank/rmovie.nhn')
html = response.text
soup = BeautifulSoup(html, 'html.parser')

# Print every ranking entry as "<rank>위 : <title>", numbered from 1.
# Only the text of each entry is printed, so no link is read here.
for ranking, tag in enumerate(soup.select('div[class=tit3]'), start=1):
    print("\n" + str(ranking) + '위 : ' + tag.text.strip())