tobby48 5 years ago
parent
commit
99273f2e18

+ 0
- 9
src/main/python/kr/co/swh/lecture/opensource/konlpy/konlpy-ex2.py View File

@@ -1,9 +0,0 @@
1
-from konlpy.tag import Kkma
2
-kkma = Kkma()
3
-
4
-def tokenize(doc):
5
-    # norm means normalization and stem means showing tokens in their root form. NOTE(review): neither option is passed to kkma.pos() below, so this comment looks stale.
6
-    return ['/'.join(t) for t in kkma.pos(doc)]
7
-
8
-tweet = 'RT @marcobonzanini: just an example! :D http://example.com #NLP'
9
-print(tokenize(tweet))

+ 13
- 0
src/main/python/kr/co/swh/lecture/opensource/konlpy/konlpy-hannanum.py View File

@@ -0,0 +1,13 @@
1
+# import os
2
+# os.environ['_JAVA_OPTIONS'] = '-Xmx1024M'
3
+# from konlpy import init_jvm
4
+# init_jvm("<JAVA_HOME>")
5
+
6
+from konlpy.tag import Hannanum
7
# Run one sample sentence through three Hannanum methods -- nouns(), pos()
# and morphs() -- and print each result as soon as it is produced.
hannanum = Hannanum()
_sentence = '안녕하세요 SWH코딩학원입니다.'

# Result of hannanum.nouns() for the sentence.
noun = hannanum.nouns(_sentence)
print(noun)

# Result of hannanum.pos() for the same sentence.
pos = hannanum.pos(_sentence)
print(pos)

# Result of hannanum.morphs() for the same sentence.
morph = hannanum.morphs(_sentence)
print(morph)

src/main/python/kr/co/swh/lecture/opensource/konlpy/konlpy-ex1.py → src/main/python/kr/co/swh/lecture/opensource/konlpy/konlpy-kkma.py View File


+ 8
- 0
src/main/python/kr/co/swh/lecture/opensource/konlpy/konlpy-quiz1.py View File

@@ -0,0 +1,8 @@
1
+from konlpy.tag import Kkma
2
# Single Kkma instance, created once and reused by tokenize().
kkma = Kkma()


def tokenize(doc):
    """Return the output of ``kkma.pos(doc)`` as a list of '/'-joined strings.

    Every item produced by ``kkma.pos(doc)`` is joined with '/' into one
    string; the strings are returned in the order kkma produced them.
    """
    return list(map('/'.join, kkma.pos(doc)))


# Sample sentence to analyze; the tokenized form is printed to stdout.
tweet = '초등학생부터 대학생까지 인공지능 교육이 필수화된다. 전 국민의 AI 기본소양을 함양한다는 취지다.'
print(tokenize(tweet))

+ 0
- 1
src/main/python/kr/co/swh/lecture/opensource/konlpy/user_dic.txt View File

@@ -1 +0,0 @@
1
-SWH코딩학원	NNP

+ 18
- 0
src/main/python/kr/co/swh/lecture/opensource/naver/blog.py View File

@@ -0,0 +1,18 @@
1
+import os
2
+import sys
3
+import urllib.request
4
# Import the urllib submodules used below explicitly instead of relying on
# urllib.request having imported them as a side effect.
import urllib.error
import urllib.parse

# Naver Open API credentials, taken from the environment when available.
# NOTE(review): the fallback values are committed to version control and are
# therefore exposed; they are kept only so the script behaves as before.
# Rotate them and drop the fallbacks.
client_id = os.environ.get("NAVER_CLIENT_ID", "OJgN42xxZiJXpnZtCH1j")
client_secret = os.environ.get("NAVER_CLIENT_SECRET", "crXggqJhhW")

# Percent-encode the search keyword so it is safe inside the query string.
encText = urllib.parse.quote("코딩")
url = "https://openapi.naver.com/v1/search/blog?query=" + encText  # JSON result
# url = "https://openapi.naver.com/v1/search/blog.xml?query=" + encText  # XML result

request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)

try:
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(request) as response:
        rescode = response.getcode()
        if rescode == 200:
            response_body = response.read()
            print(response_body.decode('utf-8'))
        else:
            # getcode() returns an int, so it must be converted before
            # concatenation (the original raised TypeError here).
            print("Error Code:" + str(rescode))
except urllib.error.HTTPError as err:
    # urlopen raises HTTPError for error statuses instead of returning a
    # response, so the error code has to be reported from here as well.
    rescode = err.code
    print("Error Code:" + str(rescode))

+ 42
- 0
src/main/python/kr/co/swh/lecture/opensource/project/naver-blog-nlp-ranking.py View File

@@ -0,0 +1,42 @@
1
+from konlpy.tag import Kkma
2
+import os
3
+import sys
4
+import urllib.request
5
+import json
6
+import re
7
+
8
def naver_blog_search(client_id, client_secret, text):
    """Query the Naver blog search Open API and return the raw JSON text.

    Args:
        client_id: Value sent in the ``X-Naver-Client-Id`` request header.
        client_secret: Value sent in the ``X-Naver-Client-Secret`` header.
        text: Search keyword; it is percent-encoded before being appended
            to the query string.

    Returns:
        The response body decoded as UTF-8 when the status code is 200.
        For any other status code that ``urlopen`` returns normally, the
        code is printed and ``None`` is returned.

    Raises:
        urllib.error.URLError: Propagated unchanged from ``urlopen``; this
            includes ``HTTPError`` for error statuses.
    """
    # Imported explicitly rather than relying on urllib.request having
    # loaded urllib.parse as a side effect.
    from urllib.parse import quote

    encText = quote(text)
    url = "https://openapi.naver.com/v1/search/blog?query=" + encText  # JSON result
    # url = "https://openapi.naver.com/v1/search/blog.xml?query=" + encText  # XML result
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)

    # The context manager closes the HTTP response on every path.
    with urllib.request.urlopen(request) as response:
        rescode = response.getcode()
        if rescode == 200:
            return response.read().decode('utf-8')

    # getcode() returns an int; convert it before concatenating (the
    # original raised TypeError on this path).
    print("Error Code:" + str(rescode))
    return None
22
+    
23
# Compiled once at import time instead of on every call. DOTALL lets '.'
# match newlines, so a tag that is broken across lines is removed as well.
_TAG_PATTERN = re.compile(r'<.*?>', re.DOTALL)


def striphtml(data):
    """Return ``data`` with every ``<...>`` span removed.

    Matching is non-greedy, so each span runs from a ``<`` to the nearest
    following ``>``. Text outside such spans is returned unchanged; HTML
    entities are not decoded.

    Args:
        data: The string to clean.

    Returns:
        A new string without the matched spans.
    """
    return _TAG_PATTERN.sub('', data)
26
+
27
# Counter replaces the original per-word list.count() scans.
from collections import Counter

kkma = Kkma()

# Naver Open API credentials, taken from the environment when available.
# NOTE(review): the fallback values are committed to version control and are
# therefore exposed; they are kept only so the script behaves as before.
# Rotate them and drop the fallbacks.
naver_result = naver_blog_search(
    os.environ.get("NAVER_CLIENT_ID", 'OJgN42xxZiJXpnZtCH1j'),
    os.environ.get("NAVER_CLIENT_SECRET", 'crXggqJhhW'),
    '코딩',
)

# naver_blog_search() returns None when the status code is not 200; treat
# that as "no items" instead of crashing inside json.loads(None).
y = json.loads(naver_result) if naver_result is not None else {'items': []}

# Collect every word that Kkma tags as NNG or NNP in the tag-stripped
# description of each search result.
result = []
for b in y['items']:
    for word, pos in kkma.pos(striphtml(b['description'])):
        if pos in ('NNG', 'NNP'):
            result.append(word)

# Keep only words seen more than once, as [word, count] pairs. most_common()
# orders them from most to least frequent; the original order came from set
# iteration and was arbitrary.
countList = [
    [word, count]
    for word, count in Counter(result).most_common()
    if count > 1
]
print(countList)