Browse Source

java, python twitter 수집

tobby48 5 years ago
parent
commit
057a467b22

+ 15
- 0
pom.xml View File

@@ -220,6 +220,21 @@
220 220
 			<version>2.9.0</version>
221 221
 		</dependency>
222 222
 		
223
+		<!-- https://mvnrepository.com/artifact/org.twitter4j/twitter4j-core -->
224
+		<dependency>
225
+		    <groupId>org.twitter4j</groupId>
226
+		    <artifactId>twitter4j-core</artifactId>
227
+		    <version>4.0.7</version>
228
+		</dependency>
229
+		
230
+		<!-- https://mvnrepository.com/artifact/org.twitter4j/twitter4j-stream -->
231
+		<dependency>
232
+		    <groupId>org.twitter4j</groupId>
233
+		    <artifactId>twitter4j-stream</artifactId>
234
+		    <version>4.0.7</version>
235
+		</dependency>
236
+		
237
+		
223 238
 		<!-- https://mvnrepository.com/artifact/com.rabbitmq/amqp-client -->
224 239
 		<dependency>
225 240
 		    <groupId>com.rabbitmq</groupId>

+ 96
- 0
src/main/java/kr/co/swh/lecture/opensource/twitter/TwitterBasic.java View File

@@ -0,0 +1,96 @@
1
+package kr.co.swh.lecture.opensource.twitter; 
2
+
3
+import java.util.ArrayList;
4
+import java.util.List;
5
+
6
+import twitter4j.Query;
7
+import twitter4j.QueryResult;
8
+import twitter4j.Status;
9
+import twitter4j.Twitter;
10
+import twitter4j.TwitterException;
11
+import twitter4j.TwitterFactory;
12
+import twitter4j.conf.ConfigurationBuilder;
13
+
14
+/**
15
+ * <pre>
16
+ * kr.co.swh.lecture.opensource.twitter 
17
+ * TwitterBasic.java
18
+ *
19
+ * 설명 :	https://github.com/Twitter4J/Twitter4J
20
+ * </pre>
21
+ * 
22
+ * @since : 2019. 12. 13.
23
+ * @author : tobby48
24
+ * @version : v1.0
25
+ */
26
+public class TwitterBasic {
27
+
28
+	/** Builds a Twitter client authenticated with the OAuth credentials filled in below. */
29
+	private Twitter getInstance() {
30
+		//	If you have a Twitter account, create an app and register its keys here;
31
+		//	otherwise register the credentials stored in the database.
32
+		String consumerKey = "xx";
33
+		String consumerSecret = "xx";
34
+		String accessToken = "xx";
35
+		String accessTokenSecret = "xx";
36
+
37
+		ConfigurationBuilder configurationBuilder = new ConfigurationBuilder();
38
+		configurationBuilder.setDebugEnabled(true)
39
+				.setOAuthConsumerKey(consumerKey)
40
+				.setOAuthConsumerSecret(consumerSecret)
41
+				.setOAuthAccessToken(accessToken)
42
+				.setOAuthAccessTokenSecret(accessTokenSecret);
43
+		return new TwitterFactory(configurationBuilder.build()).getInstance();
44
+	}
45
+
46
+	/**
47
+	 * Searches for tweets matching {@code query}, requesting {@code totalCount} results.
48
+	 * Only a single request (at most 100 tweets) is implemented; larger counts yield an empty list.
49
+	 *
50
+	 * @return the tweets found; never {@code null}
51
+	 */
52
+	public List<Status> getTwitterContents(Query query, int totalCount){
53
+		if(totalCount <= 100){
54
+			query.setCount(totalCount);
55
+			return this.getTweetBlock(query);
56
+		}
57
+		//	Quiz.
58
+		//	Must be implemented separately: Twitter limits a single request to 100 tweets.
59
+		return new ArrayList<>();
60
+	}
61
+
62
+	/**
63
+	 * Runs one search request for {@code query}.
64
+	 *
65
+	 * @param query the search to execute
66
+	 * @return the tweets of that request, or an empty list (never {@code null}) if the request fails
67
+	 */
68
+	private List<Status> getTweetBlock(Query query){
69
+		List<Status> result = new ArrayList<>();
70
+		try{
71
+			Twitter twitter = getInstance();
72
+			QueryResult queryResult = twitter.search(query);
73
+			result.addAll(queryResult.getTweets());
74
+		}catch (TwitterException e){
75
+			//	Best effort: report the failure and return the (empty) list instead of null.
76
+			e.printStackTrace();
77
+		}
78
+		return result;
79
+	}
80
+
81
+	/** Demo: searches Korean-language tweets for a keyword and prints every result. */
82
+	public static void main(String[] args) {
83
+		TwitterBasic twitter = new TwitterBasic();
84
+		Query query = new Query();
85
+		query.setLang("ko");
86
+		query.setQuery("손흥민");
87
+		query.setSince("2019-09-28");
88
+
89
+		List<Status> result = twitter.getTwitterContents(query, 90);
90
+		for(Status s : result) {
91
+			System.out.println(s);
92
+			//	Convert to a VO (value object) class to use the data in various ways.
93
+		}
94
+	}
95
+
96
+}

+ 84
- 0
src/main/java/kr/co/swh/lecture/opensource/twitter/TwitterStreaming.java View File

@@ -0,0 +1,84 @@
1
+package kr.co.swh.lecture.opensource.twitter; 
2
+
3
+import twitter4j.StallWarning;
4
+import twitter4j.Status;
5
+import twitter4j.StatusDeletionNotice;
6
+import twitter4j.StatusListener;
7
+import twitter4j.TwitterStream;
8
+import twitter4j.TwitterStreamFactory;
9
+import twitter4j.conf.ConfigurationBuilder;
10
+
11
+/**
12
+ * <pre>
13
+ * kr.co.swh.lecture.opensource.twitter 
14
+ * TwitterStreaming.java
15
+ *
16
+ * 설명 : https://github.com/Twitter4J/Twitter4J
17
+ * </pre>
18
+ * 
19
+ * @since : 2019. 12. 13.
20
+ * @author : tobby48
21
+ * @version : v1.0
22
+ */
23
+public class TwitterStreaming {
24
+
25
+	private TwitterStream getInstance() {
26
+		//	twitter 계정이 있다면 앱을 만들어서 등록
27
+		//	그렇지 않을 시에는 데이터베이스에 있는 정보 등록
28
+		String consumerKey = "xx";
29
+        String consumerSecret = "xx";
30
+        String acessToken = "xx";
31
+        String acessTokenSecret = "xx";
32
+
33
+        ConfigurationBuilder configurationBuilder = new ConfigurationBuilder();
34
+        configurationBuilder.setDebugEnabled(true)
35
+                .setOAuthConsumerKey(consumerKey)
36
+                .setOAuthConsumerSecret(consumerSecret)
37
+                .setOAuthAccessToken(acessToken)
38
+                .setOAuthAccessTokenSecret(acessTokenSecret);
39
+        TwitterStreamFactory tf = new TwitterStreamFactory(configurationBuilder.build());
40
+        TwitterStream twitterStream = tf.getInstance();
41
+        return twitterStream;
42
+	}
43
+	
44
+	public static void main(String[] args) {
45
+		// TODO Auto-generated method stub
46
+		TwitterStreaming twitter = new TwitterStreaming();
47
+		StatusListener listener = new StatusListener(){
48
+			@Override
49
+            public void onStatus(Status status) {
50
+                System.out.println("@" + status.getUser().getScreenName() + " - " + status.getText());
51
+            }
52
+
53
+            @Override
54
+            public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
55
+                System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
56
+            }
57
+
58
+            @Override
59
+            public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
60
+                System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
61
+            }
62
+
63
+            @Override
64
+            public void onScrubGeo(long userId, long upToStatusId) {
65
+                System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
66
+            }
67
+
68
+            @Override
69
+            public void onStallWarning(StallWarning warning) {
70
+                System.out.println("Got stall warning:" + warning);
71
+            }
72
+
73
+            @Override
74
+            public void onException(Exception ex) {
75
+                ex.printStackTrace();
76
+            }
77
+	    };
78
+	    TwitterStream twitterStream = twitter.getInstance();
79
+	    twitterStream.addListener(listener);
80
+	    // sample() method internally creates a thread which manipulates TwitterStream and calls these adequate listener methods continuously.
81
+	    twitterStream.sample("ko");
82
+	}
83
+
84
+}

+ 1
- 0
src/main/python/kr/co/swh/lecture/opensource/twitter/sample.json
File diff suppressed because it is too large
View File


+ 25
- 0
src/main/python/kr/co/swh/lecture/opensource/twitter/twitter_crawler_basic.py View File

@@ -0,0 +1,25 @@
1
+# https://marcobonzanini.com/2015/03/02/mining-twitter-data-with-python-part-1/comment-page-1/
2
+import tweepy
3
+from tweepy import OAuthHandler
4
+import json
5
+
6
+# twitter 계정이 있다면 앱을 만들어서 등록
7
+# 그렇지 않을 시에는 데이터베이스에 있는 정보 등록
8
+consumer_key = 'xx'
9
+consumer_secret = 'xx'
10
+access_token = 'xx'
11
+access_secret = 'xx'
12
+
13
+auth = OAuthHandler(consumer_key, consumer_secret)
14
+auth.set_access_token(access_token, access_secret)
15
+api = tweepy.API(auth)
16
+
17
+def toJSON(tweet):
18
+    print(json.dumps(tweet, ensure_ascii=False))
19
+
20
+# 팔로잉 친구 목록
21
+for friend in tweepy.Cursor(api.friends).items():
22
+    toJSON(friend._json)
23
+# 트윗 및 답글 목록
24
+for tweet in tweepy.Cursor(api.user_timeline).items():
25
+    toJSON(tweet._json)

+ 45
- 0
src/main/python/kr/co/swh/lecture/opensource/twitter/twitter_crawler_streaming.py View File

@@ -0,0 +1,45 @@
1
+# https://imasoftwareengineer.tistory.com/97?category=791547
2
+# conda install -c conda-forge tweepy
3
+import tweepy
4
+from tweepy.streaming import StreamListener
5
+from tweepy import OAuthHandler
6
+from tweepy import Stream
7
+import json
8
+
9
+# If you have a Twitter account, create an app and register its keys here;
10
+# otherwise register the credentials stored in the database.
11
+consumer_key = 'xx'
12
+consumer_secret = 'xx'
13
+access_token = 'xx'
14
+access_secret = 'xx'
15
+
16
+# 리스너 - 스트리밍 API로 부터 값이 들어오면 아래의 리스너가 실행된다.
17
+class Listener(StreamListener):
18
+    def on_data(self, data):
19
+        try:
20
+            with open('sample.json', 'a', encoding='utf-8') as f:
21
+                json_data = json.loads(data)    # str -> json으로 변환
22
+                json.dump(json_data, f, ensure_ascii=False) # json을 원형 그대로 파일에 저장(인코딩 깨짐을 방지)
23
+                print(json_data)
24
+                return True
25
+        except BaseException as e:
26
+            print("Error on_data: %s" % str(e))
27
+        return True
28
+
29
+    def on_error(self, status):
30
+        print("ERROR: " + str(status))
31
+
32
+def main():
33
+    # 리스너 생성
34
+    l = Listener()
35
+    auth = OAuthHandler(consumer_key, consumer_secret)
36
+    auth.set_access_token(access_token, access_secret)
37
+    stream = Stream(auth, l) # 리스너와 인증키을 통해 스트리밍 객체 생성
38
+
39
+    # 언어=한국어(ko), 해시태그=필터할 단어
40
+    stream.filter(languages=["ko"], track=['손흥민'])
41
+
42
+if __name__ == '__main__':
43
+    main()
44
+
45
+main()

+ 6
- 0
src/main/python/kr/co/swh/lecture/opensource/twitter/twitter_json_load.py View File

@@ -0,0 +1,6 @@
1
+import json
2
+ 
3
+with open('sample.json', 'r', encoding='utf-8') as f:
4
+    line = f.readline() # read only the first tweet/line
5
+    tweet = json.loads(line) # load it as Python dict
6
+    print(tweet)