From 452484c4bf31fe530aae1342635c3ab7d5c1a519 Mon Sep 17 00:00:00 2001 From: "g.minoccari" Date: Tue, 18 May 2021 15:16:47 +0200 Subject: [PATCH] Aggiunto collegamento tra i tweet scaricati e le condizioni che nello specifico sono collegate ad essi --- nest_crawler/__main__.py | 87 +++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/nest_crawler/__main__.py b/nest_crawler/__main__.py index 6323651..a802683 100644 --- a/nest_crawler/__main__.py +++ b/nest_crawler/__main__.py @@ -34,9 +34,9 @@ def search_repo_conditions(repository_id): for condition in conditions: # print(condition.id) if condition.type not in conditions_type.keys(): - conditions_type[condition.type] = [condition.content] + conditions_type[condition.type] = [condition] else: - conditions_type[condition.type].append(condition.content) + conditions_type[condition.type].append(condition) queryString = "" @@ -49,7 +49,7 @@ def search_repo_conditions(repository_id): coordinates_string = "" if ConditionType.hashtag in conditions_type.keys(): for condition_content in conditions_type[ConditionType.hashtag]: - queryString += ("#" + condition_content + " " + queryConjunction + " ") + queryString += ("#" + condition_content.content + " " + queryConjunction + " ") if ConditionType.coordinates in conditions_type.keys(): if evaluation_mode == ConditionMode.all_and: if len(conditions_type[ConditionType.coordinates]) == 1: @@ -59,42 +59,93 @@ def search_repo_conditions(repository_id): return None elif evaluation_mode == ConditionMode.all_or: for condition_content in conditions_type[ConditionType.coordinates]: - coordinates_tweet = condition_content.split() + coordinates_tweet = condition_content.content.split() coordinates_string = coordinates_tweet[2] + "," + coordinates_tweet[3] + "," + str(float(coordinates_tweet[1])/1000) + "km" print(coordinates_string) - for tweet in tw.Cursor(method=api.search, geocode=coordinates_string).items(10): - 
tweetsFound.append(tweet) + for tweet in tw.Cursor(method=api.search, q="", geocode=coordinates_string).items(10): + if not Tweet.query.filter_by(snowflake=str(tweet.id)).all(): + tweetDB = Tweet(snowflake=tweet.id, content=tweet.text, + location=tweet.geo['coordinates'] if tweet.geo is not None else None, + place=tweet.place.full_name if tweet.place is not None else None, + insert_time=str(datetime.now()), + poster=tweet.author.screen_name) + ext.session.add(tweetDB) + ext.session.commit() + if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all(): + condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id) + ext.session.add(condition_associated) + ext.session.commit() + if not Composed.query.filter_by(snowflake=str(tweet.id), rid=repository_id).all(): + composed = Composed(rid=repository_id, snowflake=tweet.id) + ext.session.add(composed) + ext.session.commit() if ConditionType.user in conditions_type.keys(): for condition_content in conditions_type[ConditionType.user]: - queryString += ("from:" + condition_content + " " + queryConjunction + " ") + queryString += ("from:" + condition_content.content + " " + queryConjunction + " ") if ConditionType.time in conditions_type.keys(): for condition_content in conditions_type[ConditionType.time]: - if condition_content[0] == '<': - queryString += ("until:" + condition_content + " " + queryConjunction + " ") - elif condition_content[0] == '>': - queryString += ("since:" + condition_content + " " + queryConjunction + " ") + if condition_content.content[0] == '<': + queryString += ("until:" + condition_content.content[2:] + " " + queryConjunction + " ") + elif condition_content.content[0] == '>': + queryString += ("since:" + condition_content.content[2:] + " " + queryConjunction + " ") queryString = queryString[:-len(queryConjunction) - 1] print(queryString) if evaluation_mode == ConditionMode.all_or: - for tweet in tw.Cursor(method=api.search, q=queryString).items(10): - 
tweetsFound.append(tweet) - print(tweet.user.name + ' : ' + tweet.text) + if queryString != "": + for tweet in tw.Cursor(method=api.search, q=queryString).items(10): + tweetsFound.append(tweet) + print(tweet.user.name + ' : ' + tweet.text + ' : ' + tweet.geo) elif evaluation_mode == ConditionMode.all_and: for tweet in tw.Cursor(method=api.search, q=queryString, geocode=coordinates_string).items(10): tweetsFound.append(tweet) - print(tweet.user.name + ' : ' + tweet.text) + print(tweet.user.name + ' : ' + tweet.text + ' : ' + str(tweet.geo)) for tweet in tweetsFound: if not Tweet.query.filter_by(snowflake=str(tweet.id)).all(): tweetDB = Tweet(snowflake=tweet.id, content=tweet.text, - location=tweet.geo.coordinate.coordinates if tweet.geo is not None else "", - insert_time=str(datetime.now())) + location=tweet.geo['coordinates'] if tweet.geo is not None else None, + place=tweet.place.full_name if tweet.place is not None else None, + insert_time=str(datetime.now()), + poster=tweet.author.screen_name) ext.session.add(tweetDB) + ext.session.commit() + if evaluation_mode == ConditionMode.all_or: + if ConditionType.hashtag in conditions_type.keys(): + for condition_content in conditions_type[ConditionType.hashtag]: + if condition_content.content in [hashtag['text'] for hashtag in tweet.entities['hashtags']]: + condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id) + ext.session.add(condition_associated) + ext.session.commit() + if ConditionType.user in conditions_type.keys(): + for condition_content in conditions_type[ConditionType.user]: + if condition_content.content == tweet.author.screen_name: + condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id) + ext.session.add(condition_associated) + ext.session.commit() + + if ConditionType.time in conditions_type.keys(): + for condition_content in conditions_type[ConditionType.time]: + condition_date_time = datetime.fromisoformat(condition_content.content[2:]) + if 
condition_content.content[0] == '<': + if tweet.created_at < condition_date_time: + condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id) + ext.session.add(condition_associated) + ext.session.commit() + elif condition_content.content[0] == '>': + if tweet.created_at > condition_date_time: + condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id) + ext.session.add(condition_associated) + ext.session.commit() + elif evaluation_mode == ConditionMode.all_and: + for condition in conditions: + if Contains.query.filter_by(snowflake=str(tweet.id), cid=condition.id).all(): + condition_associated = Contains(cid=condition.id, snowflake=tweet.id) + ext.session.add(condition_associated) + ext.session.commit() if not Composed.query.filter_by(snowflake=str(tweet.id), rid=repository_id).all(): composed = Composed(rid=repository_id, snowflake=tweet.id) ext.session.add(composed) ext.session.commit() - if __name__ == "__main__": search_repo_conditions(16) with app.app_context():