1
Fork 0
mirror of https://github.com/pds-nest/nest.git synced 2024-11-22 04:54:18 +00:00

Refactor della funzione per associare i tweet alle condizioni che ne hanno scaturito il download

This commit is contained in:
g.minoccari 2021-05-28 11:48:58 +02:00
parent 027964825a
commit f9430470cb
2 changed files with 72 additions and 33 deletions

View file

@ -0,0 +1,52 @@
from datetime import datetime
from math import cos, radians
from nest_backend.database import *
def _link_condition_to_tweet(condition, tweet):
    """Store a Contains row linking ``condition`` to ``tweet`` if none exists yet."""
    already_linked = Contains.query.filter_by(snowflake=str(tweet.id), cid=condition.id).all()
    if already_linked:
        return
    ext.session.add(Contains(cid=condition.id, snowflake=tweet.id))
    ext.session.commit()


def associate_condition_tweet(conditions_type, tweet):
    """Link ``tweet`` to every condition in ``conditions_type`` that it satisfies.

    For each matching condition a ``Contains`` row is added and committed,
    unless a row for the same tweet/condition pair is already stored.

    :param conditions_type: dict mapping a ``ConditionType`` member to the list
        of conditions of that type. Each condition exposes ``id`` and
        ``content``.
    :param tweet: tweet object exposing ``id``, ``entities['hashtags']``,
        ``author.screen_name``, ``created_at`` and ``geo``.
    :return: None.

    Condition ``content`` formats, as read by this function:

    * hashtag: the hashtag text, compared for exact equality.
    * user: the author's screen name, compared for exact equality.
    * time: first character is ``<`` or ``>``; the text from index 2 onwards
      is parsed with ``datetime.fromisoformat``.
    * coordinates: whitespace-separated fields; field 1 is the radius
      (divided by 1000 before use -- presumably metres to km, TODO confirm),
      field 2 the latitude and field 3 the longitude. Field 0 is not read.
    """
    if ConditionType.hashtag in conditions_type:
        # Built once per tweet instead of once per hashtag condition.
        tweet_hashtags = {hashtag['text'] for hashtag in tweet.entities['hashtags']}
        for condition in conditions_type[ConditionType.hashtag]:
            if condition.content in tweet_hashtags:
                _link_condition_to_tweet(condition, tweet)

    if ConditionType.user in conditions_type:
        for condition in conditions_type[ConditionType.user]:
            if condition.content == tweet.author.screen_name:
                _link_condition_to_tweet(condition, tweet)

    if ConditionType.time in conditions_type:
        for condition in conditions_type[ConditionType.time]:
            threshold = datetime.fromisoformat(condition.content[2:])
            operator = condition.content[0]
            if operator == '<':
                matches = tweet.created_at < threshold
            elif operator == '>':
                matches = tweet.created_at > threshold
            else:
                # Unknown operators never match.
                matches = False
            if matches:
                _link_condition_to_tweet(condition, tweet)

    if ConditionType.coordinates in conditions_type:
        for condition in conditions_type[ConditionType.coordinates]:
            coordinates = condition.content.split()
            if tweet.geo is None:
                continue
            inside = is_coordinate_inside_bounding_box(
                float(coordinates[2]),
                float(coordinates[3]),
                float(coordinates[1]) / 1000,
                tweet.geo['coordinates'][0],
                tweet.geo['coordinates'][1],
            )
            if inside:
                _link_condition_to_tweet(condition, tweet)
def is_coordinate_inside_bounding_box(latitude, longitude, radius, tweet_latitude, tweet_longitude):
    """Tell whether a tweet position lies inside the box around a centre point.

    The box extends ``radius`` kilometres north, south, east and west of
    ``(latitude, longitude)``; its edges are excluded.

    :param latitude: latitude of the box centre, in degrees.
    :param longitude: longitude of the box centre, in degrees.
    :param radius: half-side of the box, in kilometres.
    :param tweet_latitude: latitude of the tweet, in degrees.
    :param tweet_longitude: longitude of the tweet, in degrees.
    :return: ``True`` if the tweet is strictly inside the box, else ``False``.
    """
    earth_radius_km = 6371
    # Angle (in radians) subtended at the Earth's centre by `radius` km.
    angular_radius = radius / earth_radius_km

    latitude_offset = abs(radians(tweet_latitude - latitude))
    if not latitude_offset < angular_radius:
        return False

    # Normalise the longitude difference to [-180, 180) so that boxes
    # straddling the antimeridian are handled.
    longitude_delta = (tweet_longitude - longitude + 180) % 360 - 180
    # A degree of longitude spans cos(latitude) times the ground distance of
    # a degree of latitude, so the east-west offset is scaled before comparing.
    east_west_offset = abs(radians(longitude_delta)) * cos(radians(latitude))
    return east_west_offset < angular_radius

View file

@ -2,10 +2,10 @@ from nest_backend.database import *
from authentication import authenticate from authentication import authenticate
from datetime import datetime, timedelta from datetime import datetime, timedelta
import tweepy as tw import tweepy as tw
from associate_condition_tweet import associate_condition_tweet
def search_repo_conditions(repository_id): def search_repo_conditions(repository_id):
api = authenticate() api = authenticate()
geocode = "44.3591600,11.7132000,20km"
repo = Repository.query.filter_by(id=repository_id).first() repo = Repository.query.filter_by(id=repository_id).first()
if repo is None: if repo is None:
print("Non esiste una repository con questo id") print("Non esiste una repository con questo id")
@ -15,9 +15,9 @@ def search_repo_conditions(repository_id):
return False return False
evaluation_mode = repo.evaluation_mode evaluation_mode = repo.evaluation_mode
conditions_type = dict() conditions_type = dict()
# Dividing condition into condition types # Dividing condition into condition types
for condition in conditions: for condition in conditions:
# print(condition.id)
if condition.type not in conditions_type.keys(): if condition.type not in conditions_type.keys():
conditions_type[condition.type] = [condition] conditions_type[condition.type] = [condition]
else: else:
@ -32,9 +32,12 @@ def search_repo_conditions(repository_id):
for types in conditions_type.keys(): for types in conditions_type.keys():
print(types, ":", conditions_type[types]) print(types, ":", conditions_type[types])
coordinates_string = "" coordinates_string = ""
# Adding to the query string the hashtag conditions
if ConditionType.hashtag in conditions_type.keys(): if ConditionType.hashtag in conditions_type.keys():
for condition_content in conditions_type[ConditionType.hashtag]: for condition_content in conditions_type[ConditionType.hashtag]:
queryString += ("#" + condition_content.content + " " + queryConjunction + " ") queryString += ("#" + condition_content.content + " " + queryConjunction + " ")
# Adding to the coordinates string the coordinates condition
if ConditionType.coordinates in conditions_type.keys(): if ConditionType.coordinates in conditions_type.keys():
if evaluation_mode == ConditionMode.all_and: if evaluation_mode == ConditionMode.all_and:
if len(conditions_type[ConditionType.coordinates]) == 1: if len(conditions_type[ConditionType.coordinates]) == 1:
@ -56,7 +59,6 @@ def search_repo_conditions(repository_id):
image_url_list = image_url_list[:-1] image_url_list = image_url_list[:-1]
else: else:
image_url_list = None image_url_list = None
tweetDB = Tweet(snowflake=tweet.id, content=tweet.text, tweetDB = Tweet(snowflake=tweet.id, content=tweet.text,
location=tweet.geo['coordinates'] if tweet.geo is not None else None, location=tweet.geo['coordinates'] if tweet.geo is not None else None,
place=tweet.place.full_name if tweet.place is not None else None, place=tweet.place.full_name if tweet.place is not None else None,
@ -72,17 +74,21 @@ def search_repo_conditions(repository_id):
composed = Composed(rid=repository_id, snowflake=tweet.id) composed = Composed(rid=repository_id, snowflake=tweet.id)
ext.session.add(composed) ext.session.add(composed)
ext.session.commit() ext.session.commit()
# Adding to the query string the user condition
if ConditionType.user in conditions_type.keys(): if ConditionType.user in conditions_type.keys():
for condition_content in conditions_type[ConditionType.user]: for condition_content in conditions_type[ConditionType.user]:
queryString += ("from:" + condition_content.content + " " + queryConjunction + " ") queryString += ("from:" + condition_content.content + " " + queryConjunction + " ")
# Adding to the query string the time condition
if ConditionType.time in conditions_type.keys(): if ConditionType.time in conditions_type.keys():
for condition_content in conditions_type[ConditionType.time]: for condition_content in conditions_type[ConditionType.time]:
if condition_content.content[0] == '<': if condition_content.content[0] == '<':
queryString += ("until:" + condition_content.content[2:] + " " + queryConjunction + " ") queryString += ("until:" + condition_content.content[2:] + " " + queryConjunction + " ")
elif condition_content.content[0] == '>': elif condition_content.content[0] == '>':
queryString += ("since:" + condition_content.content[2:] + " " + queryConjunction + " ") queryString += ("since:" + condition_content.content[2:] + " " + queryConjunction + " ")
# End of query string
queryString = queryString[:-len(queryConjunction) - 1] queryString = queryString[:-len(queryConjunction) - 1]
print(queryString) print(queryString)
if evaluation_mode == ConditionMode.all_or: if evaluation_mode == ConditionMode.all_or:
if queryString != "": if queryString != "":
for tweet in tw.Cursor(method=api.search, q=queryString).items(10): for tweet in tw.Cursor(method=api.search, q=queryString).items(10):
@ -109,42 +115,23 @@ def search_repo_conditions(repository_id):
ext.session.add(tweetDB) ext.session.add(tweetDB)
ext.session.commit() ext.session.commit()
if evaluation_mode == ConditionMode.all_or: if evaluation_mode == ConditionMode.all_or:
if ConditionType.hashtag in conditions_type.keys(): associate_condition_tweet(conditions_type, tweet)
for condition_content in conditions_type[ConditionType.hashtag]:
if condition_content.content in [hashtag['text'] for hashtag in tweet.entities['hashtags']]:
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
ext.session.add(condition_associated)
ext.session.commit()
if ConditionType.user in conditions_type.keys():
for condition_content in conditions_type[ConditionType.user]:
if condition_content.content == tweet.author.screen_name:
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
ext.session.add(condition_associated)
ext.session.commit()
if ConditionType.time in conditions_type.keys():
for condition_content in conditions_type[ConditionType.time]:
condition_date_time = datetime.fromisoformat(condition_content.content[2:])
if condition_content.content[0] == '<':
if tweet.created_at < condition_date_time:
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
ext.session.add(condition_associated)
ext.session.commit()
elif condition_content.content[0] == '>':
if tweet.created_at > condition_date_time:
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
ext.session.add(condition_associated)
ext.session.commit()
elif evaluation_mode == ConditionMode.all_and: elif evaluation_mode == ConditionMode.all_and:
for condition in conditions: for condition in conditions:
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition.id).all(): if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition.id).all():
condition_associated = Contains(cid=condition.id, snowflake=tweet.id) condition_associated = Contains(cid=condition.id, snowflake=tweet.id)
ext.session.add(condition_associated) ext.session.add(condition_associated)
ext.session.commit() ext.session.commit()
alerts = [alert for alert in repo.alerts]
for alert in alerts:
alert_conditions = [condition.condition for condition in alert.conditions]
alert_conditions_type = dict()
for condition in alert_conditions:
if condition.type not in alert_conditions_type.keys():
alert_conditions_type[condition.type] = [condition]
else:
alert_conditions_type[condition.type].append(condition)
associate_condition_tweet(alert_conditions_type, tweet)
if not Composed.query.filter_by(snowflake=str(tweet.id), rid=repository_id).all(): if not Composed.query.filter_by(snowflake=str(tweet.id), rid=repository_id).all():
composed = Composed(rid=repository_id, snowflake=tweet.id) composed = Composed(rid=repository_id, snowflake=tweet.id)
ext.session.add(composed) ext.session.add(composed)