mirror of
https://github.com/pds-nest/nest.git
synced 2024-11-22 04:54:18 +00:00
Merge remote-tracking branch 'origin/main' into main
# Conflicts: # nest_crawler/repo_search.py
This commit is contained in:
commit
7229761587
2 changed files with 79 additions and 35 deletions
52
nest_crawler/associate_condition_tweet.py
Normal file
52
nest_crawler/associate_condition_tweet.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from math import cos, radians
|
||||||
|
|
||||||
|
from nest_backend.database import *
|
||||||
|
|
||||||
|
|
||||||
|
def associate_condition_tweet(conditions_type, tweet):
|
||||||
|
if ConditionType.hashtag in conditions_type.keys():
|
||||||
|
for condition_content in conditions_type[ConditionType.hashtag]:
|
||||||
|
if condition_content.content in [hashtag['text'] for hashtag in tweet.entities['hashtags']]:
|
||||||
|
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
||||||
|
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
||||||
|
ext.session.add(condition_associated)
|
||||||
|
ext.session.commit()
|
||||||
|
if ConditionType.user in conditions_type.keys():
|
||||||
|
for condition_content in conditions_type[ConditionType.user]:
|
||||||
|
if condition_content.content == tweet.author.screen_name:
|
||||||
|
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
||||||
|
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
||||||
|
ext.session.add(condition_associated)
|
||||||
|
ext.session.commit()
|
||||||
|
if ConditionType.time in conditions_type.keys():
|
||||||
|
for condition_content in conditions_type[ConditionType.time]:
|
||||||
|
condition_date_time = datetime.fromisoformat(condition_content.content[2:])
|
||||||
|
if condition_content.content[0] == '<':
|
||||||
|
if tweet.created_at < condition_date_time:
|
||||||
|
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
||||||
|
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
||||||
|
ext.session.add(condition_associated)
|
||||||
|
ext.session.commit()
|
||||||
|
elif condition_content.content[0] == '>':
|
||||||
|
if tweet.created_at > condition_date_time:
|
||||||
|
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
||||||
|
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
||||||
|
ext.session.add(condition_associated)
|
||||||
|
ext.session.commit()
|
||||||
|
if ConditionType.coordinates in conditions_type.keys():
|
||||||
|
for condition_content in conditions_type[ConditionType.coordinates]:
|
||||||
|
coordinates = condition_content.content.split()
|
||||||
|
if tweet.geo is not None and is_coordinate_inside_bounding_box(float(coordinates[2]), float(coordinates[3]), float(coordinates[1])/1000, tweet.geo['coordinates'][0], tweet.geo['coordinates'][1]):
|
||||||
|
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
||||||
|
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
||||||
|
ext.session.add(condition_associated)
|
||||||
|
ext.session.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def is_coordinate_inside_bounding_box(latitude, longitude, radius, tweet_latitude, tweet_longitude):
|
||||||
|
earth_radius_km = 6371
|
||||||
|
dLatitude = 360 * radius / earth_radius_km
|
||||||
|
dLongitude = dLatitude * cos(radians(latitude))
|
||||||
|
if (latitude - dLatitude < tweet_latitude < latitude+dLatitude) and (longitude-dLongitude < tweet_longitude < longitude+dLongitude):
|
||||||
|
return True
|
|
@ -1,23 +1,27 @@
|
||||||
from nest_backend.database import *
|
from nest_backend.database import *
|
||||||
from nest_crawler.authentication import authenticate
|
import authentication
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import tweepy as tw
|
import tweepy as tw
|
||||||
|
from associate_condition_tweet import associate_condition_tweet
|
||||||
|
|
||||||
def search_repo_conditions(repository_id):
|
def search_repo_conditions(repository_id):
|
||||||
api = authenticate()
|
|
||||||
geocode = "44.3591600,11.7132000,20km"
|
api = authentication.authenticate()
|
||||||
repo = Repository.query.filter_by(id=repository_id).first()
|
repo = Repository.query.filter_by(id=repository_id).first()
|
||||||
|
|
||||||
if repo is None:
|
if repo is None:
|
||||||
print("Non esiste una repository con questo id")
|
print("Non esiste una repository con questo id")
|
||||||
return False
|
return False
|
||||||
conditions = [use for use in repo.conditions]
|
conditions = [use for use in repo.conditions]
|
||||||
if len(conditions) == 0:
|
if len(conditions) == 0:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
print(f"Searching tweets from repo: {repo.name}")
|
||||||
evaluation_mode = repo.evaluation_mode
|
evaluation_mode = repo.evaluation_mode
|
||||||
conditions_type = dict()
|
conditions_type = dict()
|
||||||
|
|
||||||
# Dividing condition into condition types
|
# Dividing condition into condition types
|
||||||
for condition in conditions:
|
for condition in conditions:
|
||||||
# print(condition.id)
|
|
||||||
if condition.type not in conditions_type.keys():
|
if condition.type not in conditions_type.keys():
|
||||||
conditions_type[condition.type] = [condition]
|
conditions_type[condition.type] = [condition]
|
||||||
else:
|
else:
|
||||||
|
@ -32,9 +36,12 @@ def search_repo_conditions(repository_id):
|
||||||
for types in conditions_type.keys():
|
for types in conditions_type.keys():
|
||||||
print(types, ":", conditions_type[types])
|
print(types, ":", conditions_type[types])
|
||||||
coordinates_string = ""
|
coordinates_string = ""
|
||||||
|
# Adding to the query string the hashtag conditions
|
||||||
if ConditionType.hashtag in conditions_type.keys():
|
if ConditionType.hashtag in conditions_type.keys():
|
||||||
for condition_content in conditions_type[ConditionType.hashtag]:
|
for condition_content in conditions_type[ConditionType.hashtag]:
|
||||||
queryString += ("#" + condition_content.content + " " + queryConjunction + " ")
|
queryString += ("#" + condition_content.content + " " + queryConjunction + " ")
|
||||||
|
|
||||||
|
# Adding to the coordinates string the coordinates condition
|
||||||
if ConditionType.coordinates in conditions_type.keys():
|
if ConditionType.coordinates in conditions_type.keys():
|
||||||
if evaluation_mode == ConditionMode.all_and:
|
if evaluation_mode == ConditionMode.all_and:
|
||||||
if len(conditions_type[ConditionType.coordinates]) == 1:
|
if len(conditions_type[ConditionType.coordinates]) == 1:
|
||||||
|
@ -56,7 +63,6 @@ def search_repo_conditions(repository_id):
|
||||||
image_url_list = image_url_list[:-1]
|
image_url_list = image_url_list[:-1]
|
||||||
else:
|
else:
|
||||||
image_url_list = None
|
image_url_list = None
|
||||||
|
|
||||||
tweetDB = Tweet(snowflake=tweet.id, content=tweet.text,
|
tweetDB = Tweet(snowflake=tweet.id, content=tweet.text,
|
||||||
location=tweet.geo['coordinates'] if tweet.geo is not None else None,
|
location=tweet.geo['coordinates'] if tweet.geo is not None else None,
|
||||||
place=tweet.place.full_name if tweet.place is not None else None,
|
place=tweet.place.full_name if tweet.place is not None else None,
|
||||||
|
@ -72,17 +78,21 @@ def search_repo_conditions(repository_id):
|
||||||
composed = Composed(rid=repository_id, snowflake=tweet.id)
|
composed = Composed(rid=repository_id, snowflake=tweet.id)
|
||||||
ext.session.add(composed)
|
ext.session.add(composed)
|
||||||
ext.session.commit()
|
ext.session.commit()
|
||||||
|
# Adding to the query string the user condition
|
||||||
if ConditionType.user in conditions_type.keys():
|
if ConditionType.user in conditions_type.keys():
|
||||||
for condition_content in conditions_type[ConditionType.user]:
|
for condition_content in conditions_type[ConditionType.user]:
|
||||||
queryString += ("from:" + condition_content.content + " " + queryConjunction + " ")
|
queryString += ("from:" + condition_content.content + " " + queryConjunction + " ")
|
||||||
|
# Adding to the query string the time condition
|
||||||
if ConditionType.time in conditions_type.keys():
|
if ConditionType.time in conditions_type.keys():
|
||||||
for condition_content in conditions_type[ConditionType.time]:
|
for condition_content in conditions_type[ConditionType.time]:
|
||||||
if condition_content.content[0] == '<':
|
if condition_content.content[0] == '<':
|
||||||
queryString += ("until:" + condition_content.content[2:] + " " + queryConjunction + " ")
|
queryString += ("until:" + condition_content.content[2:] + " " + queryConjunction + " ")
|
||||||
elif condition_content.content[0] == '>':
|
elif condition_content.content[0] == '>':
|
||||||
queryString += ("since:" + condition_content.content[2:] + " " + queryConjunction + " ")
|
queryString += ("since:" + condition_content.content[2:] + " " + queryConjunction + " ")
|
||||||
|
# End of query string
|
||||||
queryString = queryString[:-len(queryConjunction) - 1]
|
queryString = queryString[:-len(queryConjunction) - 1]
|
||||||
print(queryString)
|
print(queryString)
|
||||||
|
|
||||||
if evaluation_mode == ConditionMode.all_or:
|
if evaluation_mode == ConditionMode.all_or:
|
||||||
if queryString != "":
|
if queryString != "":
|
||||||
for tweet in tw.Cursor(method=api.search, q=queryString).items(10):
|
for tweet in tw.Cursor(method=api.search, q=queryString).items(10):
|
||||||
|
@ -109,43 +119,25 @@ def search_repo_conditions(repository_id):
|
||||||
ext.session.add(tweetDB)
|
ext.session.add(tweetDB)
|
||||||
ext.session.commit()
|
ext.session.commit()
|
||||||
if evaluation_mode == ConditionMode.all_or:
|
if evaluation_mode == ConditionMode.all_or:
|
||||||
if ConditionType.hashtag in conditions_type.keys():
|
associate_condition_tweet(conditions_type, tweet)
|
||||||
for condition_content in conditions_type[ConditionType.hashtag]:
|
|
||||||
if condition_content.content in [hashtag['text'] for hashtag in tweet.entities['hashtags']]:
|
|
||||||
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
|
||||||
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
|
||||||
ext.session.add(condition_associated)
|
|
||||||
ext.session.commit()
|
|
||||||
if ConditionType.user in conditions_type.keys():
|
|
||||||
for condition_content in conditions_type[ConditionType.user]:
|
|
||||||
if condition_content.content == tweet.author.screen_name:
|
|
||||||
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
|
||||||
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
|
||||||
ext.session.add(condition_associated)
|
|
||||||
ext.session.commit()
|
|
||||||
|
|
||||||
if ConditionType.time in conditions_type.keys():
|
|
||||||
for condition_content in conditions_type[ConditionType.time]:
|
|
||||||
condition_date_time = datetime.fromisoformat(condition_content.content[2:])
|
|
||||||
if condition_content.content[0] == '<':
|
|
||||||
if tweet.created_at < condition_date_time:
|
|
||||||
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
|
||||||
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
|
||||||
ext.session.add(condition_associated)
|
|
||||||
ext.session.commit()
|
|
||||||
elif condition_content.content[0] == '>':
|
|
||||||
if tweet.created_at > condition_date_time:
|
|
||||||
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition_content.id).all():
|
|
||||||
condition_associated = Contains(cid=condition_content.id, snowflake=tweet.id)
|
|
||||||
ext.session.add(condition_associated)
|
|
||||||
ext.session.commit()
|
|
||||||
elif evaluation_mode == ConditionMode.all_and:
|
elif evaluation_mode == ConditionMode.all_and:
|
||||||
for condition in conditions:
|
for condition in conditions:
|
||||||
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition.id).all():
|
if not Contains.query.filter_by(snowflake=str(tweet.id), cid=condition.id).all():
|
||||||
condition_associated = Contains(cid=condition.id, snowflake=tweet.id)
|
condition_associated = Contains(cid=condition.id, snowflake=tweet.id)
|
||||||
ext.session.add(condition_associated)
|
ext.session.add(condition_associated)
|
||||||
ext.session.commit()
|
ext.session.commit()
|
||||||
|
alerts = [alert for alert in repo.alerts]
|
||||||
|
for alert in alerts:
|
||||||
|
alert_conditions = [condition.condition for condition in alert.conditions]
|
||||||
|
alert_conditions_type = dict()
|
||||||
|
for condition in alert_conditions:
|
||||||
|
if condition.type not in alert_conditions_type.keys():
|
||||||
|
alert_conditions_type[condition.type] = [condition]
|
||||||
|
else:
|
||||||
|
alert_conditions_type[condition.type].append(condition)
|
||||||
|
associate_condition_tweet(alert_conditions_type, tweet)
|
||||||
if not Composed.query.filter_by(snowflake=str(tweet.id), rid=repository_id).all():
|
if not Composed.query.filter_by(snowflake=str(tweet.id), rid=repository_id).all():
|
||||||
composed = Composed(rid=repository_id, snowflake=tweet.id)
|
composed = Composed(rid=repository_id, snowflake=tweet.id)
|
||||||
ext.session.add(composed)
|
ext.session.add(composed)
|
||||||
ext.session.commit()
|
ext.session.commit()
|
||||||
|
print(f"Done searching tweets from repo: {repo.name}")
|
||||||
|
|
Loading…
Reference in a new issue