Wallnot's Twitter bot finds shared articles from Politiken and Zetland on Twitter and shares them with the world. Here is how it works:
# Author: Morten Helmstedt. E-mail: helmstedt@gmail.com
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from datetime import date
from datetime import timedelta
import json
import time
import random
from TwitterAPI import TwitterAPI
from nested_lookup import nested_lookup

# CONFIGURATION #

# List to store articles to post to Twitter
articlestopost = []

# Search tweets from the last 3 hours
now = datetime.utcnow()
since_hours = 3
since = now - timedelta(hours=since_hours)
since_string = since.strftime("%Y-%m-%dT%H:%M:%SZ")

# Search configuration
# https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent
# https://github.com/twitterdev/Twitter-API-v2-sample-code/tree/master/Recent-Search
tweet_fields = "tweet.fields=entities"
media_fields = "media.fields=url"
max_results = "max_results=100"
start_time = "start_time=" + since_string

# Twitter API login
client_key = ''
client_secret = ''
access_token = ''
access_secret = ''
api = TwitterAPI(client_key, client_secret, access_token, access_secret)
bearer_token = ''

# POLITIKEN #

# Run search
query = 'politiken.dk/del'
url = "https://api.twitter.com/2/tweets/search/recent?query={}&{}&{}&{}&{}".format(
    query, tweet_fields, media_fields, max_results, start_time
)
headers = {"Authorization": "Bearer {}".format(bearer_token)}
response = requests.request("GET", url, headers=headers)
json_response = response.json()
urllist = list(set(nested_lookup('expanded_url', json_response)))

# Only process urls that were not in our last Twitter query
proceslist = []
with open("./pol_lastbatch.json", "r", encoding="utf8") as fin:
    lastbatch = list(json.load(fin))
for url in urllist:
    if url not in lastbatch and query in url:
        proceslist.append(url)

# Save current query to use for next time
with open("./pol_lastbatch.json", "wt", encoding="utf8") as fout:
    lastbatch = json.dumps(urllist)
    fout.write(lastbatch)

# Request articles, extract their unique ids and remember them for posting
articlelist = []
pol_therewasanerror = False
for url in proceslist:
    try:
        # Some tweets link through Google; extract the real share url from the 'url=' parameter
        if 'https://www.google.com' in url:
            start = url.find('url=') + 4
            end = url.find('&', start)
            url = url[start:end]
        # Politiken share links are 37 characters long; strip anything after that (e.g. tracking parameters)
        if not len(url) == 37:
            url = url[:37]
        data = requests.get(url)
        result = data.text
        # Only post articles that are not freely accessible, i.e. normally behind the paywall
        if '"isAccessibleForFree": "True"' not in result:
            # The share link redirects to the real article url
            realurl = data.history[0].headers['Location']
            # Politiken uses two url schemes; extract the unique article id from either
            if not "/article" in realurl and not ".ece" in realurl:
                start_of_unique_id = realurl.index("/art") + 1
                end_of_unique_id = realurl[start_of_unique_id:].index("/")
                unique_id = realurl[start_of_unique_id:start_of_unique_id + end_of_unique_id]
            elif "/article" in realurl and ".ece" in realurl:
                start_of_unique_id = realurl.index("/article") + 1
                end_of_unique_id = realurl[start_of_unique_id:].index(".ece")
                unique_id = realurl[start_of_unique_id:start_of_unique_id + end_of_unique_id]
            articlelist.append({"id": unique_id, "url": url})
    except Exception as e:
        print(url)
        print(e)
        pol_therewasanerror = True

# If something fails, we'll process everything again next time
if pol_therewasanerror == True:
    with open("./pol_lastbatch.json", "wt", encoding="utf8") as fout:
        urllist = []
        lastbatch = json.dumps(urllist)
        fout.write(lastbatch)

# Check if article is already posted and update list of posted articles
# The file below is also used by paywall.py to update wallnot.dk
with open("./pol_published_v2.json", "r", encoding="utf8") as fin:
    alreadypublished = list(json.load(fin))
for article in articlelist:
    hasbeenpublished = False
    for published_article in alreadypublished:
        if article['id'] == published_article['id']:
            hasbeenpublished = True
            break
    if hasbeenpublished == False:
        alreadypublished.append(article)
        articlestopost.append(article)

# Save updated list of already published links
with open("./pol_published_v2.json", "wt", encoding="utf8") as fout:
    alreadypublishedjson = json.dumps(alreadypublished)
    fout.write(alreadypublishedjson)

# ZETLAND #

# Run search
query = 'zetland.dk/historie'
url = "https://api.twitter.com/2/tweets/search/recent?query={}&{}&{}&{}&{}".format(
    query, tweet_fields, media_fields, max_results, start_time
)
headers = {"Authorization": "Bearer {}".format(bearer_token)}
response = requests.request("GET", url, headers=headers)
json_response = response.json()
urllist = list(set(nested_lookup('expanded_url', json_response)))

# Only process urls that were not in our last Twitter query
proceslist = []
with open("./zet_lastbatch.json", "r", encoding="utf8") as fin:
    lastbatch = list(json.load(fin))
for url in urllist:
    if url not in lastbatch and query in url:
        proceslist.append(url)

# Save current query to use for next time
with open("./zet_lastbatch.json", "wt", encoding="utf8") as fout:
    lastbatch = json.dumps(urllist)
    fout.write(lastbatch)

# Request articles, get titles and dates, and sort by date
articlelist = []
titlecheck = []
zet_therewasanerror = False
for url in proceslist:
    try:
        # Some tweets link through Google; extract the real article url from the 'url=' parameter
        if 'https://www.google.com' in url:
            start = url.find('url=') + 4
            end = url.find('&', start)
            url = url[start:end]
        data = requests.get(url)
        result = data.text
        # Get title and publication time from the article's meta tags
        soup = BeautifulSoup(result, "lxml")
        title = soup.find('meta', attrs={'property': 'og:title'})
        title = title['content']
        timestamp = soup.find('meta', attrs={'property': 'article:published_time'})
        timestamp = timestamp['content']
        timestamp = timestamp[:timestamp.find("+")]
        dateofarticle = datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f')
        if title not in titlecheck:
            articlelist.append({"title": title, "url": url, "date": dateofarticle})
            titlecheck.append(title)
    except Exception as e:
        print(url)
        print(e)
        zet_therewasanerror = True

# If something fails, we'll process everything again next time
if zet_therewasanerror == True:
    with open("./zet_lastbatch.json", "wt", encoding="utf8") as fout:
        urllist = []
        lastbatch = json.dumps(urllist)
        fout.write(lastbatch)

articlelist_sorted = sorted(articlelist, key=lambda k: k['date'])

# Check if article is already posted and update list of posted articles
with open("./zet_published.json", "r", encoding="utf8") as fin:
    alreadypublished = list(json.load(fin))
for art in articlelist_sorted:
    title = art['title']
    if title not in alreadypublished:
        alreadypublished.append(title)
        articlestopost.append(art)

# Save updated list of already published links
with open("./zet_published.json", "wt", encoding="utf8") as fout:
    alreadypublishedjson = json.dumps(alreadypublished, ensure_ascii=False)
    fout.write(alreadypublishedjson)

# POST TO TWITTER #
friendlyterms = ["flink", "rar", "gavmild", "velinformeret", "intelligent", "sød", "afholdt", "bedårende", "betagende", "folkekær", "godhjertet", "henrivende", "smagfuld", "tækkelig", "hjertensgod", "graciøs", "galant", "tiltalende", "prægtig", "kær", "godartet", "human", "indtagende", "fortryllende", "nydelig", "venlig", "udsøgt", "klog", "kompetent", "dygtig", "ejegod", "afholdt", "omsorgsfuld", "elskværdig", "prægtig", "skattet", "feteret"]
enjoyterms = ["God fornøjelse!", "Nyd den!", "Enjoy!", "God læsning!", "Interessant!", "Spændende!", "Vidunderligt!", "Fantastisk!", "Velsignet!", "Glæd dig!", "Læs den!", "Godt arbejde!", "Wauv!"]
if articlestopost:
    for art in articlestopost:
        if "zetland" in art['url']:
            medium = "@ZetlandMagasin"
        else:
            medium = "@politiken"
        friendlyterm = random.choice(friendlyterms)
        enjoyterm = random.choice(enjoyterms)
        status = "En " + friendlyterm + " abonnent på " + medium + " har delt en artikel. " + enjoyterm
        twitterstatus = status + " " + art['url']
        try:
            twitterupdate = api.request('statuses/update', {'status': twitterstatus})
        except Exception as e:
            print(e)
        time.sleep(15)