RSS feeds with Django

A Wallnot user asked me whether Wallnot shouldn't have an RSS feed. Sure it should.

It turned out that this sort of thing is built into Django and very easy to make.

I created feeds.py with two different feeds: one for all articles on Wallnot, and one for articles from a specific medium:

from django.contrib.syndication.views import Feed
from wall.models import Article
from django.urls import reverse

class RssFeed(Feed):
	title = "Nyeste artikler fra wallnot.dk"
	link = "/rss/"
	description = "De allernyeste artikler uden paywall fra wallnot.dk"
	def items(self):
		return Article.objects.filter(paywall_detected=False).order_by('-date')[:20]

	def item_title(self, item):
		return item.title

	def item_description(self, item):
		return "Artikel fra " + item.get_medium_display()

	def item_link(self, item):
		return item.url

class RssFeedMedium(Feed):
	title = "Nyeste artikler fra wallnot.dk"
	description = "De allernyeste artikler uden paywall fra wallnot.dk"
	def get_object(self, request, medium):
		return medium
	
	def link(self, obj):
		return "/rss/" + obj
	
	def items(self, obj):
		return Article.objects.filter(paywall_detected=False, medium=obj).order_by('-date')[:20]

	def item_title(self, item):
		return item.title

	def item_description(self, item):
		return "Artikel fra " + item.get_medium_display()

	def item_link(self, item):
		return item.url

And pointed to them from urls.py:

from django.urls import path
from . import views
from . import feeds

urlpatterns = [
	path('', views.index, name='index'),
	path('links', views.linkindex, name='linkindex'),
	path('privatliv', views.privacy, name='privacy'),
	path('om', views.about, name='wabout'),
	path('rss', feeds.RssFeed(), name='rssfeed'),
	path('rss/<str:medium>', feeds.RssFeedMedium(), name='rssfeed_medium'),
]

And voilà!

You'll find the RSS feeds on wallnot.dk right next to the Twitter logo.
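
If you want to check that a feed behaves as expected, a few lines of Python will do. This is just a quick sanity check, not part of the Wallnot code; it assumes the third-party feedparser package, and the medium code in the second URL is a made-up example, so substitute a code your Article model actually uses:

import feedparser

# Fetch and parse the feed with all articles
feed = feedparser.parse('https://wallnot.dk/rss')
for entry in feed.entries:
	print(entry.title, entry.link)

# Fetch a per-medium feed ("politiken" is a hypothetical medium code)
medium_feed = feedparser.parse('https://wallnot.dk/rss/politiken')
print(len(medium_feed.entries))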

ETFs and funds taxed as share income in 2021

A couple of years ago it became possible to buy (and make or lose money on) equity-based ETFs and foreign investment funds taxed as share income rather than as capital income.

The only problem is/was that the well-meaning spreadsheet on skat.dk listing the equity-based investment companies approved for the lower tax rate is a little hard to use when you want to compare the securities and find out where they can be bought.

So I built https://wallnot.dk/stocks.

Here you can read how I did it:

  1. I downloaded the Excel sheet from skat.dk
  2. I added some columns and saved it as a CSV file
  3. I used Python to fetch data and links for the securities from Saxo Bank, Nordnet and Morningstar
  4. I created a Django app and defined a data model matching the Excel sheet
  5. I imported the data into Django
  6. I built the view

A few hours of work for me. Hopefully a few hours saved for you.

Downloading the Excel sheet

https://skat.dk/getfile.aspx?id=145013&type=xlsx

Adding some columns and saving as a CSV file

Not very pedagogical, but here is the resulting header row:

Registreringsland/Skattemæssigt hjemsted;ISIN-kode;Navn;LEI kode;ASIDENT;CVR/SE/TIN;Venligt navn;Første registreringsår;Morningstar_id;Saxo_id;Nordnet_url;Nordnet_id;Nordnet_ÅOP;Nordnet_udbyttepolitik;Nordnet_prospekt;Saxo_url;Morningstar_prospekt;Morningstar_url;Morningstar_ÅOP

Fetching data and links for the securities

A pretty sloppy Python program, but it works OK:

import csv
import requests
import re
import json
from bs4 import BeautifulSoup

def nordnet_cookies():
	# Nordnet user account credentials
	user = ''
	password = ''

	# A cookie dictionary for storing cookies
	cookies = {}
	
	# First part of cookie setting prior to login
	url = 'https://classic.nordnet.dk/mux/login/start.html?cmpi=start-loggain&state=signin'
	request = requests.get(url)
	cookies['LOL'] = request.cookies['LOL']
	cookies['TUX-COOKIE'] = request.cookies['TUX-COOKIE']

	# Second part of cookie setting prior to login
	url = 'https://classic.nordnet.dk/api/2/login/anonymous'
	request = requests.post(url, cookies=cookies)
	cookies['NOW'] = request.cookies['NOW']

	# Actual login that gets us cookies required for later use
	url = "https://classic.nordnet.dk/api/2/authentication/basic/login"
	request = requests.post(url,cookies=cookies, data = {'username': user, 'password': password})
	cookies['NOW'] = request.cookies['NOW']
	cookies['xsrf'] = request.cookies['xsrf']

	# Getting a NEXT cookie
	url = "https://classic.nordnet.dk/oauth2/authorize?client_id=NEXT&response_type=code&redirect_uri=https://www.nordnet.dk/oauth2/"
	request = requests.get(url, cookies=cookies)
	cookies['NEXT'] = request.history[1].cookies['NEXT']

	return cookies

def saxo_headers():
	# Saxo user account credentials
	user = ''
	password = ''

	# Visit login page and get AuthnRequest token value from input form
	url = 'https://www.saxoinvestor.dk/Login/da/'
	request = requests.get(url)
	soup = BeautifulSoup(request.text, "html.parser")
	input = soup.find_all('input', {"id":"AuthnRequest"})
	authnrequest = input[0]["value"]

	# Login step 1: Submit username, password and token and get another token back
	url = 'https://www.saxoinvestor.dk/Login/da/'
	request = requests.post(url, data = {'field_userid': user, 'field_password': password, 'AuthnRequest': authnrequest})
	soup = BeautifulSoup(request.text, "html.parser")
	input = soup.find_all('input', {"name":"SAMLResponse"})
	# Most of the time this works
	if input:
		samlresponse = input[0]["value"]
	# But sometimes there's a disclaimer that Saxo Bank would like you to accept
	else:
		input = soup.find_all('input')
		inputs = {}
		try:
			for i in input:
				inputs[i['name']] = i['value']
		except:
			pass
		url = 'https://www.saxotrader.com/disclaimer'
		request = requests.post(url, data=inputs)
		cook = request.cookies['DisclaimerApp']
		returnurl = cook[cook.find("ReturnUrl")+10:cook.find("&IsClientStation")]
		url = 'https://live.logonvalidation.net/complete-app-consent/' + returnurl[returnurl.find("complete-app-consent/")+21:]
		request = requests.get(url)
		soup = BeautifulSoup(request.text, "html.parser")
		input = soup.find_all('input', {"name":"SAMLResponse"})
		samlresponse = input[0]["value"]	

	# Login step 2: Get bearer token necessary for API requests
	url = 'https://www.saxoinvestor.dk/investor/login.sso.ashx'
	r = requests.post(url, data = {'SAMLResponse': samlresponse})

	bearer = r.history[0].headers['Location']
	bearer = bearer[bearer.find("BEARER"):bearer.find("/exp/")]
	bearer = bearer.replace("%20"," ")

	# START API CALLS
	# Documentation at https://www.developer.saxo/openapi/learn

	# Set bearer token as header
	headers = {'Authorization': bearer}

	return headers
	

nordnet_cookies = nordnet_cookies()
saxo_headers = saxo_headers()

filename = 'Copy of ABIS liste 2021 - opdateret den 11-01-2021.csv'
output_file = 'stocks.csv'

get_nordnet = True
get_saxo = True
get_morningstar = True


with open(output_file, 'w', newline='') as output_csv:
	paperwriter = csv.writer(output_csv, delimiter=';', quotechar ='"', quoting = csv.QUOTE_MINIMAL)

	with open(filename) as csvfile:
		paperreader = csv.reader(csvfile, delimiter=';')
		for row in paperreader:
			if row[1] != '0' and row[1] != 'ISIN-kode' and row[1] != '':
				isin = row[1]
				if get_morningstar == True:
					morningstar = requests.get('https://www.morningstar.dk/dk/util/SecuritySearch.ashx?q=' + isin)
					morningstar_text = morningstar.text
					if morningstar_text:
						first_hit = morningstar_text[morningstar_text.index("{"):morningstar_text.index("}")+1]
						first_hit_json = json.loads(first_hit)
						morningstar_id = first_hit_json['i']
						morningstar_url = 'https://www.morningstar.dk/dk/funds/snapshot/snapshot.aspx?id=' + morningstar_id
						morningstar_info = requests.get(morningstar_url)
						
						soup = BeautifulSoup(morningstar_info.text, "lxml")
						try:
							aop = soup.find(text=re.compile('Løbende omkostning'))
							aop_value = aop.parent.next.next.next.next.next.next.next.string
							if aop_value:
								cleaned_aop = aop_value.replace(",",".").replace("%","")
							else:
								cleaned_aop = ''
						except:
							cleaned_aop = ''
						
						morningstar_documents = requests.get('https://www.morningstar.dk/dk/funds/snapshot/snapshot.aspx?id=' + morningstar_id + '&tab=12')
						document_soup = BeautifulSoup(morningstar_documents.text, "lxml")
						try:
							prospect = document_soup.find(text=re.compile('CI'))
							prospect_link = prospect.parent.next.next.next.next.next.next.next.next.a['href']
							document_id = prospect_link[prospect_link.index("Id=")+3:prospect_link.rfind("&")]
							document_url = 'https://doc.morningstar.com/document/' + document_id + '.msdoc'
						except:
							try:
								prospect = document_soup.find(text=re.compile('Prospekt'))
								prospect_link = prospect.parent.next.next.next.next.next.next.next.next.a['href']
								document_id = prospect_link[prospect_link.index("Id=")+3:prospect_link.rfind("&")]
								document_url = 'https://doc.morningstar.com/document/' + document_id + '.msdoc'
							except:
								document_url = ''
						
						row[8] = morningstar_id
						row[16] = document_url
						row[17] = morningstar_url
						row[18] = cleaned_aop

				if get_saxo == True:
					saxo = requests.get('https://www.saxotrader.com/openapi/ref/v1/instruments/?$top=201&$skip=0&includeNonTradable=true&AssetTypes=Stock,Bond,MutualFund,Etf,Etc,Etn,Fund,Rights,CompanyWarrant,StockIndex&keywords=' + isin + '&OrderBy=', headers=saxo_headers)
					try:
						saxo_json = saxo.json()
						if saxo_json and saxo.status_code == 200:
							try:
								data = saxo_json['Data']
								if data:
									identifier = data[0]['Identifier']
									assettype = data[0]['AssetType']
									saxo_url = 'https://www.saxotrader.com/d/trading/product-overview?assetType=' + assettype + '&uic=' + str(identifier)
									row[9] = identifier
									row[15] = saxo_url
							except Exception as e:
								print(e)
								breakpoint()
					except:
						pass
				if get_nordnet == True:
					nordnet = requests.get('https://www.nordnet.dk/api/2/main_search?query=' + isin + '&search_space=ALL&limit=60', cookies=nordnet_cookies)
					nordnet_json = nordnet.json()
					if nordnet_json and nordnet.status_code == 200:
						try:
							display_types = [hit['display_group_type'] for hit in nordnet_json]
						except:
							breakpoint()
						good_hit = "wait"
						try:
							good_hit = display_types.index('ETF')
							base_url = 'https://www.nordnet.dk/markedet/etf-lister/'
						except:
							try:
								good_hit = display_types.index('PINV')
								base_url = 'https://www.nordnet.dk/markedet/investeringsforeninger-liste/'
							except:
								try:
									good_hit = display_types.index('FUND')
									base_url = 'https://www.nordnet.dk/markedet/fondslister/'
								except:
									try:
										bad_hit = display_types.index('NEWS')
									except:
										try:
											good_hit = display_types.index('EQUITY')
											base_url = 'https://www.nordnet.dk/markedet/aktiekurser/'
										except:
											breakpoint()
						if good_hit != 'wait':
							results = nordnet_json[good_hit]['results']
							instrument_id = results[0]['instrument_id']
							display_name = results[0]['display_name']

							space_counter = 0
							paper_url = ''
							for letter in display_name:
								if letter == " ":
									space_counter += 1
									if space_counter > 2:
										break
									letter = '-'
									paper_url += letter
								else:
									letter = letter.lower()
									paper_url += letter
							full_url = base_url + str(instrument_id) + '-' + paper_url
							if "&" in full_url:
								full_url = full_url.replace("&","")
							
							check_full_url = requests.get(full_url)
							
							soup = BeautifulSoup(check_full_url.text, "lxml")
							try:
								policy = soup.find('span', text=re.compile('Udbyttepolitik'))
								policy_value = policy.next.next.string
							except:
								policy_value = "Ukendt"
							try:
								prospectus = soup.find('span', text=re.compile('Faktaark'))
								prospectus_value = prospectus.next.next.a['href']
								cleaned_prospectus = prospectus_value[:prospectus_value.rfind("?")].replace('http','https')
							except:
								cleaned_prospectus = "Ukendt"
							try:
								aop = soup.find('span', text=re.compile('Årlig omkostning'))
								aop_value = aop.next.next.get_text()
								cleaned_aop = aop_value.replace(",",".").replace("%","")
							except:
								cleaned_aop = "Ukendt"							
							
							row[10] = check_full_url.url
							row[11] = instrument_id
							row[12] = cleaned_aop
							row[13] = policy_value
							row[14] = cleaned_prospectus
			print(row)
			paperwriter.writerow(row)

The data model in Django

Here is models.py:

from django.db import models

class Stock(models.Model):
	country = models.CharField('Registreringsland', max_length=2)
	isin = models.CharField('ISIN-kode', max_length=20, blank=True)
	name = models.CharField('Navn', max_length=200, blank=True)
	lei = models.CharField('LEI-kode', max_length=20, blank=True)
	asident = models.CharField('ASIDENT', max_length=20, blank=True)
	cvr = models.CharField('CVR/SE/TIN', max_length=20, blank=True)
	friendly_name = models.CharField('Venligt navn', max_length=200, blank=True)
	first_registration_year = models.CharField('Første registreringsår', max_length=4, blank=True)
	morningstar_id = models.CharField('Morningstar: Id', max_length=20, blank=True)
	saxo_id = models.CharField('Saxo Bank: Id', max_length=20, blank=True)
	nordnet_id = models.CharField('Nordnet: Id', max_length=20, blank=True)
	morningstar_url = models.URLField('Morningstar: Url', max_length=200, blank=True)
	saxo_url = models.URLField('Saxo Bank: Url', max_length=200, blank=True)
	nordnet_url = models.URLField('Nordnet: Url', max_length=200, blank=True)
	morningstar_aop = models.FloatField('Morningstar: Løbende omkostninger', null=True, blank=True)
	nordnet_aop = models.FloatField('Nordnet: Løbende omkostninger', null=True, blank=True)
	nordnet_dividend = models.CharField('Nordnet: Udbyttepolitik', max_length=20, blank=True)
	nordnet_prospect = models.URLField('Nordnet: Investorinformation', max_length=200, blank=True)
	morningstar_prospect = models.URLField('Morningstar: Investorinformation', max_length=200, blank=True)

Importing the data into Django

Here I used Django's database layer instead of writing SQL statements myself:

import csv
with open('stocks.csv', newline='\n') as csvfile:
	reader = csv.DictReader(csvfile, delimiter=";")
	count = 0
	for row in reader:
		stock = Stock(country = row['Registreringsland/Skattemæssigt hjemsted'])
		if row['ISIN-kode']:
			stock.isin = row['ISIN-kode']
		if row['Navn']:
			stock.name = row['Navn']
		if row['LEI kode']:	
			stock.lei = row['LEI kode']
		if row['ASIDENT']:	
			stock.asident = row['ASIDENT']
		if row['CVR/SE/TIN']:	
			stock.cvr = row['CVR/SE/TIN']
		if row['Venligt navn']:	
			stock.friendly_name = row['Venligt navn']
		if row['Første registreringsår']:	
			stock.first_registration_year = row['Første registreringsår']
		if row['Morningstar_id']:	
			stock.morningstar_id = row['Morningstar_id']
		if row['Saxo_id']:	
			stock.saxo_id = row['Saxo_id']
		if row['Nordnet_id']:	
			stock.nordnet_id = row['Nordnet_id']
		if row['Morningstar_url']:	
			stock.morningstar_url = row['Morningstar_url']
		if row['Saxo_url']:	
			stock.saxo_url = row['Saxo_url']
		if row['Nordnet_url']:	
			stock.nordnet_url = row['Nordnet_url']
		if row['Morningstar_ÅOP']:	
			stock.morningstar_aop = row['Morningstar_ÅOP']
		if row['Nordnet_ÅOP'] and row['Nordnet_ÅOP'] != '-' and row['Nordnet_ÅOP'] != 'Ukendt':	
			stock.nordnet_aop = row['Nordnet_ÅOP']
		if row['Nordnet_udbyttepolitik']:	
			stock.nordnet_dividend = row['Nordnet_udbyttepolitik']
		if row['Nordnet_prospekt']:	
			stock.nordnet_prospect = row['Nordnet_prospekt']
		if row['Morningstar_prospekt']:	
			stock.morningstar_prospect = row['Morningstar_prospekt']

		stock.save()
		count += 1
		print(count)

Building the view

Here is views.py:

from django.shortcuts import render
from .models import Stock

def index(request):
	#FILTER LOGIC
	if request.GET.get('filter'):
		filter = request.GET.get('filter')
		if filter == 'nordnetsaxo':
			stocks = Stock.objects.exclude(nordnet_url='') | Stock.objects.exclude(saxo_url='')
		elif filter == 'nordnet':
			stocks = Stock.objects.exclude(nordnet_url='')
		elif filter == 'saxo':
			stocks = Stock.objects.exclude(saxo_url='')
		elif filter == 'ikkenordnetsaxo':
			stocks = Stock.objects.filter(nordnet_url='').filter(saxo_url='')
		elif filter == 'alle':
			stocks = Stock.objects.all()
	else:
		stocks = Stock.objects.exclude(nordnet_url='') | Stock.objects.exclude(saxo_url='')
	
	#SORT LOGIC
	sort = request.GET.get('sort')
	print(sort)
	if sort == "name" or not sort:
		stocks = stocks.order_by('name')
	elif sort == "-name":
		stocks = stocks.order_by('-name')
	elif sort == "isin":
		stocks = stocks.order_by('isin')
	elif sort == "-isin":
		stocks = stocks.order_by('-isin')
	elif sort == "morningstar_aop":
		stocks = stocks.order_by('morningstar_aop')
	elif sort == "-morningstar_aop":
		stocks = stocks.order_by('-morningstar_aop')
	elif sort == "nordnet_aop":
		stocks = stocks.order_by('nordnet_aop')
	elif sort == "-nordnet_aop":
		stocks = stocks.order_by('-nordnet_aop')
		
	context = {'stocks': stocks}
	return render(request, 'stocks/index.html', context)

And here is the template, index.html:

{% extends "stocks/base.html" %}
{% load static %}
{% block title %}ETF'er og fonde med aktiebeskatning 2021{% endblock %}
{% block content %}{% spaceless %}

<h1>ETF'er og fonde med aktiebeskatning 2021</h1>

<p>Du har læst om, <a href="https://www.nordnet.dk/blog/nye-regler-for-beskatning-af-investeringsfonde/">at aktiebaserede ETF'er og udenlandske investeringsfonde fra 2020 beskattes som aktieindkomst og ikke længere som kapitalindkomst</a>.</p>

<p>Du har endda fundet <a href="https://skat.dk/getfile.aspx?id=145013&type=xlsx">det fine regneark, der viser aktiebaserede investeringsselskaber</a> på <a href="https://skat.dk/skat.aspx?oid=2244641">skat.dk</a>.</p>

<p>Men det er godt nok svært for dig at få overblik over, hvilke af papirerne du overhovedet kan købe som almindelig hobby-/cryptoinvestor, og at sammenligne omkostninger, ÅOP og hvad det ellers hedder, for at finde det rigtige køb.</p>

<p>Her er et forsøg på at løse dit (og mit) problem. Data kommer fra <a href="https://skat.dk/getfile.aspx?id=145013&type=xlsx">det fine regneark</a> og har samme fejl og mangler, men er suppleret med nyttige informationer og links.</p>

<p><a href="#forbehold">Du kan læse om forbehold nederst på siden</a> og du kan <a href="https://helmstedt.dk/2021/03/etfer-og-fonde-med-aktiebeskatning-2021/">læse om hvordan siden er lavet på min blog</a>.</p>

<p><strong>Vis til salg hos:</strong>
<form id="prefs">

	<input type="radio" id="nordnetsaxo" name="filter" value="nordnetsaxo"{% if request.GET.filter == "nordnetsaxo" or not request.GET.filter %} checked{% endif %}>
	<label title="Værdipapirer til salg hos Nordnet, Saxo Bank eller begge steder" for="nordnetsaxo">Nordnet og/eller Saxo Bank</label>
	<input type="radio" id="nordnet" name="filter" value="nordnet"{% if request.GET.filter == "nordnet" %} checked{% endif %}>
	<label title="Værdipapirer til salg hos Nordnet" for="nordnet">Nordnet</label>		
	<input type="radio" id="saxo" name="filter" value="saxo"{% if request.GET.filter == "saxo" %} checked{% endif %}>
	<label title="Værdipapirer til salg hos Saxo Bank" for="saxo">Saxo Bank</label>
	<input type="radio" id="ikkenordnetsaxo" name="filter" value="ikkenordnetsaxo"{% if request.GET.filter == "ikkenordnetsaxo" %} checked{% endif %}>
	<label title="Værdipapirer, der hverken er til salg hos Nordnet eller Saxo Bank" for="ikkenordnetsaxo">Ikke Nordnet og/eller Saxo</label>
	<input type="radio" id="alle" name="filter" value="alle"{% if request.GET.filter == "alle" %} checked{% endif %}>
	<label title="Alle værdipapirer, både dem der kan købes hos Nordnet/Saxo Bank og de, der ikke kan" for="alle">Hele pivtøjet</label>
</form>
</p>

<table>
	<tr>
		<th><a href="{% url 'stocks_index' %}?sort={% if request.GET.sort == "-name" %}name{% else %}-name{% endif %}">Navn</a></th>
		<th><a href="{% url 'stocks_index' %}?sort={% if request.GET.sort == "isin" %}-isin{% else %}isin{% endif %}">Isin</a></th>
		<th><a href="{% url 'stocks_index' %}?sort={% if request.GET.sort == "morningstar_aop" %}-morningstar_aop{% else %}morningstar_aop{% endif %}">Løbende omkostninger</a></th>
		<th><a href="{% url 'stocks_index' %}?sort={% if request.GET.sort == "nordnet_aop" %}-nordnet_aop{% else %}nordnet_aop{% endif %}">ÅOP</a></th>
		<th>Investorinformation</th>
		<th>Morningstar</th>
		<th>Nordnet</th>
		<th>Saxo</th>
	</tr>
	{% for stock in stocks %}
	<tr>
		<td>{{ stock.name }}</td>
		<td>{{ stock.isin }}</td>
		<td>{% if stock.morningstar_aop %}{{ stock.morningstar_aop }}%{% endif %}</td>
		<td>{% if stock.nordnet_aop %}{{ stock.nordnet_aop }}%{% endif %}</td>
		<td>{% if stock.nordnet_prospect %}<a href="{{ stock.nordnet_prospect }}">Info</a>{% elif stock.morningstar_prospect %}<a href="{{ stock.morningstar_prospect }}">Info</a>{% endif %}</td>
		<td>{% if stock.morningstar_url %}<a href="{{ stock.morningstar_url }}">Link</a>{% endif %}</td>
		<td>{% if stock.nordnet_url %}<a href="{{ stock.nordnet_url }}">Link</a>{% endif %}</td>
		<td>{% if stock.saxo_url %}<a href="{{ stock.saxo_url }}">Link</a>{% endif %}</td>
	</tr>
	{% endfor %}
</table>

<a name="forbehold"></a>
<h2>Forbehold</h2>
<p>Alt hvad du læser på denne side er løgn og fiktion fra ende til anden og har ingen relation til virkeligheden. Hvis du kunne finde på at læse indholdet, som om det omhandlede værdipapirer, eller at købe, sælge eller tage dig af din personlige hygiejne med værdipapirer på grund af indholdet på denne side, er det fuldstændig et hundrede procent på eget ansvar. Alt hvad der findes på siden er fejlbehæftet, forældet og lavet af en uduelig amatør uden forstand på noget som helst. Du skal regne med, at alle links fører til nogle andre værdipapirer, end man skulle tro, og at de værdipapirer som står til salg et sted sikkert ikke sælges der - og omvendt. Alle oplysninger om løbende omkostninger og ÅOP er fundet ved hjælp af hønebingo og dermed så godt som tilfældige.</p>
{% endspaceless %}{% endblock %}

Wallnot's Twitter bot, version 3

Wallnot's Twitter bot finds shared articles from Politiken and Zetland on Twitter and shares them with the world. It works like this:

# Author: Morten Helmstedt. E-mail: helmstedt@gmail.com

import requests
from bs4 import BeautifulSoup
from datetime import datetime
from datetime import date
from datetime import timedelta
import json
import time
import random
from TwitterAPI import TwitterAPI
from nested_lookup import nested_lookup

# CONFIGURATION #
# List to store articles to post to Twitter
articlestopost = []

# Search tweets from last 3 hours
now = datetime.utcnow()
since_hours = 3
since = now - timedelta(hours=since_hours)
since_string = since.strftime("%Y-%m-%dT%H:%M:%SZ")

# Search configuration
# https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent
# https://github.com/twitterdev/Twitter-API-v2-sample-code/tree/master/Recent-Search
tweet_fields = "tweet.fields=entities"
media_fields = "media.fields=url"
max_results = "max_results=100"
start_time = "start_time=" + since_string

# Twitter API login
client_key = ''
client_secret = ''
access_token = ''
access_secret = ''
api = TwitterAPI(client_key, client_secret, access_token, access_secret)

bearer_token = ''

# POLITIKEN #
# Run search
query = 'politiken.dk/del'

url = "https://api.twitter.com/2/tweets/search/recent?query={}&{}&{}&{}&{}".format(
	query, tweet_fields, media_fields, max_results, start_time
)
headers = {"Authorization": "Bearer {}".format(bearer_token)}
response = requests.request("GET", url, headers=headers)
json_response = response.json()

urllist = list(set(nested_lookup('expanded_url', json_response)))

# Only process urls that were not in our last Twitter query
proceslist = []
with open("./pol_lastbatch.json", "r", encoding="utf8") as fin:
	lastbatch = list(json.load(fin))
	for url in urllist:
		if url not in lastbatch and query in url:
			proceslist.append(url)
# Save current query to use for next time
with open("./pol_lastbatch.json", "wt", encoding="utf8") as fout:
	lastbatch = json.dumps(urllist)
	fout.write(lastbatch)

# Request articles and get titles and dates and sort by dates
articlelist = []

pol_therewasanerror = False
for url in proceslist:
	try:
		if 'https://www.google.com' in url:
			start = url.find('url=')+4
			end = url.find('&', start)
			url = url[start:end]	
		if not len(url) == 37:
			url = url[:37]
		data = requests.get(url)
		result = data.text
		if '"isAccessibleForFree": "True"' not in result:
			realurl = data.history[0].headers['Location']
			if not "/article" in realurl and not ".ece" in realurl:
				start_of_unique_id = realurl.index("/art")+1
				end_of_unique_id = realurl[start_of_unique_id:].index("/")
				unique_id = realurl[start_of_unique_id:start_of_unique_id+end_of_unique_id]
			elif "/article"	in realurl and ".ece" in realurl:
				start_of_unique_id = realurl.index("/article")+1
				end_of_unique_id = realurl[start_of_unique_id:].index(".ece")
				unique_id = realurl[start_of_unique_id:start_of_unique_id+end_of_unique_id]
			articlelist.append({"id": unique_id, "url": url})
	except Exception as e:
		print(url)
		print(e)
		pol_therewasanerror = True

#If something fails, we'll process everything again next time			
if pol_therewasanerror == True:
	with open("./pol_lastbatch.json", "wt", encoding="utf8") as fout:
		urllist = []
		lastbatch = json.dumps(urllist)
		fout.write(lastbatch)
	
# Check if article is already posted and update list of posted articles
with open("./pol_published_v2.json", "r", encoding="utf8") as fin:
	alreadypublished = list(json.load(fin))
	# File below used for paywall.py to update wallnot.dk
	for article in articlelist:
		hasbeenpublished = False
		for published_article in alreadypublished:
			if article['id'] == published_article['id']:
				hasbeenpublished = True
				break
		if hasbeenpublished == False:
			alreadypublished.append(article)
			articlestopost.append(article)
	# Save updated already published links
	with open("./pol_published_v2.json", "wt", encoding="utf8") as fout:
		alreadypublishedjson = json.dumps(alreadypublished)
		fout.write(alreadypublishedjson)

# ZETLAND #
# Run search
query = 'zetland.dk/historie'

url = "https://api.twitter.com/2/tweets/search/recent?query={}&{}&{}&{}&{}".format(
	query, tweet_fields, media_fields, max_results, start_time
)
headers = {"Authorization": "Bearer {}".format(bearer_token)}
response = requests.request("GET", url, headers=headers)
json_response = response.json()

urllist = list(set(nested_lookup('expanded_url', json_response)))

# Only process urls that were not in our last Twitter query
proceslist = []
with open("./zet_lastbatch.json", "r", encoding="utf8") as fin:
	lastbatch = list(json.load(fin))
	for url in urllist:
		if url not in lastbatch and query in url:
			proceslist.append(url)
# Save current query to use for next time
with open("./zet_lastbatch.json", "wt", encoding="utf8") as fout:
	lastbatch = json.dumps(urllist)
	fout.write(lastbatch)

# Request articles and get titles and dates and sort by dates
articlelist = []
titlecheck = []

zet_therewasanerror = False
for url in proceslist:
	try:
		if 'https://www.google.com' in url:
			start = url.find('url=')+4
			end = url.find('&', start)
			url = url[start:end]		
		data = requests.get(url)
		result = data.text
		soup = BeautifulSoup(result, "lxml")
		title = soup.find('meta', attrs={'property':'og:title'})
		title = title['content']
		timestamp = soup.find('meta', attrs={'property':'article:published_time'})
		timestamp = timestamp['content']
		timestamp = timestamp[:timestamp.find("+")]
		dateofarticle = datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%f')
		if title not in titlecheck:
			articlelist.append({"title": title, "url": url, "date": dateofarticle})
			titlecheck.append(title)
	except Exception as e:
		print(url)
		print(e)
		zet_therewasanerror = True

#If something fails, we'll process everything again next time
if zet_therewasanerror == True:
	with open("./zet_lastbatch.json", "wt", encoding="utf8") as fout:
		urllist = []
		lastbatch = json.dumps(urllist)
		fout.write(lastbatch)


			
articlelist_sorted = sorted(articlelist, key=lambda k: k['date']) 

# Check if article is already posted and update list of posted articles
with open("./zet_published.json", "r", encoding="utf8") as fin:
	alreadypublished = list(json.load(fin))
	for art in articlelist_sorted:
		title = art['title']
		if title not in alreadypublished:
			alreadypublished.append(title)
			articlestopost.append(art)
	# Save updated already published links
	with open("./zet_published.json", "wt", encoding="utf8") as fout:
		alreadypublishedjson = json.dumps(alreadypublished, ensure_ascii=False)
		fout.write(alreadypublishedjson)


# POST TO TWITTER #
friendlyterms = ["flink","rar","gavmild","velinformeret","intelligent","sød","afholdt","bedårende","betagende","folkekær","godhjertet","henrivende","smagfuld","tækkelig","hjertensgod","graciøs","galant","tiltalende","prægtig","kær","godartet","human","indtagende","fortryllende","nydelig","venlig","udsøgt","klog","kompetent","dygtig","ejegod","afholdt","omsorgsfuld","elskværdig","prægtig","skattet","feteret"]
enjoyterms = ["God fornøjelse!", "Nyd den!", "Enjoy!", "God læsning!", "Interessant!", "Spændende!", "Vidunderligt!", "Fantastisk!", "Velsignet!", "Glæd dig!", "Læs den!", "Godt arbejde!", "Wauv!"]

if articlestopost:
	for art in articlestopost:
		if "zetland" in art['url']:
			medium = "@ZetlandMagasin"
		else:
			medium = "@politiken"
		friendlyterm = random.choice(friendlyterms)
		enjoyterm = random.choice(enjoyterms)
		status = "En " + friendlyterm + " abonnent på " + medium + " har delt en artikel. " + enjoyterm
		twitterstatus = status + " " + art['url']
		try:
			twitterupdate = api.request('statuses/update', {'status': twitterstatus})
		except Exception as e:
			print(e)
		time.sleep(15)

A crawler for directory listings on the web

If you've spent any time on the internet, you have probably come across an Apache directory listing at some point.

Many web administrators choose to hide these listings of the files on a web server, which the Apache web server software can generate automatically.

But I discovered by accident that I could see what the photo agency Magnum had uploaded to their WordPress installation.

I decided to try to make a local copy, so I could look at beautiful photographs without having to wait for downloads from the internet.

First I tried Wget, a small program designed to mirror websites locally. But Wget had trouble fetching and chewing through the long lists of files. One of them was 36 megabytes, which is an awful lot of links.

So I wrote a small Python program that can chew through this kind of directory and file listing and download everything locally.

Here it is:

# apache-directory-downloader.py
# Author: Morten Helmstedt. E-mail: helmstedt@gmail.com
'''A program to fetch files from standard apache directory listings on the internet.
See https://duckduckgo.com/?t=ffab&q=apache%2Bdirectory%2Blisting&ia=images&iax=images
for examples of what this is.'''

import requests					# Send http requests and receive responses
from bs4 import BeautifulSoup	# Parse HTML data structures, e.g. to search for links
import os						# Used to create directories at local destination
import shutil					# Used to copy binary files from http response to local destination
import re						# Regex parser and search functions

# Terms to exclude, files with these strings in them are not downloaded
exclude = [
	"-medium",
	"-overlay",
	"-teaser-",
	"-overlay",
	"-thumbnail",
	"-collaboration",
	"-scaled",
	"-photographer-featured",
	"-photographer-listing",
	"-full-on-mobile",
	"-theme-small-teaser",
	"-post",
	"-large",
	"-breaker",
	]

# Takes an url and collects all links
def request(url, save_location):
	# Print status to let user know that something is going on
	print("Requesting:", url)
	# Fetch url
	response = requests.get(url)
	# Parse response
	soup = BeautifulSoup(response.text, "lxml")
	# Search for all links and exclude certain strings and patterns from links
	urllist = [a['href'] for a in soup.find_all('a', href=True) if not '?C=' in a['href'] and not a['href'][0] == "/" and not any(term in a['href'] for term in exclude) and not re.search(r"\d\d[x]\d\d", a['href'])]
	# If status code is not 200 (OK), add url to list of errors
	if not response.status_code == 200:
		errorlist.append(url)
	# Send current url, list of links and current local save collection to scrape function
	return scrape(url, urllist, save_location)

def scrape(path, content, save_location):
	# Loop through all links
	for url in content:
		# Print status to let user know that something is going on
		print("Parsing/downloading:", path+url)
		# If there's a slash ("/") in the link, it is a directory
		if "/" in url:
			# Create local directory if it doesn't exist
			try:
				os.mkdir(save_location+url)
			except:
				pass
			# Run request function to fetch contents of directory
			request(path+url, save_location+url)
		# If the link doesn't contain a slash, it's a file and is saved
		else:
			# Check if file already exists, e.g. has been downloaded in a prior run
			if not os.path.isfile(save_location+url):
				# If file doesn't exist, fetch it from remote location
				file = requests.get(path+url, stream=True)
				# Print status to let user know that something is going on
				print("Saving file:", save_location+url)
				# Save file to local destination
				with open(save_location+url, 'wb') as f:
					# Decodes file if received compressed from server
					file.raw.decode_content = True
					# Copies binary file to local destination
					shutil.copyfileobj(file.raw, f)

# List to collect crawling errors
errorlist = []
# Local destination, e.g. 'C:\Downloads' for Windows
save_location = "C:/Downloads/"
# Remote location, e.g. https://example.com/files
url = "https://content.magnumphotos.com/wp-content/uploads/"
# Call function to start crawling
request(url, save_location)
# Print any crawling errors
print(errorlist) 

A little Google crawler

For Wallnot, I wanted to get hold of every single Zetland story that Google has indexed.

For that purpose I wrote a little program that works its way through Google's search results. The program takes a short break between each page of results it fetches, because Google, paradoxically enough, apparently isn't too fond of robots itself.

import requests
from bs4 import BeautifulSoup
import time
import random

linkcollection = []
def google_results(url):
	try:
		result = requests.get(url)
		soup = BeautifulSoup(result.text, "lxml")
		links = soup.find_all('a')

		for link in links:
			if "zetland.dk/historie/" in link['href']:
				full_link = link['href']
				url = full_link[full_link.find("q=")+2:full_link.find("&")]
				linkcollection.append(link['href'])
				print(link['href'])
		next_page = soup.find('a', attrs={'aria-label': 'Næste side'})
		time_to_sleep = random.randrange(3,7)
		print("Sleeping " + str(time_to_sleep) + " seconds")
		time.sleep(time_to_sleep)
		google_results('https://www.google.com'+next_page['href'])
	except TypeError:
		print("No more results it seems")

url = 'https://www.google.com/search?q=site:zetland.dk/historie'
google_results(url)

with open("./googlelist.txt", "wt", encoding="utf8") as fout:
	fout.write(str(linkcollection))

A short-link tool with Django/Python

By now there probably aren't many people left who don't have their own short-link service. One of the best known is https://bitly.com/.

As an exercise, I've built the short-link service https://wallnot.dk/link. Admittedly the links don't end up particularly short, but for now I'm saving the cost of a separate domain name. It's not as if there's a shortage of options elsewhere.

Building a short-link tool in Django is surprisingly easy.

Here is a little recipe.

Recipe for a short-link tool

After creating my project (see the guide at https://www.djangoproject.com/start/ if needed), I get to work.

I start with my data model in models.py. Each link has a destination (the long link), a short link and a timestamp. The destination is a URL, the short link is a handful of characters, and the timestamp is, well, a timestamp:

from django.db import models
from django.utils import timezone

class Link(models.Model):
    destination = models.URLField(max_length=500)
    shortlink = models.CharField(max_length=6, unique=True)
    date = models.DateTimeField(default=timezone.now, editable=False)

I know I'll need a form, which I create in forms.py. Here I use a type of form called a ModelForm: Django makes sure the validation rules match the kind of data I have in my underlying data model:

from django.forms import ModelForm, URLInput
from .models import Link

class LinkForm(ModelForm):
    class Meta:
        model = Link
        fields = ['destination']
        widgets = {
            'destination': URLInput(attrs={'placeholder': 'Indsæt link'}),
        }

The logic behind each view in Django goes in views.py. I have two different views: one that renders my front page, showing both the form for entering links and the resulting short link (index), and one that is triggered when a user visits a short link (redirect).

Finally, I have a function that generates the short links themselves.

I've commented the code heavily, so I hope it's easy to follow:

from django.shortcuts import render
from django.http import HttpRequest, HttpResponseRedirect
from .models import Link
from .forms import LinkForm
import hashlib
import bcrypt

# Function to create a random hash to use as short link address
def create_shortlink(destination):
	salt = bcrypt.gensalt().decode()	# Random salt
	destination = destination+salt		# Salt added to destination URL
	hash = hashlib.md5(destination.encode()).hexdigest() # Hashed to alphanumeric string
	return hash[:6]	# First 6 characters of that string 

# Front page with a form to enter destination address. Short URL returned.
def index(request):
	form = LinkForm()	# Loads form
	url = 'https://wallnot.dk/link/'	# site url
	# If a destination is submitted, a short link is returned
	if request.method == 'POST':
		form = LinkForm(request.POST) # Form instance with submitted data
		# Check whether submitted data is valid
		if form.is_valid():
			destination = form.cleaned_data['destination'] # Submitted destination
			# If destination is already in database, return short link for destination from database
			try:
				link = Link.objects.get(destination=destination)
				sharelink = url + link.shortlink # Creates full URL using page URL and hash
			# If destination is not in database, create a new short link
			except:
				# Loop to create a unique hash value for short link
				unique_link = False
				while unique_link == False:
					hash = create_shortlink(destination)	# Return hash
					# First we check whether the hash is a duplicate
					try:
						Link.objects.get(shortlink=hash)	# Check whether hash is used
					# If not a duplicate, an error is thrown, and we can save the hash
					except:
						link = form.save(commit=False)	# Prepare to save form destination data and hash
						link.shortlink = hash	# Sets short link to hash value
						link.save()	# Saves destination and short link to database
						sharelink = url + link.shortlink # Creates full URL using page URL and hash
						unique_link = True	# If check causes error, hash is unused, exit loop
			context = {'sharelink': sharelink, 'form': form}	# Dictionary with variables used in template
			return render(request, 'links/index.html', context)
		# If form is invalid, just renders page.
		else:
			context = {'form': form}
			return render(request, 'links/index.html', context)
	# Render page with form before user has submitted
	context = {'form': form}
	return render(request, 'links/index.html', context)

# Short link redirect to destination URL
def redirect(request, shortlink):
	# Query the database for short link, if there is a hit, redirect to destination URL
	try:
		link = Link.objects.get(shortlink=shortlink)
		return HttpResponseRedirect(link.destination)
	# An error means the short link doesn't exist, so the front page template is shown with an error variable
	except:
		error = True
		context = {'error': error}
		return render(request, 'links/index.html', context)

To serve the pages, I have urls.py, which tells Django how a URL entered by the user maps to the functions in views.py:

from django.urls import path
from . import views

urlpatterns = [
    path('', views.index, name='index'),
    path('<shortlink>', views.redirect, name='redirect'),
]

And finally I have index.html, the template my page is generated from. If you haven't tried Django before, notice everything inside curly braces ({}): they are used partly for simple logic (e.g. if statements), partly to insert variables from views.py into the page being generated.

If you look at the logic, I use the if statements to get by with a single template no matter which situation the user is in, so that the content differs when, say, the user has made a mistake filling in the form versus when the user hasn't submitted the form yet.

There is also a little piece of JavaScript in the file that lets the user copy the short link to the clipboard.

<!doctype html>
<html lang="da">
  <head>
    <!-- Required meta tags -->
	<title>Korte links</title>
	<meta name="description" content="Skønne korte links">
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
	<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
	<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
	<link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
	<link rel="manifest" href="/site.webmanifest">
	<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
	<meta name="msapplication-TileColor" content="#ffc40d">
	<meta name="theme-color" content="#ffffff">

	<style>
	body { 
		font-family: -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";
		text-align: center;
		box-sizing: border-box;	
	}

	h1 {
		margin-top: 0;
		font-size: 4.0rem;
		font-weight: 300;
		line-height: 1.2;
		margin-bottom: 1.5rem;
	}

	h2 {
		margin-top: 1.5rem;
		font-size: 2.5rem;
		font-weight: 300;
		line-height: 1.2;
		margin-bottom: 1.5rem;
	}

	input {
		width: 60%;
		line-height: 1.2;
		font-size: 1.0rem;
		height: 1.5rem;
		padding: 10px;
	}

	button {
		width: 50%;
		border: 1px solid transparent;
		padding: .375rem .75rem;
		font-size: 1rem;
		line-height: 1.8;
		height: 2.5rem;
		border-radius: .25rem;
		color: #fff;
		background-color: #28a745;
		border-color: #28a745;
	}

	button:focus {
		box-shadow: 0 0 0 0.2rem rgba(72,180,97,.5)
	}

	button:hover {
		background-color: #218838;
		border-color: #1e7e34;
	}
	
	.footer {
		position: fixed;
		left: 0;
		bottom: 0;
		width: 100%;
		background-color: #f1f1f1;
		color: black;
	}	
	</style>
  </head>
  <body>

<h1>Lav et kort link</h1>

{% if form %}
	<form method="post">
	{% csrf_token %}
	<p>{{ form.destination }}</p>
	<p><button type="submit" value="Lav et kort link">Lav et kort link</button></p>
	</form>

	{% if form.destination.errors %}
		<h2>Tast et gyldigt link!</h2>
		<p><em>Du har tastet et ugyldigt link. Prøv igen med et gyldigt link med http://, https://, ftp:// eller ftps:// foran.</em></p>
	{% endif %}

	{% if request.method == "POST" and not form.destination.errors %}
		<h2>Her er dit link:</h2>
		<p><a href="{{ sharelink }}">{{ sharelink }}</a></p>
		<button class="copy">Kopier link</button>
	{% endif %} 

{% endif %}

{% if error %}
<h2>Har du tastet forkert?</h2>
<p><em>Du har prøvet at bruge et kort link. Desværre er det link, du har tastet, ikke registreret. Måske er du kommet til at taste forkert?</em></p>
<p><a href="{% url 'index' %}">Til forsiden</a>
{% endif %} 

<div class="footer">
  <p>Lav relativt korte links på wallnot.dk. Gratis og fri for annoncer og overvågning.</p>
</div>


<script>
function fallbackCopyTextToClipboard(text) {
  var textArea = document.createElement("textarea");
  textArea.value = text;
  document.body.appendChild(textArea);
  textArea.focus();
  textArea.select();

  try {
    var successful = document.execCommand("copy");
    var msg = successful ? "successful" : "unsuccessful";
    console.log("Fallback: Kopiering gik fint " + msg);
  } catch (err) {
    console.error("Fallback: Kunne ikke kopiere", err);
  }
  document.body.removeChild(textArea);
}

function copyTextToClipboard(text) {
  if (!navigator.clipboard) {
    fallbackCopyTextToClipboard(text);
    return;
  }
  navigator.clipboard.writeText(text).then(function() {
    console.log('Kopiering gik fint');
  }, function(err) {
    console.error('Kunne ikke kopiere', err);
  });
}

var copy = document.querySelector('.copy');

// The copy button only exists after a link has been generated
if (copy) {
  copy.addEventListener('click', function(event) {
    copyTextToClipboard('{{ sharelink }}');
  });
}
</script>

</body>
</html>

Package tracking across several carriers

At the moment I'm practicing using Django, a tool for building web applications in Python. It's really neat.

It took a couple of hours to get https://wallnot.dk/pak/ online, but then again no effort has gone into the user interface, and the machinery behind it could certainly be made smarter. The page can be used to track packages on their way to you from several different carriers (PostNord, GLS, DAO).
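
I haven't shown the code for the page here, but the basic idea is just one lookup per carrier behind a common interface. Below is a minimal sketch of that idea, not the actual wallnot.dk code; the carrier functions are empty placeholders, since each carrier's tracking API is different:

# Minimal sketch: one placeholder lookup per carrier behind a common dispatcher.
# None of these call a real API; they only illustrate the structure.
def track_postnord(number):
	return {'carrier': 'PostNord', 'number': number, 'events': []}

def track_gls(number):
	return {'carrier': 'GLS', 'number': number, 'events': []}

def track_dao(number):
	return {'carrier': 'DAO', 'number': number, 'events': []}

CARRIERS = {'postnord': track_postnord, 'gls': track_gls, 'dao': track_dao}

def track(carrier, number):
	# Route a tracking number to the chosen carrier's lookup function
	return CARRIERS[carrier](number)

print(track('postnord', '12345678901234567890'))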

If you have packages on the way from other carriers and want to share the tracking numbers with me, I'm interested.

A new take on a simulation of War

My Python simulation of the card game War wasn't particularly elegant. For war, double war and so on, a lot of "if" statements were nested inside each other with the same logic. (I also found some silly mistakes, so I have updated the original post.)

So I've tried writing a new version.

It works fine and gives the following output for 1,000,000 games:

Der blev spillet 1000000 spil
Det gennemsnitlige antal dueller var 177.217668
Det højeste antal dueller var 2238
Det laveste antal dueller var 3
Den spiller med højest sum af kort vandt 573276 gange (57%)
Den spiller med højest sum af kort tabte 397771 gange (40%)
Uafgjorte spil: 1
Antal enkeltkrig, dobbeltkrig, osv.: 12348559, 886651, 60655, 3722, 218, 11, 2
Vendte kort uden krig og med krig: 176766958, 13299818
Spillene tog 225.4 sekunder

The new program:

# KRIG #
import time
start_time = time.time()
import random

number_of_games_to_play = 1000000
number_of_games_counter = 0
number_of_plays_list = []
highest_deck_won = 0
highest_deck_lost = 0
equal_games = 0
war_types = [0,0,0,0,0,0,0]
war_or_not_war = [0,0]

# Loop to play games
percentage_copy = 0
i = 0
while i < number_of_games_to_play:
	# One is added to i so loop finishes once number of games have been played
	i += 1
	
	# Prints percentage done with 1 decimal every time it changes
	percentage_completed = round((i/number_of_games_to_play*100), 1)
	if percentage_copy != percentage_completed:
		print("{}% done".format(percentage_completed))
	percentage_copy = percentage_completed

	# Create a deck, shuffle it and divide between players
	deck = [2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7,8,8,8,8,9,9,9,9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14]
	random.shuffle(deck)
	player_a_deck = deck[0:26]
	player_b_deck = deck[26:52]

	# Which player has the highest sum of cards
	card_sum_a = sum(player_a_deck)
	card_sum_b = sum(player_b_deck)
	if card_sum_a > card_sum_b:
		highest_deck = "a"
	elif card_sum_a < card_sum_b:
		highest_deck = "b"
	else:
		highest_deck = "equal"
	
	# Loop to turn cards within games
	number_of_plays = 0
	index = 1
	while True:
		try:
			if index == 1:
				number_of_plays += 1	# Add 1 to number of plays counter
				war_count = 0			# Reset war counter	
			# Player a has the largest card
			if player_a_deck[index-1] > player_b_deck[index-1]:
				war_or_not_war[0] += 1
				player_a_deck.extend(player_a_deck[:index])
				player_a_deck.extend(player_b_deck[:index])	
				del player_a_deck[:index]
				del player_b_deck[:index]
				index = 1			# If a play is decided, index is reset
			# Player b has the largest card
			elif player_a_deck[index-1] < player_b_deck[index-1]:
				war_or_not_war[0] += 1
				# Cards are added in different order to deck in order to avoid (game) risk of going on forever (infinite loop)!
				player_b_deck.extend(player_b_deck[:index])	
				player_b_deck.extend(player_a_deck[:index])
				del player_a_deck[:index]
				del player_b_deck[:index]
				index = 1			# If a play is decided, index is reset
			# War is on!
			else:
				war_or_not_war[1] += 1
				index += 4			# In case of war the index is upped by four cards
				war_types[war_count] += 1
				war_count += 1
		# If a player has too few cards left to participate, game is over
		except IndexError:
			# If a player had no cards left and index is 1, the game was already over, so number of plays is corrected
			if index == 1:
				number_of_plays -= 1
			break
	
	# Single game is over #
	# Compare deck sizes to decide winner and add values to counters and lists
	deck_a = len(player_a_deck)
	deck_b = len(player_b_deck)
	if deck_a > deck_b:
		if highest_deck == "a":
			highest_deck_won += 1
		elif highest_deck == "b":
			highest_deck_lost += 1
	elif deck_a < deck_b:
		if highest_deck == "a":
			highest_deck_lost += 1
		elif highest_deck == "b"    :
			highest_deck_won += 1
	else:
		equal_games += 1
	
	number_of_plays_list.append(number_of_plays)
	number_of_games_counter += 1
	
# All games are over #
print("Der blev spillet {} spil".format(number_of_games_counter))
print("Det gennemsnitlige antal dueller var {}".format(sum(number_of_plays_list)/len(number_of_plays_list)))
print("Det højeste antal dueller var {}".format(max(number_of_plays_list)))
print("Det laveste antal dueller var {}".format(min(number_of_plays_list)))
print("Den spiller med højest sum af kort vandt {} gange ({}%)".format(highest_deck_won, round(highest_deck_won/number_of_games_counter*100)))
print("Den spiller med højest sum af kort tabte {} gange ({}%)".format(highest_deck_lost, round(highest_deck_lost/number_of_games_counter*100)))
print("Uafgjorte spil: {}".format(equal_games))
print("Antal enkeltkrig, dobbeltkrig, osv.: {}".format(", ".join(str(x) for x in war_types)))
print("Vendte kort uden krig og med krig: {}".format(", ".join(str(x) for x in war_or_not_war)))
print("Spillene tog {} sekunder".format(round(time.time() - start_time, 1)))

Things you don't want to know about the card game War

If you happen to have a child around the age of 5, you can play the card game War. The cards are shuffled and dealt evenly between 2 players, each player turns over the top card of their pile at the same time, the highest card wins, and if the cards are equally high, it's war. It's so simple that you might as well get a computer to play it.

So I wrote a little program in Python that can simulate the card game.

I discovered a hole in the rules: nowhere is it described what happens when a player doesn't have enough cards to take part in a war (or a double war, triple war, and so on). I decided that if a player at some point lacks the cards to take part, the player who can't take part loses. In the very rare situation where neither player has enough cards to take part (a many-times-multiple war at the start of the game), the player with the most cards wins. If both players have the same number of cards, it's a draw.

I had the computer play 1 million games of War, and here is what I can tell you about War that you don't want to know:

  • The average number of duels in a game of War is 177
  • The game with the most duels had 1,825 duels
  • The game with the fewest had 4 duels
  • The player with the highest sum of cards after the shuffle won 573,405 times
  • The player with the lowest sum of cards won 397,602 times
  • Over the course of the games there were:
    • Single wars: 12,366,762 times
    • Double wars: 888,024 times
    • Triple wars: 60,727 times
    • Quadruple wars: 3,852 times
    • Quintuple wars: 206 times
    • Sextuple wars: 10 times

Here is the program:

import random

krig1 = 0
krig2 = 0
krig3 = 0
krig4 = 0
krig5 = 0
krig6 = 0
krig7 = 0

number_of_plays_list = []
not_war = 0
war = 0

highest_deck_won = 0
highest_deck_lost = 0
equal_games = 0

i = 0
number_of_games = 1000000

while i < number_of_games:
	number_of_plays_counter = 0
	deck = [2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7,8,8,8,8,9,9,9,9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14]
	random.shuffle(deck)

	player_a_deck = deck[0:26]
	player_b_deck = deck[26:52]

	if sum(player_a_deck) > sum(player_b_deck):
		highest_deck = "a"
	elif sum(player_a_deck) < sum(player_b_deck):
		highest_deck = "b"
	else:
		highest_deck = "equal"

	while len(player_a_deck) > 0 and len(player_b_deck) > 0:
		number_of_plays_counter += 1
		if player_a_deck[0] > player_b_deck[0]:
			not_war += 1
			player_a_deck.append(player_a_deck[0])
			player_a_deck.append(player_b_deck[0])
			del player_a_deck[0]
			del player_b_deck[0]
		elif player_a_deck[0] < player_b_deck[0]:
			not_war += 1
			player_b_deck.append(player_b_deck[0])
			player_b_deck.append(player_a_deck[0])
			del player_a_deck[0]
			del player_b_deck[0]
		elif player_a_deck[0] == player_b_deck[0]:
			war += 1
			krig1 += 1
			if len(player_a_deck) >= 5 and len(player_b_deck) >= 5:
				if player_a_deck[4] > player_b_deck[4]:
					player_a_deck.extend(player_a_deck[0:5])
					player_a_deck.extend(player_b_deck[0:5])
					del player_a_deck[0:5]
					del player_b_deck[0:5]
				elif player_a_deck[4] < player_b_deck[4]:
					player_b_deck.extend(player_b_deck[0:5])
					player_b_deck.extend(player_a_deck[0:5])
					del player_a_deck[0:5]
					del player_b_deck[0:5]
				elif player_a_deck[4] == player_b_deck[4]:
					krig2 += 1
					if len(player_a_deck) >= 9 and len(player_b_deck) >= 9:			
						if player_a_deck[8] > player_b_deck[8]:
							player_a_deck.extend(player_a_deck[0:9])
							player_a_deck.extend(player_b_deck[0:9])
							del player_a_deck[0:9]
							del player_b_deck[0:9]
						elif player_a_deck[8] < player_b_deck[8]:
							player_b_deck.extend(player_b_deck[0:9])
							player_b_deck.extend(player_a_deck[0:9])
							del player_a_deck[0:9]
							del player_b_deck[0:9]	
						elif player_a_deck[8] == player_b_deck[8]:
							krig3 += 1
							if len(player_a_deck) >= 13 and len(player_b_deck) >= 13:
								if player_a_deck[12] > player_b_deck[12]:
									player_a_deck.extend(player_a_deck[0:13])
									player_a_deck.extend(player_b_deck[0:13])
									del player_a_deck[0:13]
									del player_b_deck[0:13]
								elif player_a_deck[12] < player_b_deck[12]:
									player_b_deck.extend(player_b_deck[0:13])
									player_b_deck.extend(player_a_deck[0:13])
									del player_a_deck[0:13]
									del player_b_deck[0:13]	
								elif player_a_deck[12] == player_b_deck[12]:
									krig4 += 1
									if len(player_a_deck) >= 17 and len(player_b_deck) >= 17:
										if player_a_deck[16] > player_b_deck[16]:
											player_a_deck.extend(player_a_deck[0:17])
											player_a_deck.extend(player_b_deck[0:17])
											del player_a_deck[0:17]
											del player_b_deck[0:17]
										elif player_a_deck[16] < player_b_deck[16]:
											player_b_deck.extend(player_b_deck[0:17])
											player_b_deck.extend(player_a_deck[0:17])
											del player_a_deck[0:17]
											del player_b_deck[0:17]
										elif player_a_deck[16] == player_b_deck[16]:
											krig5 += 1
											if len(player_a_deck) >= 21 and len(player_b_deck) >= 21:
												if player_a_deck[20] > player_b_deck[20]:
													player_a_deck.extend(player_a_deck[0:21])
													player_a_deck.extend(player_b_deck[0:21])
													del player_a_deck[0:21]
													del player_b_deck[0:21]
												elif player_a_deck[20] < player_b_deck[20]:
													player_b_deck.extend(player_b_deck[0:21])
													player_b_deck.extend(player_a_deck[0:21])
													del player_a_deck[0:21]
													del player_b_deck[0:21]										
												elif player_a_deck[20] == player_b_deck[20]:
													krig6 += 1
													if len(player_a_deck) >= 25 and len(player_b_deck) >= 25:
														if player_a_deck[24] > player_b_deck[24]:
															player_a_deck.extend(player_a_deck[0:25])
															player_a_deck.extend(player_b_deck[0:25])
															del player_a_deck[0:25]
															del player_b_deck[0:25]
														elif player_a_deck[24] < player_b_deck[24]:
															player_b_deck.extend(player_b_deck[0:25])
															player_b_deck.extend(player_a_deck[0:25])
															del player_a_deck[0:25]
															del player_b_deck[0:25]
														elif player_a_deck[24] == player_b_deck[24]:
															krig7 += 1
															break
													else:
														break
											else:
												break
									else:
										break
							else:
								break
					else:
						break
			else:
				break
	if len(player_a_deck) > len(player_b_deck):
		if highest_deck == "a":
			highest_deck_won += 1
		elif highest_deck == "b"	:
			highest_deck_lost += 1
	elif len(player_a_deck) < len(player_b_deck):
		if highest_deck == "a":
			highest_deck_lost += 1
		elif highest_deck == "b"	:
			highest_deck_won += 1
	else:
		equal_games += 1
	number_of_plays_list.append(number_of_plays_counter)
	i += 1
	print(i/number_of_games)

print("Der blev spillet {} spil".format(number_of_games))
print("Det gennemsnitlige antal dueller var {}".format(sum(number_of_plays_list)/len(number_of_plays_list)))
print("Det højeste antal dueller var {}".format(max(number_of_plays_list)))
print("Det laveste antal dueller var {}".format(min(number_of_plays_list)))
print("Den spiller med højest sum af kort vandt {} gange".format(highest_deck_won))
print("Den spiller med højest sum af kort tabte {} gange".format(highest_deck_lost))
print(krig1, krig2, krig3, krig4, krig5, krig6, krig7)
print(not_war, war)
print("Uafgjorte spil: {}".format(equal_games))