I like to document my doings and for about 15 years I’ve been documenting the books I have read. First in Notepad, then in Excel and finally in Python and Django with a database somewhere in the background. I am amazed what experts help amateurs achieve.
This post briefly explains the process of collecting data about my reads, and covers the code behind the page in (too) great detail.


Finding information ONLINE
Most data was crawled from Danish library resources, Goodreads and Wikipedia with varying success. A lot was entered manually, especially with works in translation. I spent hours and hours being pedantic.
Even though librarians have been managing data longer than anyone else on the planet, there is no authoritative relational database where you can look up when some book by some author was first published and when the first Danish language version came out. In defence of librarians, many writers go to great lengths to make data management on books hard (one example is the genre “non-fiction novel” used by Spanish writer Javier Cercas).
The mysteries of Goodreads
I was mystified by the ability of Goodreads to place study guides and commentary to great works of literature first in their search results (and many more strange things), and terrified by Google displaying author birthdays — available nowhere else I could find on the web — on top of search results.
Also, Goodreads magically has editions of books that are older than when Goodreads claims the book was first published.




Adding books
After crawling for data, I made a form to add new books:


The form
This was a breeze in Django. Here’s forms.py:
from django.forms import ModelForm
from books.models import Author, Title, Read


class AuthorForm(ModelForm):
    # Form for creating/editing an Author; exposes the fields the scrapers fill.
    class Meta:
        model = Author
        fields = ['first_name', 'last_name','gender','country','biography','birth_date','data_quality']


class TitleForm(ModelForm):
    # Form for a Title with all its metadata and external-source URLs.
    class Meta:
        model = Title
        fields = ['title','genre','read_language','original_language','publisher','isbn','published_date','first_published','cover_url','ereolen_url','biblo_dk_url','good_reads_url','pages','original_title']


class ReadForm(ModelForm):
    # Only the date is entered for a Read; its Title is attached in the view.
    class Meta:
        model = Read
        fields = ['date']
The view:
And here’s the logic from views.py (I probably shouldn’t uncritically be saving cover URLs found on the internet to my server, but):
# Add a read to database
@login_required
def add_read(request):
    """Render and process the combined author/title/read form.

    Two submit buttons share the form: "lookup" pre-fills missing fields by
    scraping external sources (get_author / get_title), "save" persists the
    records. Existing authors and titles are reused rather than duplicated.

    Fixes vs. the original:
    - ``context`` was referenced (L59/L70) before being defined; the
      resulting NameError was swallowed by a bare ``except:``, which made an
      EXISTING author wrongly fall through to the lookup branch.
    - ``author`` could be unbound when the author lookup failed.
    - ``read_form.save()`` was called a second time after ``commit=False``.
    - ``Image.ANTIALIAS`` was removed in Pillow 10; ``LANCZOS`` is the
      long-standing equivalent.
    """
    context = {}
    book_saved = False
    author_form = AuthorForm()
    title_form = TitleForm()
    read_form = ReadForm()
    if request.method == 'POST':  # AND SUBMIT BUTTON
        author_form = AuthorForm(request.POST)
        title_form = TitleForm(request.POST)
        read_form = ReadForm(request.POST)
        if author_form.is_valid() and title_form.is_valid() and read_form.is_valid():
            author_data = author_form.cleaned_data
            title_data = title_form.cleaned_data
            read_data = read_form.cleaned_data
            author = None
            title = None
            existing_author = False
            existing_title = False
            # AUTHOR LOGIC - MAY ALSO MODIFY TITLE DATA
            # Check if the author already exists.
            try:
                author = Author.objects.get(first_name=author_data['first_name'], last_name=author_data['last_name'])
                existing_author = True
                context['existing_author'] = existing_author
            except Author.DoesNotExist:
                if 'lookup' in request.POST:
                    if any(not value for value in author_data.values()):
                        author_data, title_data = get_author(author_data, title_data)  # try to fetch data
            # TITLE LOGIC - MAY ALSO MODIFY AUTHOR DATA
            # Check if the title already exists; only possible if the author
            # was found (i.e. the book is being re-read).
            if author is not None:
                try:
                    title = Title.objects.get(authors=author, title=title_data['title'])
                    existing_title = True
                    context['existing_title'] = True
                except Title.DoesNotExist:
                    pass
            if not existing_title:
                if 'lookup' in request.POST:
                    if any(not value for value in title_data.values()):
                        title_data, author_data = get_title(title_data, author_data)  # try to fetch data
            # Render form with data from database or collected data.
            if 'lookup' in request.POST:
                if not existing_author:
                    author_form = AuthorForm(author_data)
                else:
                    author_form = AuthorForm(instance=author)
                if not existing_title:
                    title_form = TitleForm(title_data)
                else:
                    title_form = TitleForm(instance=title)
            # Save data.
            if 'save' in request.POST:
                if not existing_author:
                    author = author_form.save()
                if not existing_title:
                    title = title_form.save()
                    title.authors.add(author)
                    if title.cover_url:
                        # NOTE(review): fetching and storing arbitrary user-supplied
                        # URLs on the server is risky — consider validating the host.
                        file = requests.get(title.cover_url, stream=True)
                        save_location = settings.STATIC_ROOT + "books/covers/"
                        if '.jpg' in title.cover_url:
                            ending = '.jpg'
                        elif '.png' in title.cover_url:
                            ending = '.png'
                        elif '.webp' in title.cover_url:
                            ending = '.webp'
                        else:
                            ending = '.jpg'
                        id = title.id
                        filename = str(id) + ending
                        with open(save_location + filename, 'wb') as f:
                            file.raw.decode_content = True
                            shutil.copyfileobj(file.raw, f)
                        title.cover_filename = filename
                        title.save()
                        # Create a 150px thumbnail next to the full-size cover.
                        image = Image.open(save_location + filename).convert("RGB")
                        maxsize = 150, 150
                        image.thumbnail(maxsize, Image.LANCZOS)
                        image.save(save_location + "150/" + str(id) + ".webp", "WEBP")
                save_read = read_form.save(commit=False)
                save_read.title = title
                save_read.save()
                # Set save variable to True and display an empty form.
                book_saved = True
                author_form = AuthorForm()
                title_form = TitleForm()
                read_form = ReadForm()
    context.update({'author_form': author_form, 'title_form': title_form, 'read_form': read_form, 'book_saved': book_saved})
    return render(request, 'books/add.html', context)
The helper function
If you are a really curious and patient individual, you may be wondering about the get_author and get_title functions. You are in luck! Here is most of helpers.py which helps me scrape some data from the internet and will probably break in the future:
# HELPER FUNCTIONS #
def numbers_in_string(string):
    """Return how many characters of *string* are decimal digits."""
    digit_count = 0
    for character in string:
        if character.isdigit():
            digit_count += 1
    return digit_count
def get_author(author_data, title_data):
    """Best-effort fill of missing author fields (and some title fields).

    Sources, in order: Wikipedia (English, or Danish when the author's
    country is 'da'), gender-api.com for gender, and litteraturpriser.dk as
    a Danish-language fallback. Failures are swallowed; whatever could be
    found is written into the dicts, which are returned.

    Fix vs. the original: a ``contry`` typo raised NameError inside the big
    try-block, so ``title_data['original_language']`` was silently never set
    for English-speaking countries. Also initialises ``birthplace``/``country``
    so a failed birthplace lookup no longer aborts the whole country section.
    """
    # WIKIPEDIA
    if not author_data['biography']:
        # No biography URL supplied: search Wikipedia for the author's name.
        if not author_data['country'] == 'da':
            url = 'https://en.wikipedia.org/w/index.php?search=intitle%3A%22' + author_data['first_name'] + " " + author_data['last_name'] + '%22&title=Special:Search&profile=advanced&fulltext=1&ns0=1'
        else:
            url = 'https://da.wikipedia.org/w/index.php?search=intitle%3A%22' + author_data['first_name'] + " " + author_data['last_name'] + '%22&title=Special:Search&profile=advanced&fulltext=1&ns0=1'
    else:
        url = author_data['biography']
    author_request = requests.get(url)
    if author_request.status_code == 200:
        soup = BeautifulSoup(author_request.text, "lxml")
        try:
            first_result = soup.find('div', {'class':'mw-search-result-heading'}).a['href']
            if not author_data['country'] == 'da':
                result_page = 'https://en.wikipedia.org' + first_result
            else:
                result_page = 'https://da.wikipedia.org' + first_result
            page_request = requests.get(result_page)
            soup = BeautifulSoup(page_request.text, "lxml")
            # If not provided, set biography
            if not author_data['biography']:
                author_data['biography'] = result_page
            # If not provided, try to get birth_date
            if not author_data['birth_date']:
                try:
                    birthday = soup.find('span', {'class':'bday'}).string
                    author_data['birth_date'] = datetime.strptime(birthday, '%Y-%m-%d')
                except:
                    try:
                        birthday = soup.find('th', text="Født").parent.get_text()
                        # Sometimes the text lacks a space between the year and
                        # the next piece of info, which confuses the date parser.
                        try:
                            find_year = re.search(r"\d\d\d\d\S", birthday).span()[1]
                            birthday = birthday[:find_year - 1] + " " + birthday[find_year - 1:]
                        except:
                            pass
                        # Sometimes a letter and a digit are run together too.
                        try:
                            letters_and_numbers_together = re.search(r"[a-zA-Z]\d", birthday).span()[1]
                            birthday = birthday[:letters_and_numbers_together - 1] + " " + birthday[letters_and_numbers_together - 1:]
                        except:
                            pass
                        birthday_date = search_dates(birthday, languages=['da'])[0][1]
                        author_data['birth_date'] = birthday_date
                    except:
                        # Last resort: scan article paragraphs for "(født ...)".
                        paragraphs = soup.find_all('p')
                        for paragraph in paragraphs:
                            text = paragraph.get_text()
                            if '(født' in text:
                                birth_mention = text.find('(født')
                                birth_string = text[birth_mention + 1:text.find(")", birth_mention)]
                                if len(birth_string) < 10:  # just a year, probably
                                    year = int(birth_string[5:10])
                                    birthday = date(year, 1, 1)
                                    author_data['birth_date'] = birthday
                                else:
                                    birthday_date = search_dates(birth_string, languages=['da'])[0][1]
                                    author_data['birth_date'] = birthday_date
                                break
            # If not provided, try to get country
            if not author_data['country']:
                birthplace = None  # previously could be unbound below
                country = None
                try:
                    birthplace = soup.find('div', {'class':'birthplace'}).get_text()
                except:
                    try:
                        birthplace = soup.find('th', text="Born").parent.get_text()
                    except:
                        pass
                if birthplace:
                    country = get_country(birthplace)
                if not country:
                    try:
                        birthplace = soup.find('th', text="Nationality").find_next_sibling().string
                        country = get_country(birthplace)
                    except:
                        pass
                if country:
                    author_data['country'] = country
                    if not title_data['original_language']:
                        # BUG FIX: was "contry == 'ir'" — the NameError was
                        # swallowed, so original_language never got assigned.
                        if country == 'us' or country == 'sc' or country == 'ir' or country == 'en' or country == 'au':
                            country = 'en'
                        title_data['original_language'] = country
        except:
            pass
    # GENDER
    if not author_data['gender']:
        # NOTE(review): API key embedded in source; consider moving it to settings.
        request = requests.get('https://gender-api.com/get?name=' + author_data['first_name'] + '&key=vCjPrydWvlRcMxGszD')
        response = request.json()
        if response['gender'] == 'male':
            author_data['gender'] = 'm'
        elif response['gender'] == 'female':
            author_data['gender'] = 'f'
    if not author_data['data_quality']:
        # Scraped data is at best "medium" quality; missing fields mean "bad".
        if author_data['first_name'] and author_data['last_name'] and author_data['gender'] and author_data['country'] and author_data['birth_date'] and author_data['biography']:
            author_data['data_quality'] = 'med'
        else:
            author_data['data_quality'] = 'bad'
    # WIKIPEDIA ALTERNATIVE, ONLY FOR BOOKS READ IN DANISH
    if not author_data['biography'] and author_data['first_name'] and title_data['read_language'] == 'da':
        url = 'https://litteraturpriser.dk/henv/' + author_data['last_name'][0].lower() + '.htm'
        request = requests.get(url)
        soup = BeautifulSoup(request.text, "lxml")
        links = soup.find_all('a', href=True)
        for link in links:
            if len(link['href']) > 7:
                text = link.get_text().lower()
                if author_data['last_name'].lower() + ", " + author_data['first_name'].lower() == text:
                    url = 'https://litteraturpriser.dk' + link['href']
                    request = requests.get(url)
                    soup = BeautifulSoup(request.text, "lxml")
                    author_data['biography'] = request.url
                    if not author_data['country']:
                        author_data['country'] = 'da'
                    if not author_data['birth_date']:
                        born = soup.find(text=re.compile('Født'))
                        if born:
                            birthday_date = search_dates(born, languages=['da'])[0][1]
                            author_data['birth_date'] = birthday_date
                        else:
                            born = soup.find(text=re.compile('f. '))
                            birth_year = int(re.search(r"\d\d\d\d", born).group())
                            author_data['birth_date'] = date(birth_year, 1, 1)
                    if not title_data['original_language']:
                        title_data['original_language'] = 'da'
                    break
    return author_data, title_data
def get_ereolen(title_data, author_data):
    """Best-effort fill of missing title fields from ereolen.dk.

    Searches by ISBN when available, otherwise by author + title; when a
    book page is found it scrapes published date, ISBN, publisher, pages,
    original title and cover URL into ``title_data``.

    Fix vs. the original: the cover-URL extraction was the only field NOT
    wrapped in a try/except, so a page without a cover image raised an
    IndexError out of the function instead of degrading gracefully.
    """
    # EREOLEN
    soup = ""
    if not title_data['ereolen_url']:
        if title_data['isbn']:
            url = 'https://ereolen.dk/search/ting/' + title_data['isbn'] + '?&facets[]=facet.type%3Aebog'
        else:
            url = 'https://ereolen.dk/search/ting/' + author_data['first_name'] + " " + author_data['last_name'] + " " + title_data['title'] + '?&facets[]=facet.type%3Aebog'
        request = requests.get(url)
        try:
            search_soup = BeautifulSoup(request.text, "lxml")
            links = [a['href'] for a in search_soup.find_all('a', href=True) if '/collection/' in a['href']]
            book_request = requests.get('https://ereolen.dk' + links[0])
            soup = BeautifulSoup(book_request.text, "lxml")
            links = [a['href'] for a in soup.find_all('a', href=True) if '/object/' in a['href']]
            # ebooks and audiobook versions
            if len(links) == 4:
                book_request = requests.get('https://ereolen.dk' + links[0])
                soup = BeautifulSoup(book_request.text, "lxml")
            # SAVE HIT URL
            title_data['ereolen_url'] = 'https://ereolen.dk' + links[0]
        except:
            pass
    else:
        # A URL is already known: fetch it directly.
        book_request = title_data['ereolen_url']
        book_request = requests.get(book_request)
        soup = BeautifulSoup(book_request.text, "lxml")
    if soup:
        if not title_data['published_date']:
            try:
                published = soup.find('div', class_={"field-name-ting-author"}).get_text()
                published = int(re.search("[(]\d\d\d\d[)]", published).group()[1:5])
                title_data['published_date'] = date(published,1,1)
            except:
                pass
        if not title_data['isbn']:
            try:
                isbn_tag = soup.find('div', class_={"field-name-ting-details-isbn"})
                title_data['isbn'] = isbn_tag.find('div', class_={"field-items"}).get_text()
            except:
                pass
        if not title_data['publisher']:
            try:
                publisher_tag = soup.find('div', class_={"field-name-ting-details-publisher"})
                title_data['publisher'] = publisher_tag.find('div', class_={"field-items"}).get_text()
            except:
                pass
        if not title_data['pages']:
            try:
                page_tag = soup.find('div', class_={"field-name-ting-details-extent"})
                title_data['pages'] = int(page_tag.find('div', class_={"field-items"}).get_text().replace(" sider",""))
            except:
                pass
        if not title_data['original_title']:
            try:
                original_title_tag = soup.find('div', class_={"field-name-ting-details-source"})
                title_data['original_title'] = original_title_tag.find('div', class_={"field-items"}).get_text()
            except:
                pass
        if not title_data['cover_url']:
            try:
                covers = [img['src'] for img in soup.find_all('img') if '/covers/' in img['src']]
                title_data['cover_url'] = covers[0][:covers[0].find("?")]
            except (IndexError, KeyError):
                # BUG FIX: no cover on the page must not abort the lookup.
                pass
    return title_data, author_data
def get_bibliotek_dk(title_data, author_data):
    """Best-effort fill of missing title fields from bibliotek.dk.

    Searches by exact author + title phrase first, then falls back to a
    looser free-text search. The first hit whose title (sub)matches is
    scraped for cover, page count, publisher and publication dates.
    All failures are swallowed; the (possibly updated) dicts are returned.
    """
    # Exact phrase search on creator and title.
    search_url = 'https://bibliotek.dk/da/search/work?search_block_form=phrase.creator%3D%22' + author_data['first_name'] + " " + author_data['last_name'] + '%22+and+phrase.title%3D%22' + title_data['title'] + '%22&select_material_type=bibdk_frontpage&op=S%C3%B8g&n%2Famaterialetype%5Bterm.workType%253D%2522literature%2522%5D=term.workType%253D%2522literature%2522&year_op=%2522year_eq%2522&year_value=&form_id=search_block_form&sort=rank_main_title&page_id=bibdk_frontpage'
    request = requests.get(search_url)
    soup = BeautifulSoup(request.text, "lxml")
    hits = soup.find_all('div', {'class':'work mobile-page'})
    if not hits:
        # Fallback: plain free-text search on "first last title".
        url = 'https://bibliotek.dk/da/search/work?search_block_form=' + author_data['first_name'] + " " + author_data['last_name'] + " " + title_data['title'] +'&select_material_type=bibdk_frontpage%2Fbog&op=S%C3%B8g&n%2Famaterialetype%5Bterm.workType%253D%2522literature%2522%5D=term.workType%253D%2522literature%2522&year_op=%2522year_eq%2522&year_value=&form_build_id=form-TQ8TlT3HGFiKXyvz6cCFaiuTMZKimuHMF-p4q1Mb8ZI&form_id=search_block_form&sort=rank_main_title&page_id=bibdk_frontpage#content'
        request = requests.get(url)
        soup = BeautifulSoup(request.text, "lxml")
        hits = soup.find_all('div', {'class':'work mobile-page'})
    for hit in hits:
        id = hit['id']
        title = hit.find('h2', {'class':'searchresult-work-title'}).get_text()
        author = hit.h3.get_text()
        # Accept the hit when either title contains the other, or it is the only hit.
        if title_data['title'].lower() in title.lower() or title.lower() in title_data['title'].lower() or len(hits) == 1:
            # The hit's element id encodes the work URL slug.
            if 'basis' in id:
                link = id.replace("basis","-basis:")
            elif 'katalog' in id:
                link = id.replace("katalog","-katalog:")
            biblo_url = 'https://bibliotek.dk/da/work/' + link
            request = requests.get(biblo_url)
            if not title_data['biblo_dk_url']:
                title_data['biblo_dk_url'] = biblo_url
            soup = BeautifulSoup(request.text, "lxml")
            if not title_data['cover_url']:
                try:
                    # Upgrade the medium cover to the large variant, strip the query string.
                    img = soup.find('div', {'class':'bibdk-cover'}).img['src'].replace("/medium/","/large/")
                    img = img[:img.find("?")]
                    title_data['cover_url'] = img
                except:
                    pass
            book_data = soup.find('div', {'class':'manifestation-data'})
            if not title_data['pages']:
                try:
                    # Format field reads e.g. "123 sider"; keep the leading number.
                    pages = book_data.find('div', {'class':'field-name-bibdk-mani-format'}).find('span', {'class':'openformat-field'}).string.strip()
                    pages = pages[:pages.find(" ")]
                    pages = int(pages)
                    title_data['pages'] = pages
                except:
                    pass
            if not title_data['publisher']:
                try:
                    publisher = book_data.find('div', {'class':'field-name-bibdk-mani-publisher'}).find('span', {'property':'name'}).string
                    title_data['publisher'] = publisher
                except:
                    pass
            if not title_data['published_date'] or not title_data['first_published']:
                try:
                    # Prefer the "originals" field for the first publication year.
                    first_published = book_data.find('div', {'class':'field-name-bibdk-mani-originals'}).find('span', {'class':'openformat-field'}).string.strip()
                    published = int(re.search("\d\d\d\d", first_published).group())
                    if not title_data['published_date']:
                        title_data['published_date'] = date(published,1,1)
                    # Danish originals read in Danish: first publication == this edition's year.
                    if not title_data['first_published'] and title_data['read_language'] == 'da' and title_data['original_language'] == 'da':
                        title_data['first_published'] = date(published,1,1)
                except:
                    try:
                        # Fallback: plain publication year of this edition.
                        pub_year = int(book_data.find('div', {'class':'field-name-bibdk-mani-pub-year'}).find('span', {'class':'openformat-field'}).string.strip())
                        title_data['published_date'] = date(pub_year,1,1)
                        if title_data['read_language'] == 'da' and title_data['original_language'] == 'da':
                            try:
                                # Only a first edition ("1. udgave") pins first_published.
                                edition = book_data.find('div', {'class':'field-name-bibdk-mani-edition'}).find('span', {'class':'openformat-field'}).string.strip()
                                if edition == "1. udgave":
                                    title_data['first_published'] = date(pub_year,1,1)
                            except:
                                pass
                    except:
                        pass
            break
    return title_data, author_data
def get_goodreads(title_data, author_data):
    """Best-effort fill of missing title fields from Goodreads.

    When no Goodreads URL is known yet, searches by author + title (then by
    title alone), prefers an exact author/title match, and otherwise takes
    the first book result that is not a study guide/summary. The found (or
    provided) book page is scraped for cover, dates, pages, publisher, ISBN
    and original title. All failures are swallowed.
    """
    if not title_data['good_reads_url']:
        searchterm = author_data['first_name'] + " " + author_data['last_name'] + " " + title_data['title']
        search_url = 'https://www.goodreads.com/search?utf8=✓&q=' + searchterm + '&search_type=books'
        response = requests.get(search_url)
        search_soup = BeautifulSoup(response.text, "lxml")
        all_results = search_soup.find_all('tr', {'itemtype':'http://schema.org/Book'})
        if not all_results:
            # Retry with the title alone.
            search_url = 'https://www.goodreads.com/search?utf8=✓&q=' + title_data['title'] + '&search_type=books'
            response = requests.get(search_url)
            search_soup = BeautifulSoup(response.text, "lxml")
            all_results = search_soup.find_all('tr', {'itemtype':'http://schema.org/Book'})
        if all_results:
            good_match = False
            #exact match
            for result in all_results:
                gr_author = result.find('span', {'itemprop':'author'}).get_text().strip()
                gr_author = gr_author.replace(' (Goodreads Author)','')
                # Normalise odd whitespace Goodreads sometimes emits in names.
                if "  " in gr_author:
                    gr_author = gr_author.replace("  "," ")
                elif " " in gr_author:
                    gr_author = gr_author.replace(" "," ")
                gr_title = result.find('a', {'class':'bookTitle'})
                gr_title_string = gr_title.get_text().strip()
                title_url = gr_title['href']
                if gr_title_string.lower() == title_data['title'].lower() and gr_author.lower() == author_data['first_name'].lower() + " " + author_data['last_name'].lower():
                    good_match = True
                    break
            if good_match == True:
                url = 'https://www.goodreads.com' + title_url
                response = requests.get(url)
                soup = BeautifulSoup(response.text, "lxml")
            else:
                # No exact match: take the first result that is not a study
                # guide / summary / commentary (see the mysteries of Goodreads).
                links = search_soup.find_all('a', href=True)
                books = [a['href'] for a in links if '/book/show/' in a['href']]
                for book in books:
                    if not 'summary' in book and not 'analysis' in book and not 'lesson-plan' in book and not 'sidekick' in book and not 'teaching-with' in book and not 'study-guide' in book and not 'quicklet' in book and not 'lit-crit' in book and not author_data['last_name'].lower() in book:
                        url = 'https://www.goodreads.com' + book
                        response = requests.get(url)
                        soup = BeautifulSoup(response.text, "lxml")
                        heading = soup.find('h1', {'id': 'bookTitle'}).string
                        break
    else:
        # A Goodreads URL is already known: fetch it directly.
        url = title_data['good_reads_url']
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "lxml")
    if not title_data['good_reads_url']:
        # Strip tracking query parameters before storing the URL.
        if '?' in url:
            url = url[:url.rfind("?")]
        title_data['good_reads_url'] = url
    if not title_data['cover_url']:
        try:
            title_data['cover_url'] = soup.find('img', {"id" : "coverImage"})['src'].replace("compressed.","")
        except:
            pass
    details = soup.find('div', {"id" : "details"})
    details_text = details.get_text()
    if not title_data['published_date']:
        possible_dates = details.find_all('div', attrs={'class':'row'})
        for item in possible_dates:
            published_date = item.find(text=re.compile("Published"))
            if published_date:
                published_date = published_date.strip()
                numbers = numbers_in_string(published_date)
                # More than 4 digits implies a full date; exactly 4 is a year.
                if numbers > 4:
                    title_data['published_date'] = search_dates(published_date,languages=['en'])[0][1]
                elif numbers == 4:
                    year = int(re.search("\d\d\d\d", published_date).group())
                    title_data['published_date'] = date(year,1,1)
    if not title_data['first_published']:
        try:
            first_published = details.find('nobr').string.strip()
            numbers = numbers_in_string(first_published)
            if numbers > 4:
                title_data['first_published'] = search_dates(first_published,languages=['en'])[0][1]
            elif numbers == 4:
                year = int(re.search("\d\d\d\d", first_published).group())
                title_data['first_published'] = date(year,1,1)
        except:
            pass
    if not title_data['pages']:
        try:
            pages = details.find('span', {'itemprop': 'numberOfPages'}).string
            title_data['pages'] = int(pages[:pages.find(" ")])
        except:
            pass
    if not title_data['publisher']:
        try:
            by_location = details_text.find("by ")
            title_data['publisher'] = details_text[by_location+3:details_text.find("\n", by_location)]
        except:
            pass
    if not title_data['isbn']:
        # Try ISBN-13 first, then ISBN-10.
        try:
            isbn = re.search("\d\d\d\d\d\d\d\d\d\d\d\d\d", details_text).group()
            title_data['isbn'] = isbn
        except:
            try:
                isbn = re.search("\d\d\d\d\d\d\d\d\d\d", details_text).group()
                title_data['isbn'] = isbn
            except:
                pass
    if not title_data['original_title'] and title_data['read_language'] != title_data['original_language']:
        try:
            parent = details.find('div', text="Original Title").parent
            original_title = parent.find('div', {'class':'infoBoxRowItem'}).string
            title_data['original_title'] = original_title
        except:
            pass
    return title_data, author_data
def get_title(title_data, author_data):
    """Dispatch title lookups to the scrapers, based on the read language."""
    read_in_danish = title_data['read_language'] == 'da'
    if read_in_danish:
        # Danish sources first; Goodreads runs afterwards as a fallback.
        title_data, author_data = get_ereolen(title_data, author_data)
        title_data, author_data = get_bibliotek_dk(title_data, author_data)
        #cover from ereolen, mofibo, saxo
        # danish library request
    title_data, author_data = get_goodreads(title_data, author_data)
    return title_data, author_data
The template
The simplicity:
{# Add-book page: one <form> posting three model sub-forms at once. #}
<h1>Add book</h1>
{% if book_saved %}
<p>Bogen blev gemt!</p>
{% endif %}
<form method="post">
{# Two submit buttons share the form: "lookup" scrapes data, "save" persists. #}
<p class="center"><input class="button blue" name="lookup" type="submit" value="Look up">
<input class="button green" name="save" type="submit" value="Save"></p>
{# Quick links to external sources, shown once a lookup has filled the URLs. #}
<p class="center">
{% if author_form.biography.value %}
<a href="{{ author_form.biography.value }}">biografi</a>
{% endif %}
{% if title_form.good_reads_url.value %}
<a href="{{ title_form.good_reads_url.value }}">goodreads</a>
{% endif %}
{% if title_form.ereolen_url.value %}
<a href="{{ title_form.ereolen_url.value }}">ereolen</a>
{% endif %}
{% if title_form.biblo_dk_url.value %}
<a href="{{ title_form.biblo_dk_url.value }}">bibliotek.dk</a>
{% endif %}
</p>
{% csrf_token %}
{# Three columns: author form, title form, read form + cover preview. #}
<div class="grid addbook">
<div>
{{ author_form }}
</div>
<div>
{{ title_form }}
</div>
<div>
{{ read_form }}
{% if title_form.cover_url.value %}
<img class="cover" src="{{ title_form.cover_url.value }}">
{% endif %}
</div>
</div>
</form>
The data model
Here’s models.py with the embarrassing list of countries and languages (that I should have gotten from somewhere else) edited out:
from isbn_field import ISBNField
class Author(models.Model):
    """A book author; most fields are optional because data is scraped best-effort.

    Fix vs. the original: ``__str__`` produced a leading space when
    ``first_name`` was blank (the field allows it); fields and methods were
    also interleaved, now grouped conventionally.
    """

    GENDER_CHOICES = [
        ('f', 'Female'),
        ('m', 'Male'),
        ('o', 'Other'),
    ]
    DATA_QUALITY_CHOICES = [
        ('good', 'Good'),
        ('bad', 'Bad'),
        ('med', 'Medium'),
    ]
    first_name = models.CharField('First name', max_length=500, blank=True)
    last_name = models.CharField('Last name', max_length=500)
    gender = models.CharField('Gender', choices=GENDER_CHOICES, max_length=1, blank=True)
    birth_date = models.DateField(null=True, blank=True)
    # COUNTRY_CHOICES is defined in the full models.py (edited out of the post).
    country = models.CharField('Country', choices=COUNTRY_CHOICES, max_length=2, blank=True)
    biography = models.URLField('Biography url', max_length=500, blank=True)
    data_quality = models.CharField('Datakvalitet', choices=DATA_QUALITY_CHOICES, max_length=4, blank=True)

    class Meta:
        ordering = ['last_name']

    def __str__(self):
        # strip() avoids a leading space when first_name is blank.
        return (self.first_name + " " + self.last_name).strip()

    def get_titles(self):
        """Return this author's titles joined with ' & '."""
        return " & ".join([t.title for t in self.title_set.all()])
class Title(models.Model):
    # A specific book (possibly a translation) that can be read one or more times.
    GENRE_CHOICES = [
        ('nf', 'Non-Fiction'),
        ('fi', 'Fiction'),
    ]
    authors = models.ManyToManyField(Author)
    def get_authors(self):
        # Join all authors as "First Last & First Last" (used as admin column).
        return " & ".join([t.first_name + " " + t.last_name for t in self.authors.all()])
    get_authors.short_description = "Author(s)"
    title = models.CharField('Title', max_length=500)
    def __str__(self):
        return self.title
    # NOTE: LANGUAGE_CHOICES is defined in the full models.py (edited out of the post).
    read_language = models.CharField('Read in language', choices=LANGUAGE_CHOICES, max_length=2)
    original_language = models.CharField('Original language', choices=LANGUAGE_CHOICES, max_length=2, blank=True)
    original_title = models.CharField('Original title', max_length=500, blank=True)
    genre = models.CharField('Overall genre', choices=GENRE_CHOICES, max_length=2)
    publisher = models.CharField('Publisher', max_length=100, blank=True)
    first_published = models.DateField(null=True, blank=True)  # year/date of the original work's first publication
    published_date = models.DateField(null=True, blank=True)   # publication date of the edition that was read
    isbn = ISBNField(null=True, blank=True)
    cover_filename = models.CharField('Cover filename', max_length=100, blank=True)
    cover_url = models.URLField('Cover-url', max_length=500, blank=True)
    pages = models.PositiveIntegerField(blank=True, null=True)
    # URLs into the external sources the scrapers use.
    good_reads_url = models.URLField('Goodreads-url', max_length=500, blank=True)
    ereolen_url = models.URLField('Ereolen-url', max_length=500, blank=True)
    biblo_dk_url = models.URLField('Biblo-url', max_length=500, blank=True)
    class Meta:
        ordering = ['title']
class Read(models.Model):
    # One reading of a Title on a given date.
    title = models.ForeignKey(Title, on_delete=models.CASCADE)
    date = models.DateField()
    # Manual ordering of reads within the same period (used by the front-page sort).
    sort_order = models.PositiveIntegerField(blank=True, null=True)
The front page
The views.py function for the front page is short and sweet:
def index(request):
    """Front page: all reads, newest year first, plus month labels for the grid."""
    all_reads = (
        Read.objects
        .order_by('-date__year', 'date__month', 'sort_order', 'id')
        .select_related('title')
    )
    month_labels = [[number, calendar.month_abbr[number]] for number in range(1, 13)]
    context = {
        'request': request,
        'reads': all_reads,
        'months': month_labels,
    }
    return render(request, 'books/index.html', context)
And, while longer, I think the template loop is nice too (although there is that clumsy nested loop):
{# Front page: reads grouped by year, then (for recent years) by month. #}
{% regroup reads by date.year as years_list %}
{% for year, readings in years_list %}
<h2>{{ year }}</h2>
{# Editorial notes for years with unusual registration circumstances. #}
{% if year == 2015 %}
<p>I was on paternity leave most of this year which gave me time to read a lot, but not the mental surplus to register by month. This year I bought a Kindle which re-kindled (durr) my interest in reading.</p>
{% elif year == 2004 %}
<p>I was working in England from around September 2003 to February 2004. This gave me time to read a lot, but not the computer access at home necessary to register my reads precisely.</p>
{% elif year == 2003 %}
<p>The year I began registering my reads.</p>
{% elif year == 2002 %}
<p>This - and all years before - is from memory in 2003, so not really precise.</p>
{% endif %}
{% regroup readings by date.month as months_list %}
{# Years with reliable month data get a month-column grid; the rest a flat list. #}
{% if year > 2004 and not year == 2015 %}
<div class="grid reads">
{% for month in months %}
<div class="flex">
<div>{{ month.1 }}</div>
{# Clumsy nested loop: match this month's reads against the regrouped list. #}
{% for mon, reads in months_list %}
{% if mon == month.0 %}
{% for read in reads %}
<a title="{{ read.title }}" href="{% url 'books_book' read.title.id %}"><img class="frontcover" loading="lazy" src="{% static 'books/covers/150/' %}{{ read.title.id }}.webp"></a>
{% endfor %}
{% endif %}
{% endfor %}
</div>
{% endfor %}
</div>
{% else %}
{% for read in readings %}
<a href="{% url 'books_book' read.title.id %}"><img class="frontcover" loading="lazy" src="{% static 'books/covers/150/' %}{{ read.title.id }}.webp"></a>
{% endfor %}
{% endif %}
The statistics page
The charts on the statistics page are made with Chart.js which is so easy that you don’t even need to know Javascript.
Here’s the views.py function which could probably be sped up if I had any idea how (which I don’t):
def statistics(request):
    """Build every dataset for the statistics page's Chart.js charts.

    Collects per-year lists (genre, author gender/birth year/country, read
    and original language, first-published year) in one pass over all reads,
    then reduces each into the per-year percentages/medians/counts the
    templates expect.

    Fix vs. the original: years with an empty list crashed — ZeroDivisionError
    in the genre/gender/read-language percentages (the read-language list only
    collects 'da'/'en' reads, so it can easily be empty) and IndexError in the
    author-age median. Empty years are now skipped, matching the guard that
    ``median_book_age`` already had.
    """
    context = {}
    # All reads, used for lots of charts
    reads = Read.objects.order_by('date__year').select_related('title').prefetch_related('title__authors')
    context['reads'] = reads
    # Books per year chart queryset
    books_pages_per_year = Read.objects.values('date__year').annotate(Count('id'), Sum('title__pages'), Avg('title__pages')).order_by('date__year')
    context['books_pages_per_year'] = books_pages_per_year
    # Prepare year, value-dictionaries
    genre_structure = {}              # fiction vs. non-fiction
    author_gender_structure = {}      # male vs. female
    author_birth_structure = {}       # median age of authors
    read_language_structure = {}      # language of read
    original_language_structure = {}  # original language of read
    language_choices = dict(Title.LANGUAGE_CHOICES)  # look up dict for original languages
    author_country_structure = {}     # country of author
    country_choices = dict(Author.COUNTRY_CHOICES)
    book_age_structure = {}           # median age of books
    for read in reads:
        year_of_read = read.date.year
        # Put year keys in dictionaries
        if not year_of_read in genre_structure:  # check one = check all
            genre_structure[year_of_read] = []
            author_gender_structure[year_of_read] = []
            author_birth_structure[year_of_read] = []
            read_language_structure[year_of_read] = []
            original_language_structure[year_of_read] = []
            author_country_structure[year_of_read] = []
            book_age_structure[year_of_read] = []
        # Put values in dictionaries
        if read.title.read_language == 'da' or read.title.read_language == 'en':
            read_language_structure[year_of_read].append(read.title.read_language)
        if read.title.original_language:
            original_language_structure[year_of_read].append(language_choices[read.title.original_language])
        if read.title.genre:
            genre_structure[year_of_read].append(read.title.genre)
        if read.title.first_published:
            book_age_structure[year_of_read].append(read.title.first_published.year)
        for author in read.title.authors.all():
            if author.gender:
                author_gender_structure[year_of_read].append(author.gender)
            if author.birth_date:
                author_birth_structure[year_of_read].append(author.birth_date.year)
            if author.country:
                author_country_structure[year_of_read].append(country_choices[author.country])
    # Prepare datasets for charts
    genres = {}
    for year, genre_list in genre_structure.items():
        number_of_titles = len(genre_list)
        if not number_of_titles:
            continue  # BUG FIX: avoid ZeroDivisionError for years without genre data
        number_of_fiction_titles = sum(1 for genre in genre_list if genre == 'fi')
        fiction_percentage = int(number_of_fiction_titles / number_of_titles * 100)
        non_fiction_percentage = 100 - fiction_percentage
        genres[year] = [fiction_percentage, non_fiction_percentage]
    context['genres'] = genres
    median_author_age = {}
    for year, birthyears in author_birth_structure.items():
        if not birthyears:
            continue  # BUG FIX: avoid IndexError for years without birth data
        birthyears = sorted(birthyears)
        median_birthyear = birthyears[len(birthyears) // 2]
        median_author_age[year] = year - median_birthyear
    context['median_author_age'] = median_author_age
    author_genders = {}
    for year, genders in author_gender_structure.items():
        number_of_authors = len(genders)
        if not number_of_authors:
            continue  # BUG FIX: avoid ZeroDivisionError
        males = sum(1 for gender in genders if gender == 'm')
        male_percentage = int(males / number_of_authors * 100)
        female_percentage = 100 - male_percentage
        author_genders[year] = [male_percentage, female_percentage]
    context['author_genders'] = author_genders
    read_languages = {}
    for year, languages in read_language_structure.items():
        number_of_languages = len(languages)
        if not number_of_languages:
            continue  # BUG FIX: only 'da'/'en' reads are collected, so this CAN be empty
        danish = sum(1 for language in languages if language == 'da')
        danish_percentage = int(danish / number_of_languages * 100)
        english_percentage = 100 - danish_percentage
        read_languages[year] = [danish_percentage, english_percentage]
    context['read_languages'] = read_languages
    # Stacked bar data: one count-per-year series per original language.
    original_languages = []
    original_languages_years = []
    for year, languages in original_language_structure.items():
        if not year in original_languages_years:
            original_languages_years.append(year)
        for lang in languages:
            if lang not in original_languages:
                original_languages.append(lang)
    original_languages_template = {}
    for language in original_languages:
        original_languages_template[language] = []
        for year in original_languages_years:
            count_of_language_in_year = sum(1 for lang in original_language_structure[year] if language == lang)
            original_languages_template[language].append(count_of_language_in_year)
    context['original_languages_template'] = original_languages_template
    context['original_languages_years'] = original_languages_years
    # Same shape for author countries.
    author_countries = []
    author_countries_years = []
    for year, countries in author_country_structure.items():
        if not year in author_countries_years:
            author_countries_years.append(year)
        for country in countries:
            if country not in author_countries:
                author_countries.append(country)
    author_countries_template = {}
    for country in author_countries:
        author_countries_template[country] = []
        for year in author_countries_years:
            count_of_country_in_year = sum(1 for countr in author_country_structure[year] if country == countr)
            author_countries_template[country].append(count_of_country_in_year)
    context['author_countries_template'] = author_countries_template
    context['author_countries_years'] = author_countries_years
    median_book_age = {}
    for year, publish_years in book_age_structure.items():
        publish_years = sorted(publish_years)
        # account for no data in years
        if len(publish_years) >= 2:
            median_publish_year = publish_years[len(publish_years) // 2]
        elif len(publish_years) == 1:
            median_publish_year = publish_years[0]
        else:
            median_publish_year = 0
        median_book_age[year] = year - median_publish_year
    context['median_book_age'] = median_book_age
    return render(request, 'books/statistics.html', context)
And a template example:
<!-- Reads-per-year bar chart (Chart.js). Fix: heading closed with </a> instead of </h2>. -->
<div>
<h2>Reads per year</h2>
<canvas id="books_per_year"></canvas>
</div>
<script>
var ctx = document.getElementById('books_per_year').getContext('2d');
var myChart = new Chart(ctx, {
    type: 'bar',
    data: {
        // One label and one count per year, rendered by the Django template loop.
        labels: [{% for year in books_pages_per_year %}{% if not forloop.last %}{{ year.date__year }}, {% else %}{{ year.date__year }}{% endif %}{% endfor %}],
        datasets: [{
            label: 'Read',
            data: [{% for year in books_pages_per_year %}{% if not forloop.last %}{{ year.id__count }}, {% else %}{{ year.id__count }}{% endif %}{% endfor %}],
            backgroundColor: 'rgba(255, 99, 132, 0.2)',
            borderColor: 'rgba(255, 99, 132, 1)',
            borderWidth: 1
        }]
    },
    options: {
        tooltips: {
            callbacks: {
                // Append " books" to the tooltip value.
                label: function(tooltipItem, data) {
                    return data.datasets[tooltipItem.datasetIndex].label + ': ' + tooltipItem.value + ' books';
                }
            }
        },
        legend: {
            display: false
        },
        responsive: true,
        scales: {
            yAxes: [{
                ticks: {
                    beginAtZero: true
                }
            }]
        }
    }
});
</script>