#!/usr/bin/env python
"""Watch the kaffee-netz.de private-offers forum and mail new keyword matches.

The script reads ``config.json`` (keys used here: ``keywords``, ``from_mail``,
``to_mail``, ``smtp_user``, ``smtp_pass``), fetches the forum overview page,
and for every thread title matching one of the configured regular expressions
sends a notification mail containing the thread's first post.

Threads that were already reported are remembered in ``matches.json``
(mapping thread link -> title) so that each offer is mailed only once.
"""
import re
import json
import os
from smtplib import SMTP_SSL
from email.mime.text import MIMEText

from requests_html import HTMLSession

BASE_URL = "https://www.kaffee-netz.de"
FORUM_URL = BASE_URL + "/forums/private-angebote-maschinen-und-muehlen.14/"
MATCHES_FILE = "matches.json"

with open("config.json", "r", encoding="utf-8") as fil:
    config = json.load(fil)

session = HTMLSession()
r = session.get(FORUM_URL)
items = r.html.find("div.structItem-title")

# Create an empty state file on the first run so the load below always works.
if not os.path.isfile(MATCHES_FILE):
    with open(MATCHES_FILE, "w", encoding="utf-8") as fil:
        json.dump({}, fil)

with open(MATCHES_FILE, "r", encoding="utf-8") as fil:
    matches = json.load(fil)

try:
    for item in items:
        title = item.text
        print("\nprocessing item", item, "with title", title)

        # A title element may hold several anchors; only those pointing at a
        # thread are relevant. As before, the last such anchor wins.
        link = None
        for anchor in item.find("a"):
            href = anchor.attrs.get("href", "")
            if "threads/" not in href:
                continue
            link = href

        if link is None:
            # Without a thread link there is nothing to fetch or remember.
            print("could not find link for item", title)
            continue
        print("found link", link)

        # re.match anchors each keyword pattern at the start of the title.
        # NOTE(review): re.search may be what was intended for plain
        # keywords -- confirm against the patterns used in config.json.
        found = any(
            re.match(kw, title, re.IGNORECASE) for kw in config["keywords"]
        )
        if not found:
            print("... no match")
            continue
        print("... match!")

        if link in matches:
            print("... is already known")
            continue

        thread_url = BASE_URL + link
        content = session.get(thread_url)
        # find(..., first=True) yields None when the selector matches nothing;
        # fall back to an empty description instead of crashing.
        post = content.html.find("div.bbWrapper", first=True)
        description = post.text if post is not None else ""

        mail_body = """
Neues Angebot im Kaffeenetz gefunden!

%s: %s

Beschreibung:
%s
""" % (title, thread_url, description)

        mime_body = MIMEText(mail_body, _charset="utf-8")
        mime_body["Subject"] = "Neues Kaffeenetz Angebot: " + title
        mime_body["From"] = config["from_mail"]
        mime_body["To"] = config["to_mail"]

        print("Sending mail ...")
        with SMTP_SSL("mail.ghostdub.de") as smtp:
            smtp.login(config["smtp_user"], config["smtp_pass"])
            smtp.sendmail(
                config["from_mail"], config["to_mail"], mime_body.as_string()
            )

        # Only mark the thread as known once the mail actually went out, so a
        # failed delivery is retried on the next run.
        matches[link] = title
finally:
    # Persist even when an item fails midway, otherwise offers that were
    # already mailed in this run would be mailed again next time.
    with open(MATCHES_FILE, "w", encoding="utf-8") as fil:
        json.dump(matches, fil)