commit
8ee3e13734
@ -0,0 +1,12 @@
|
||||
{
|
||||
"keywords": [
|
||||
".*mara.*",
|
||||
".*xenia.*",
|
||||
".*bz.*10.*"
|
||||
],
|
||||
"to_mail": "dario@kanojo.de",
|
||||
"from_mail": "",
|
||||
"smtp_server": "mail.ghostdub.de",
|
||||
"smtp_user": "",
|
||||
"smtp_pass": ""
|
||||
}
|
||||
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import re
|
||||
import json
|
||||
import os
|
||||
from smtplib import SMTP_SSL
|
||||
from email.mime.text import MIMEText
|
||||
from requests_html import HTMLSession
|
||||
|
||||
|
||||
# Read the script settings; later code looks up keys such as "keywords",
# "from_mail" and "to_mail" in this mapping.
with open("config.json", "r") as config_file:
    config = json.load(config_file)

# Download the forum listing and collect every element matching the
# "div.structItem-title" selector (one per listed thread title).
session = HTMLSession()
r = session.get("https://www.kaffee-netz.de/forums/private-angebote-maschinen-und-muehlen.14/")
items = r.html.find("div.structItem-title")

# On the first run there is no store of already-reported threads yet, so
# create one holding an empty JSON object before reading it back.
if not os.path.isfile("matches.json"):
    with open("matches.json", "w") as new_store:
        new_store.write(json.dumps({}))

with open("matches.json", "r") as store:
    matches = json.loads(store.read())
|
||||
|
||||
# Walk every collected thread title, mail each new keyword match, and persist
# the threads that were reported so they are not mailed again on the next run.
#
# Reads the module-level names ``items``, ``config``, ``session`` and
# ``matches``; updates ``matches`` in place (thread href -> thread title).
try:
    for item in items:
        title = item.text
        print("\nprocessing item", item, "with title", title)

        # Keep the last anchor whose href points at a thread. Anchors without
        # an href attribute are ignored instead of raising KeyError.
        link = None
        for anchor in item.find("a"):
            href = anchor.attrs.get("href", "")
            if "threads/" in href:
                link = href

        # Without a thread link there is nothing to report or fetch, so skip
        # the item rather than failing later on string concatenation.
        if not link:
            print("could not find link for item", title)
            continue

        print("found link", link)

        # Keywords are regular expressions matched from the start of the
        # title, case-insensitively.
        if not any(re.match(kw, title, re.IGNORECASE) for kw in config["keywords"]):
            print("... no match")
            continue

        print("... match!")
        if link in matches:
            print("... is already known")
            continue

        thread_url = "https://www.kaffee-netz.de" + link
        content = session.get(thread_url)
        # find(..., first=True) yields None when the selector matches nothing;
        # fall back to an empty description in that case.
        post_body = content.html.find("div.bbWrapper", first=True)
        description = post_body.text if post_body is not None else ""

        mail_body = (
            "\nNeues Angebot im Kaffeenetz gefunden!\n\n"
            "%s: %s\n\n"
            "Beschreibung:\n%s\n"
        ) % (title, thread_url, description)

        # MIMEText takes the payload as str and encodes it with the charset.
        mime_body = MIMEText(mail_body, _charset="utf-8")
        mime_body["Subject"] = "Neues Kaffeenetz Angebot: " + title
        mime_body["From"] = config["from_mail"]
        mime_body["To"] = config["to_mail"]

        print("Sending mail ...")
        # Use the configured SMTP host; fall back to the previously hard-coded
        # one so configs without "smtp_server" keep working.
        with SMTP_SSL(config.get("smtp_server", "mail.ghostdub.de")) as smtp:
            smtp.login(config["smtp_user"], config["smtp_pass"])
            smtp.sendmail(config["from_mail"], config["to_mail"], mime_body.as_string())

        # Record the thread only after the mail went out, so a failed send is
        # retried on the next run. Store the real title, not a placeholder.
        matches[link] = title
finally:
    # Persist even if a later item failed, so already-sent notifications are
    # not mailed a second time.
    with open("matches.json", "w") as store:
        json.dump(matches, store)
|
||||
@ -0,0 +1,2 @@
|
||||
lxml_html_clean==0.4.1
|
||||
requests-html==0.10.0
|
||||
Loading…
Reference in new issue