Init
This commit is contained in:
191
bot.py
Normal file
191
bot.py
Normal file
@@ -0,0 +1,191 @@
|
||||
# coding: utf-8
|
||||
|
||||
from telegram.ext import Updater
|
||||
from lxml import html
|
||||
import requests
|
||||
from lxml.etree import tostring as htmlstring
|
||||
|
||||
import time
|
||||
import re
|
||||
|
||||
from dotenv import dotenv_values
|
||||
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
|
||||
# Absolute XPath of the anchor inside the ``index``-th entry of the ordered
# list on the scraped profile page; fill ``{index}`` via ``str.format``.
linkTemplate = (
    '/html[1]/body[1]/div[2]/div[4]/div[1]/div[1]/div[2]/div[1]'
    '/ul[1]/li[2]/div[1]/div[1]/ol[1]/li[{index}]'
    '/div[1]/div[1]/h3[1]/a[1]'
)

# When True, messages are printed instead of being sent to Telegram and the
# development copy of the counter file is used.
debug = False

# File holding the post count seen on the previous run.
if debug:
    countStrPath = '/home/nenas/projects/python/hardwareChecker/countStr.txt'
else:
    countStrPath = '/home/nenas/telegram/hardwareChecker/countStr.txt'
|
||||
|
||||
def sendMessage(updater, channelID, message):
    """Deliver ``message`` to the Telegram chat ``channelID``.

    While the module-level ``debug`` flag is set nothing is sent; the text is
    printed to stdout instead.
    """
    if not debug:
        updater.bot.sendMessage(channelID, message)
        return
    print(message)
|
||||
|
||||
def getNewContent():
    """Open the news bot's profile page in headless Firefox.

    Waits up to 120 seconds for the first entry of the content list (located
    via ``linkTemplate``) to be present in the DOM.

    Returns:
        The live WebDriver instance on success; the caller owns it and must
        quit it. ``-1`` if a Selenium ``TimeoutException`` occurs while
        loading or waiting; the browser has already been shut down then.

    Raises:
        Any other exception from Selenium is re-raised after the browser has
        been shut down.
    """
    fireFoxOptions = webdriver.FirefoxOptions()
    # Pass the command-line switch directly: the ``headless`` attribute setter
    # is deprecated/removed in newer Selenium releases, while the argument
    # form works across versions.
    fireFoxOptions.add_argument('-headless')
    browser = webdriver.Firefox(options=fireFoxOptions)

    try:
        browser.get('https://www.hardwareluxx.de/community/members/hwl-news-bot.268095/#recent-content')
        WebDriverWait(browser, 120).until(
            EC.presence_of_element_located((By.XPATH, linkTemplate.format(index='1')))
        )
    except TimeoutException:
        # Do not leak the Firefox process when the page never renders.
        browser.quit()
        return -1
    except Exception:
        browser.quit()
        raise
    return browser
|
||||
|
||||
def getLinksForID(browser, id):
    """Return the ``href`` of entry number ``id`` in the content list.

    Args:
        browser: WebDriver with the profile page loaded.
        id: Position substituted into ``linkTemplate`` (XPath positions start
            at 1). The name shadows the builtin but is kept because it is
            part of the function's signature.

    Returns:
        The anchor's ``href`` attribute value, or ``-1`` unless exactly one
        element matches the XPath.
    """
    xpath = linkTemplate.format(index=str(id))
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3; this form is available in older releases too.
    anchors = browser.find_elements(By.XPATH, xpath)
    if len(anchors) != 1:
        return -1
    return anchors[0].get_attribute('href')
|
||||
|
||||
def checkForUpdate(browser):
    """Compare the post counter on the page with the one stored on disk.

    The counter is read from the link next to the 'Beiträge' label, stripped
    of newlines and dots, and compared with the first line of the file at
    ``countStrPath``. Whenever the two differ, the file is overwritten with
    the current value.

    Args:
        browser: WebDriver with the profile page loaded.

    Returns:
        ``-1`` if the counter element cannot be located unambiguously.
        ``0`` if the counter is unchanged, has decreased, or no previous
        value was stored (first run: only the baseline is recorded).
        Otherwise the positive difference between current and stored count.

    Raises:
        ValueError: if the page text or the stored line is not an integer.
    """
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.3.
    counters = browser.find_elements(By.XPATH, '//dt[contains(text(),\'Beiträge\')]/../dd/a')
    if len(counters) != 1:
        return -1

    # Dots are dropped before parsing (presumably thousands separators).
    countStr = counters[0].text.replace('\n', '').replace('.', '')
    count = int(countStr)

    # A missing state file is treated like an empty one instead of crashing.
    try:
        with open(countStrPath, 'r') as countStrFile:
            line = countStrFile.readline()
    except FileNotFoundError:
        line = ''

    fileCount = int(line) if line.strip() else None

    if count == fileCount:
        return 0

    with open(countStrPath, 'w') as countStrFile:
        countStrFile.write(countStr)

    if fileCount is None:
        # No baseline yet: previously a -1 sentinel made this report
        # count + 1 "new" posts; now the first run only stores the baseline.
        return 0

    # A shrinking counter is not reported as an update.
    return max(count - fileCount, 0)
|
||||
|
||||
def end(ret, browser):
    """Shut the browser down and hand ``ret`` back unchanged.

    Exists so callers can write ``return end(code, browser)`` as a one-liner.
    """
    try:
        return ret
    finally:
        browser.quit()
|
||||
|
||||
def _sendDealMessage(updater, channelID, postID):
    """Fetch forum post ``postID`` and forward its Name/Preis/Shop/URL fields.

    If the post or any field cannot be extracted, a 'Bot kaputt' diagnostic
    naming the failing part is sent instead and the post is skipped.
    """
    from html import unescape  # stdlib; local because ``html`` is lxml's module here

    linkToPost = 'https://www.hardwareluxx.de/community/posts/' + postID + '/'
    # Timeout so a stalled server cannot hang the run forever.
    page = requests.get(linkToPost, timeout=30)
    tree = html.fromstring(page.content)

    postElement = tree.xpath('//article[@id=\'js-post-' + postID + '\']/div/div[2]/div/div/div/article/div[1]')
    if len(postElement) != 1:
        sendMessage(updater, channelID, 'Bot kaputt: Kann Post für ID=' + postID + ' nicht ermitteln')
        return
    postElementStr = str(htmlstring(postElement[0], encoding='unicode'))

    # (label used in the diagnostic, pattern), checked in this order.
    fieldPatterns = (
        ('Hardwarename', r'.*Name: (.*)<br.*'),
        ('Shop', r'.*Shop: (.*)<br.*'),
        ('Preis', r'.*Preis: (.*)<br.*'),
        ('URL', r'href\="([^"]+).*'),
    )
    fields = {}
    for label, pattern in fieldPatterns:
        match = re.search(pattern, postElementStr)
        if match is None:
            sendMessage(updater, channelID, 'Bot kaputt: Kann ' + label + ' nicht capturen: ' + postElementStr)
            return
        # The values are cut out of serialised markup, so entity escapes such
        # as ``&amp;`` must be decoded to get the real text/URL back.
        fields[label] = unescape(match.group(1))

    message = ('Name: ' + fields['Hardwarename'] + '\nPreis: ' + fields['Preis']
               + '\nShop: ' + fields['Shop'] + '\nURL: ' + fields['URL'])
    sendMessage(updater, channelID, message)


def _sendNewsMessage(updater, channelID, linkStr):
    """Fetch the page at ``linkStr`` and forward its 'weiterlesen' link.

    Returns:
        ``False`` if the page does not contain exactly one 'weiterlesen'
        anchor (the caller aborts the run in that case), ``True`` otherwise.
    """
    from html import unescape  # stdlib; local because ``html`` is lxml's module here

    page = requests.get(linkStr, timeout=30)
    tree = html.fromstring(page.content)

    postElement = tree.xpath('//a[contains(text(), \'weiterlesen\')]')
    if len(postElement) != 1:
        # There is no post ID on this path; report the link being processed.
        sendMessage(updater, channelID, 'Bot kaputt: Kann Post für Link=' + linkStr + ' nicht ermitteln')
        return False
    postElementStr = str(htmlstring(postElement[0], encoding='unicode'))

    match = re.search(r'href\="([^"]+).*', postElementStr)
    if match is None:
        sendMessage(updater, channelID, 'Bot kaputt: Kann URL nicht capturen.')
        return True
    sendMessage(updater, channelID, 'News: ' + unescape(match.group(1)))
    return True


def main():
    """Run one check of the profile page and forward new entries to Telegram.

    Reads ``TOKEN`` and ``CHANNELID`` from ``.env``, loads the page, asks
    ``checkForUpdate`` how many entries are new, collects their links and
    sends one message per entry. Links ending in ``post-<digits>`` are handled
    as forum posts with Name/Shop/Preis fields; all others as news pages.

    Returns:
        ``0`` when there was nothing to do or all links were processed,
        ``-1`` when the page, the counter or a link could not be read, or an
        unexpected exception occurred (its text is sent to the channel).
    """
    config = dotenv_values(".env")
    token = config["TOKEN"]
    channelID = config["CHANNELID"]

    updater = Updater(token)

    browser = getNewContent()
    if browser == -1:
        sendMessage(updater, channelID, 'Bot kaputt: Kann dynamic Content nicht laden')
        return -1

    try:
        ret = checkForUpdate(browser)
        if ret == 0:
            return 0
        if ret < 0:
            sendMessage(updater, channelID, 'Bot kaputt: Kann static Content nicht laden')
            return -1

        links = []
        for index in range(ret):
            link = getLinksForID(browser, index + 1)
            if link == -1:
                sendMessage(updater, channelID, 'Bot kaputt: Kann Links nicht laden')
                return -1
            links.append(link)
        # Process in reverse page order (presumably oldest entry first).
        links.reverse()

        for link in links:
            linkStr = str(link)
            # The pattern is end-anchored, so it can match at most once.
            post = re.search(r'post-([0-9]*)$', linkStr)
            if post is not None:
                _sendDealMessage(updater, channelID, post.group(1))
            elif not _sendNewsMessage(updater, channelID, linkStr):
                # NOTE(review): unlike every other per-link failure this one
                # aborts the whole run, as the original code did - confirm
                # that skipping the entry would not be preferable.
                return -1
        return 0
    except Exception as e:
        # Top-level boundary: report whatever went wrong to the channel.
        sendMessage(updater, channelID, str(e))
        return -1
    finally:
        # quit() rather than close() so the driver process is terminated on
        # every exit path, including failures inside the handler above.
        browser.quit()
|
||||
|
||||
# Run a single check-and-notify pass when executed as a script; importing the
# module triggers nothing.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user