Fixes bot

This commit is contained in:
2025-02-10 19:42:10 +01:00
parent 8b5b2bd887
commit 2604fc490e

85
bot.py
View File

@@ -1,14 +1,14 @@
# coding: utf-8
from telegram.ext import Updater
from telegram import Bot
from lxml import html
import requests
from lxml.etree import tostring as htmlstring
from urllib.parse import urljoin, urlparse
import time
import re
from dotenv import dotenv_values
import asyncio
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
@@ -16,32 +16,38 @@ from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
# Absolute XPath template for the link of one entry in the list the member
# page renders; `{index}` is the 1-based position of the entry (callers
# format it with '1', or with `index + 1`).
# NOTE(review): absolute paths like this break whenever the page layout
# shifts; a previous layout used body/div[2] instead of body/div[1]/div[4].
linkTemplate = '/html[1]/body[1]/div[1]/div[4]/div[4]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[2]/div[1]/div[1]/ol[1]/li[{index}]/div[1]/div[1]/h3[1]/a[1]'

# When True, messages are printed instead of sent and extra diagnostics are
# printed by the scraping helpers.
debug = False

# Path of the countStr.txt state file (its readers/writers are outside this
# view). Debug and production runs currently share the same path, so no
# conditional is needed here.
countStrPath = '/home/nenas/telegram/hardwareChecker/countStr.txt'
def sendMessage(bot, channelID, message):
    """Send `message` to `channelID`, or just print it when `debug` is set.

    Args:
        bot: Object whose ``sendMessage(chat_id, text)`` returns an awaitable
            (main passes a ``telegram.Bot``).
        channelID: Identifier of the target chat/channel.
        message: Text to deliver.
    """
    if debug:
        print(message)
        return

    # bot.sendMessage is a coroutine, so it has to be driven by an event loop.
    # Reuse the current loop across calls; newer Python versions no longer
    # create one implicitly, so fall back to creating and registering one.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = None
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    loop.run_until_complete(bot.sendMessage(channelID, message))
def getNewContent():
    """Load the member page in headless Firefox and wait for its first link.

    The page is requested up to three times; each attempt waits up to 20
    seconds for the element matching ``linkTemplate`` at index 1 to appear.

    Returns:
        The running ``webdriver.Firefox`` instance once the element is
        present, or ``-1`` if every attempt timed out (the browser is quit
        before returning ``-1``).
    """
    fireFoxOptions = webdriver.FirefoxOptions()
    # The `headless` attribute is gone in current Selenium; pass the flag.
    fireFoxOptions.add_argument("--headless")
    browser = webdriver.Firefox(options=fireFoxOptions)
    firstLinkXPath = linkTemplate.format(index='1')
    try:
        for _ in range(3):
            browser.get('https://www.hardwareluxx.de/community/members/hwl-news-bot.268095/#recent-content')
            try:
                WebDriverWait(browser, 20).until(EC.presence_of_element_located((By.XPATH, firstLinkXPath)))
                return browser
            except TimeoutException:
                # Keep the browser open so the next attempt can reuse it.
                if debug:
                    print("Timeout on WebDriverWait")
    except Exception:
        # Do not leave a Firefox process behind on unexpected failures.
        browser.quit()
        raise
    browser.quit()
    return -1
def getLinksForID(browser, id):
elements = browser.find_elements_by_xpath(linkTemplate.format(index=(str(id))))
elements = browser.find_elements("xpath", linkTemplate.format(index=(str(id))))
links = [elem.get_attribute('href') for elem in elements]
if len(links) != 1:
@@ -50,7 +56,7 @@ def getLinksForID(browser, id):
def checkForUpdate(browser):
xpathStr = '//dt[contains(text(),\'Beiträge\')]/../dd/a'
count = browser.find_elements_by_xpath(xpathStr)
count = browser.find_elements("xpath", xpathStr)
if len(count) != 1:
return -1
@@ -86,34 +92,31 @@ def end(ret, browser):
def main():
config = dotenv_values(".env")
token = config["TOKEN"]
channelID = config["CHANNELID"]
updater = Updater(token)
token = "1495297410:AAGCwqqUZVdcc6RjnyWtl7XtO0K2tz4EpDQ"
channelID = '-1001418260700'
bot = Bot(token)
browser = getNewContent()
if browser == -1:
sendMessage(updater, channelID, 'Bot kaputt: Kann dynamic Content nicht laden')
# sendMessage(bot, channelID, 'Bot kaputt: Kann dynamic Content nicht laden, Hardwareluxx-Server sind im Eimer')
return -1
try:
ret = checkForUpdate(browser)
if debug:
print(f'Updates: {ret}')
if ret == 0:
return end(0, browser)
if (ret < 0):
sendMessage(updater, channelID, 'Bot kaputt: Kann static Content nicht laden')
sendMessage(bot, channelID, 'Bot kaputt: Kann static Content nicht laden')
return end(-1, browser)
links = []
for index in range(ret):
link = getLinksForID(browser, index + 1)
if link == -1:
sendMessage(updater, channelID, 'Bot kaputt: Kann Links nicht laden')
sendMessage(bot, channelID, 'Bot kaputt: Kann Links nicht laden')
return end(-1, browser)
links.insert(0, link)
@@ -121,7 +124,7 @@ def main():
linkStr = str(link)
post = re.findall(r'post-[0-9]*$', linkStr)
if len(post) > 1:
sendMessage(updater, channelID, 'Bot kaputt: Kann Post-ID nicht ermitteln: ' + linkStr)
sendMessage(bot, channelID, 'Bot kaputt: Kann Post-ID nicht ermitteln: ' + linkStr)
continue
if len(post) == 1:
postID = post[0][5:]
@@ -134,36 +137,42 @@ def main():
postElement = tree.xpath('//article[@id=\'js-post-'+ postID + '\']/div/div[2]/div/div/div/article/div[1]')
if len(postElement) != 1:
sendMessage(updater, channelID, 'Bot kaputt: Kann Post für ID=' + postID + ' nicht ermitteln')
sendMessage(bot, channelID, 'Bot kaputt: Kann Post für ID=' + postID + ' nicht ermitteln')
continue
postElementStr = str(htmlstring(postElement[0], encoding='unicode'))
match = re.search(r'.*Name: (.*)<br.*', postElementStr)
if match == None:
sendMessage(updater, channelID, 'Bot kaputt: Kann Hardwarename nicht capturen: ' + postElementStr)
sendMessage(bot, channelID, 'Bot kaputt: Kann Hardwarename nicht capturen: ' + postElementStr)
continue
hwName = match.group(1)
match = re.search(r'.*Shop: (.*)<br.*', postElementStr)
if match == None:
sendMessage(updater, channelID, 'Bot kaputt: Kann Shop nicht capturen: ' + postElementStr)
sendMessage(bot, channelID, 'Bot kaputt: Kann Shop nicht capturen: ' + postElementStr)
continue
shop = match.group(1)
match = re.search(r'.*Preis: (.*)<br.*', postElementStr)
if match == None:
sendMessage(updater, channelID, 'Bot kaputt: Kann Shop nicht capturen: ' + postElementStr)
sendMessage(bot, channelID, 'Bot kaputt: Kann Shop nicht capturen: ' + postElementStr)
continue
preis = match.group(1)
match = re.search(r'href\="([^"]+).*', postElementStr)
if match == None:
sendMessage(updater, channelID, 'Bot kaputt: Kann Hardwarename nicht capturen: ' + postElementStr)
sendMessage(bot, channelID, 'Bot kaputt: Kann Hardwarename nicht capturen: ' + postElementStr)
continue
url = match.group(1)
message = 'Name: ' + hwName + '\nPreis: ' + preis + '\nShop: ' + shop + '\nURL: ' + url
sendMessage(updater, channelID, message)
r = requests.head(url, allow_redirects=True)
clean_url = urljoin(r.url, urlparse(r.url).path)
if "notebooksbilliger" in r.url:
clean_url = r.url.split("?")[0]
if debug:
message = 'Name: ' + hwName + '\nPreis: ' + preis + '\nShop: ' + shop + '\nURL: ' + url + '\nresolved: ' + r.url + '\nnoQuery: ' + clean_url
else:
message = 'Name: ' + hwName + '\nPreis: ' + preis + '\nShop: ' + shop + '\nURL: ' + clean_url
sendMessage(bot, channelID, message)
else:
page = requests.get(linkStr)
@@ -171,21 +180,21 @@ def main():
postElement = tree.xpath('//a[contains(text(), \'weiterlesen\')]')
if len(postElement) != 1:
sendMessage(updater, channelID, 'Bot kaputt: Kann Post für ID=' + postID + ' nicht ermitteln')
sendMessage(bot, channelID, 'Bot kaputt: Kann Post für ID=' + postID + ' nicht ermitteln')
return end(-1, browser)
postElementStr = str(htmlstring(postElement[0], encoding='unicode'))
match = re.search(r'href\="([^"]+).*', postElementStr)
if match == None:
sendMessage(updater, channelID, 'Bot kaputt: Kann Hardwarename nicht capturen.')
sendMessage(bot, channelID, 'Bot kaputt: Kann Hardwarename nicht capturen.')
continue
url = match.group(1)
sendMessage(updater, channelID, 'News: ' + url)
sendMessage(bot, channelID, 'News: ' + url)
return end(0, browser)
except Exception as e:
browser.close()
sendMessage(updater, channelID, str(e))
browser.quit()
sendMessage(bot, channelID, str(e))
# Run the bot only when executed as a script, not when imported.
if __name__ == "__main__":
    main()