From 2604fc490e74b4bf892b13d1814e5de6bd072d1c Mon Sep 17 00:00:00 2001 From: Andreas Siegling Date: Mon, 10 Feb 2025 19:42:10 +0100 Subject: [PATCH] Fixes bot --- bot.py | 93 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/bot.py b/bot.py index 78bfe90..3651167 100644 --- a/bot.py +++ b/bot.py @@ -1,14 +1,14 @@ # coding: utf-8 -from telegram.ext import Updater +from telegram import Bot from lxml import html import requests from lxml.etree import tostring as htmlstring +from urllib.parse import urljoin, urlparse import time import re - -from dotenv import dotenv_values +import asyncio from selenium import webdriver from selenium.webdriver.support.ui import WebDriverWait @@ -16,32 +16,38 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By from selenium.common.exceptions import TimeoutException -linkTemplate = '/html[1]/body[1]/div[2]/div[4]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[2]/div[1]/div[1]/ol[1]/li[{index}]/div[1]/div[1]/h3[1]/a[1]' - +#linkTemplate = '/html[1]/body[1]/div[2]/div[4]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[2]/div[1]/div[1]/ol[1]/li[{index}]/div[1]/div[1]/h3[1]/a[1]' +linkTemplate = '/html[1]/body[1]/div[1]/div[4]/div[4]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[2]/div[1]/div[1]/ol[1]/li[{index}]/div[1]/div[1]/h3[1]/a[1]' debug = False -countStrPath = '/home/nenas/projects/python/hardwareChecker/countStr.txt' if debug else '/home/nenas/telegram/hardwareChecker/countStr.txt' +countStrPath = '/home/nenas/telegram/hardwareChecker/countStr.txt' if debug else '/home/nenas/telegram/hardwareChecker/countStr.txt' -def sendMessage(updater, channelID, message): + +def sendMessage(bot, channelID, message): if debug: print(message) else: - updater.bot.sendMessage(channelID, message) + loop = asyncio.get_event_loop() + loop.run_until_complete(bot.sendMessage(channelID, message)) def getNewContent(): fireFoxOptions = webdriver.FirefoxOptions() - fireFoxOptions.headless = True + fireFoxOptions.add_argument("--headless") browser = webdriver.Firefox(options=fireFoxOptions) - browser.get('https://www.hardwareluxx.de/community/members/hwl-news-bot.268095/#recent-content') - try: - WebDriverWait(browser, 120).until(EC.presence_of_element_located((By.XPATH, linkTemplate.format(index='1')))) - return browser - except TimeoutException: - return -1 + for i in range(0, 3): + browser.get('https://www.hardwareluxx.de/community/members/hwl-news-bot.268095/#recent-content') + try: + WebDriverWait(browser, 20).until(EC.presence_of_element_located((By.XPATH, linkTemplate.format(index='1')))) + return browser + except TimeoutException: + if debug: + print("Timeout on WebDriverWait") + browser.quit() + return -1 def getLinksForID(browser, id): - elements = browser.find_elements_by_xpath(linkTemplate.format(index=(str(id)))) + elements = browser.find_elements("xpath", linkTemplate.format(index=(str(id)))) links = [elem.get_attribute('href') for elem in elements] if len(links) != 1: @@ -50,7 +56,7 @@ def getLinksForID(browser, id): def checkForUpdate(browser): xpathStr = '//dt[contains(text(),\'Beiträge\')]/../dd/a' - count = browser.find_elements_by_xpath(xpathStr) + count = browser.find_elements("xpath", xpathStr) if len(count) != 1: return -1 @@ -86,34 +92,31 @@ def end(ret, browser): def main(): - config = dotenv_values(".env") - - - token = config["TOKEN"] - channelID = config["CHANNELID"] - - updater = Updater(token) + token = "1495297410:AAGCwqqUZVdcc6RjnyWtl7XtO0K2tz4EpDQ" + channelID = '-1001418260700' + bot = Bot(token) browser = getNewContent() if browser == -1: - sendMessage(updater, channelID, 'Bot kaputt: Kann dynamic Content nicht laden') +# sendMessage(bot, channelID, 'Bot kaputt: Kann dynamic Content nicht laden, Hardwareluxx-Server sind im Eimer') return -1 try: ret = checkForUpdate(browser) - + if debug: + print(f'Updates: {ret}') if ret == 0: return end(0, browser) if (ret < 0): - sendMessage(updater, channelID, 'Bot kaputt: Kann static Content nicht laden') + sendMessage(bot, channelID, 'Bot kaputt: Kann static Content nicht laden') return end(-1, browser) links = [] for index in range(ret): link = getLinksForID(browser, index + 1) if link == -1: - sendMessage(updater, channelID, 'Bot kaputt: Kann Links nicht laden') + sendMessage(bot, channelID, 'Bot kaputt: Kann Links nicht laden') return end(-1, browser) links.insert(0, link) @@ -121,7 +124,7 @@ def main(): linkStr = str(link) post = re.findall(r'post-[0-9]*$', linkStr) if len(post) > 1: - sendMessage(updater, channelID, 'Bot kaputt: Kann Post-ID nicht ermitteln: ' + linkStr) + sendMessage(bot, channelID, 'Bot kaputt: Kann Post-ID nicht ermitteln: ' + linkStr) continue if len(post) == 1: postID = post[0][5:] @@ -134,36 +137,42 @@ def main(): postElement = tree.xpath('//article[@id=\'js-post-'+ postID + '\']/div/div[2]/div/div/div/article/div[1]') if len(postElement) != 1: - sendMessage(updater, channelID, 'Bot kaputt: Kann Post für ID=' + postID + ' nicht ermitteln') + sendMessage(bot, channelID, 'Bot kaputt: Kann Post für ID=' + postID + ' nicht ermitteln') continue postElementStr = str(htmlstring(postElement[0], encoding='unicode')) match = re.search(r'.*Name: (.*)