первая публикация бота
This commit is contained in:
246
bot/rss_bot.py
Normal file
246
bot/rss_bot.py
Normal file
@@ -0,0 +1,246 @@
|
||||
"""Основная логика RSS бота"""
|
||||
|
||||
import asyncio
|
||||
import feedparser
|
||||
import re
|
||||
from datetime import datetime
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
import logging
|
||||
from nio import AsyncClient
|
||||
|
||||
from .config import Config
|
||||
from .history_manager import HistoryManager
|
||||
from .image_handler import ImageHandler
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RSSNewsBot:
|
||||
"""Основной класс бота"""
|
||||
|
||||
def __init__(self, config: Config):
|
||||
self.config = config
|
||||
self.client = AsyncClient(config.homeserver, config.bot_user_id)
|
||||
self.client.access_token = config.access_token
|
||||
|
||||
self.history = HistoryManager(
|
||||
config.history_file,
|
||||
config.history_days,
|
||||
config.max_history_size
|
||||
)
|
||||
|
||||
self.image_handler = ImageHandler(
|
||||
config.images_dir,
|
||||
config.compress_images,
|
||||
config.max_image_size_mb,
|
||||
config.max_image_width,
|
||||
config.max_image_height
|
||||
)
|
||||
|
||||
self.cycle_counter = 0
|
||||
|
||||
async def fetch_rss(self, url: str, source_name: str, room_id: str) -> List[Dict[str, Any]]:
|
||||
"""Загружает и парсит RSS-ленту"""
|
||||
try:
|
||||
feed = feedparser.parse(url)
|
||||
if feed.bozo:
|
||||
logger.warning(f"Ошибка парсинга {source_name}: {feed.bozo_exception}")
|
||||
return []
|
||||
|
||||
entries_with_metadata = []
|
||||
for entry in feed.entries:
|
||||
image_url = self.image_handler.extract_from_entry(entry)
|
||||
|
||||
entry_with_meta = {
|
||||
'title': entry.get('title', 'Без заголовка'),
|
||||
'link': entry.get('link', ''),
|
||||
'summary': entry.get('summary', ''),
|
||||
'published': entry.get('published', ''),
|
||||
'source': source_name,
|
||||
'room_id': room_id,
|
||||
'image_url': image_url
|
||||
}
|
||||
entries_with_metadata.append(entry_with_meta)
|
||||
|
||||
return entries_with_metadata
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Ошибка при загрузке {source_name}: {e}")
|
||||
return []
|
||||
|
||||
def format_news_message(self, entry: Dict[str, Any]) -> tuple[str, str]:
|
||||
"""Форматирует новость с красивым HTML"""
|
||||
title = entry.get('title', 'Без заголовка')
|
||||
link = entry.get('link', '')
|
||||
summary = entry.get('summary', '')
|
||||
published = entry.get('published', '')
|
||||
|
||||
# Очищаем HTML теги
|
||||
summary = re.sub(r'<[^>]+>', '', summary)
|
||||
if len(summary) > 300:
|
||||
summary = summary[:300] + "…"
|
||||
|
||||
# Форматируем дату
|
||||
try:
|
||||
pub_date = parsedate_to_datetime(published)
|
||||
formatted_date = pub_date.strftime("%d %B %Y, %H:%M")
|
||||
except Exception:
|
||||
formatted_date = published
|
||||
|
||||
# HTML версия
|
||||
html_message = f"""<b>📰 {title}</b><br/>
|
||||
<br/>
|
||||
{summary}<br/>
|
||||
<br/>
|
||||
🕒 <i>{formatted_date}</i><br/>
|
||||
🔗 <a href="{link}">Читать полностью</a><br/>"""
|
||||
|
||||
# Plain text версия
|
||||
plain_message = f"📰 {title}\n\n{summary}\n\n🕒 {formatted_date}\n🔗 {link}"
|
||||
|
||||
return plain_message, html_message
|
||||
|
||||
async def send_news(self, room_id: str, entry: Dict[str, Any]) -> bool:
|
||||
"""Отправляет новость с изображением (если есть)"""
|
||||
title = entry.get('title', 'Без заголовка')
|
||||
image_url = entry.get('image_url')
|
||||
link = entry.get('link', '')
|
||||
|
||||
# Отправляем изображение если есть
|
||||
if image_url:
|
||||
logger.debug(f"Найдено изображение: {image_url[:80]}...")
|
||||
image_path = await self.image_handler.download(image_url, link)
|
||||
|
||||
if image_path:
|
||||
success = await self.image_handler.upload_and_send(
|
||||
self.client, room_id, image_path
|
||||
)
|
||||
if success:
|
||||
logger.debug("Изображение отправлено")
|
||||
else:
|
||||
logger.warning("Не удалось отправить изображение")
|
||||
|
||||
# Отправляем текст новости
|
||||
plain_message, html_message = self.format_news_message(entry)
|
||||
|
||||
retries = 3
|
||||
while retries > 0:
|
||||
try:
|
||||
response = await self.client.room_send(
|
||||
room_id=room_id,
|
||||
message_type="m.room.message",
|
||||
content={
|
||||
"msgtype": "m.text",
|
||||
"body": plain_message,
|
||||
"format": "org.matrix.custom.html",
|
||||
"formatted_body": html_message
|
||||
}
|
||||
)
|
||||
|
||||
if isinstance(response, tuple):
|
||||
response = response[0]
|
||||
|
||||
if hasattr(response, 'event_id'):
|
||||
logger.debug(f"Текст отправлен: {title[:50]}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e).lower()
|
||||
if "429" in error_msg or "ratelimit" in error_msg:
|
||||
await asyncio.sleep(15)
|
||||
retries -= 1
|
||||
else:
|
||||
logger.error(f"Ошибка отправки: {e}")
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
async def check_and_send(self) -> None:
|
||||
"""Основная логика: проверяем все ленты и отправляем новое"""
|
||||
self.cycle_counter += 1
|
||||
|
||||
logger.info(f"Цикл #{self.cycle_counter}")
|
||||
|
||||
news_by_room: Dict[str, List[Dict[str, Any]]] = {}
|
||||
|
||||
for source in self.config.sources:
|
||||
logger.debug(f"Проверяю: {source['name']}")
|
||||
entries = await self.fetch_rss(
|
||||
source["url"],
|
||||
source["name"],
|
||||
source["room_id"]
|
||||
)
|
||||
|
||||
new_entries = []
|
||||
for entry in entries:
|
||||
link = entry.get('link', '')
|
||||
if link and not self.history.is_already_sent(link):
|
||||
room_id = entry.get('room_id')
|
||||
if room_id:
|
||||
try:
|
||||
published = entry.get('published', '')
|
||||
pub_date = parsedate_to_datetime(published) if published else datetime.now()
|
||||
entry['timestamp'] = pub_date
|
||||
except Exception:
|
||||
entry['timestamp'] = datetime.now()
|
||||
|
||||
new_entries.append(entry)
|
||||
|
||||
if new_entries:
|
||||
logger.info(f"Найдено {len(new_entries)} новых в {source['name']}")
|
||||
for entry in new_entries:
|
||||
room_id = entry.get('room_id')
|
||||
if room_id not in news_by_room:
|
||||
news_by_room[room_id] = []
|
||||
news_by_room[room_id].append(entry)
|
||||
else:
|
||||
logger.debug(f"Новых нет в {source['name']}")
|
||||
|
||||
# Сортируем и отправляем
|
||||
if news_by_room:
|
||||
for room_id, news_list in news_by_room.items():
|
||||
news_list.sort(key=lambda x: x['timestamp'])
|
||||
logger.info(f"Отправка {len(news_list)} новостей в комнату")
|
||||
|
||||
for i, entry in enumerate(news_list, 1):
|
||||
title = entry.get('title', '')[:50]
|
||||
logger.info(f"[{i}/{len(news_list)}]: {title}")
|
||||
|
||||
success = await self.send_news(room_id, entry)
|
||||
if success:
|
||||
self.history.add(entry.get('link', ''), title)
|
||||
|
||||
if i < len(news_list):
|
||||
await asyncio.sleep(self.config.delay_between_posts)
|
||||
|
||||
# Сохраняем историю
|
||||
self.history.save()
|
||||
|
||||
# Периодическая очистка
|
||||
if self.cycle_counter % self.config.cleanup_images_every == 0:
|
||||
await self.image_handler.clean()
|
||||
|
||||
async def run(self) -> None:
|
||||
"""Запускает бота"""
|
||||
logger.info("Запускаем RSS-бота...")
|
||||
|
||||
try:
|
||||
await self.client.sync(timeout=3000)
|
||||
logger.info("Соединение с Matrix установлено")
|
||||
except Exception as e:
|
||||
logger.warning(f"Ошибка при подключении: {e}")
|
||||
|
||||
logger.info("Бот запущен!")
|
||||
|
||||
while True:
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
try:
|
||||
await self.check_and_send()
|
||||
except Exception as e:
|
||||
logger.exception(f"Ошибка в основном цикле: {e}")
|
||||
|
||||
elapsed = asyncio.get_event_loop().time() - start_time
|
||||
wait_time = max(0, self.config.check_interval - elapsed)
|
||||
if wait_time > 0:
|
||||
await asyncio.sleep(wait_time)
|
||||
Reference in New Issue
Block a user