246 lines
9.5 KiB
Python
246 lines
9.5 KiB
Python
"""Основная логика RSS бота"""
|
||
|
||
import asyncio
|
||
import feedparser
|
||
import re
|
||
from datetime import datetime
|
||
from email.utils import parsedate_to_datetime
|
||
from typing import List, Dict, Any, Optional
|
||
import logging
|
||
from nio import AsyncClient
|
||
|
||
from .config import Config
|
||
from .history_manager import HistoryManager
|
||
from .image_handler import ImageHandler
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class RSSNewsBot:
|
||
"""Основной класс бота"""
|
||
|
||
def __init__(self, config: Config):
|
||
self.config = config
|
||
self.client = AsyncClient(config.homeserver, config.bot_user_id)
|
||
self.client.access_token = config.access_token
|
||
|
||
self.history = HistoryManager(
|
||
config.history_file,
|
||
config.history_days,
|
||
config.max_history_size
|
||
)
|
||
|
||
self.image_handler = ImageHandler(
|
||
config.images_dir,
|
||
config.compress_images,
|
||
config.max_image_size_mb,
|
||
config.max_image_width,
|
||
config.max_image_height
|
||
)
|
||
|
||
self.cycle_counter = 0
|
||
|
||
async def fetch_rss(self, url: str, source_name: str, room_id: str) -> List[Dict[str, Any]]:
|
||
"""Загружает и парсит RSS-ленту"""
|
||
try:
|
||
feed = feedparser.parse(url)
|
||
if feed.bozo:
|
||
logger.warning(f"Ошибка парсинга {source_name}: {feed.bozo_exception}")
|
||
return []
|
||
|
||
entries_with_metadata = []
|
||
for entry in feed.entries:
|
||
image_url = self.image_handler.extract_from_entry(entry)
|
||
|
||
entry_with_meta = {
|
||
'title': entry.get('title', 'Без заголовка'),
|
||
'link': entry.get('link', ''),
|
||
'summary': entry.get('summary', ''),
|
||
'published': entry.get('published', ''),
|
||
'source': source_name,
|
||
'room_id': room_id,
|
||
'image_url': image_url
|
||
}
|
||
entries_with_metadata.append(entry_with_meta)
|
||
|
||
return entries_with_metadata
|
||
|
||
except Exception as e:
|
||
logger.error(f"Ошибка при загрузке {source_name}: {e}")
|
||
return []
|
||
|
||
def format_news_message(self, entry: Dict[str, Any]) -> tuple[str, str]:
|
||
"""Форматирует новость с красивым HTML"""
|
||
title = entry.get('title', 'Без заголовка')
|
||
link = entry.get('link', '')
|
||
summary = entry.get('summary', '')
|
||
published = entry.get('published', '')
|
||
|
||
# Очищаем HTML теги
|
||
summary = re.sub(r'<[^>]+>', '', summary)
|
||
if len(summary) > 300:
|
||
summary = summary[:300] + "…"
|
||
|
||
# Форматируем дату
|
||
try:
|
||
pub_date = parsedate_to_datetime(published)
|
||
formatted_date = pub_date.strftime("%d %B %Y, %H:%M")
|
||
except Exception:
|
||
formatted_date = published
|
||
|
||
# HTML версия
|
||
html_message = f"""<b>📰 {title}</b><br/>
|
||
<br/>
|
||
{summary}<br/>
|
||
<br/>
|
||
🕒 <i>{formatted_date}</i><br/>
|
||
🔗 <a href="{link}">Читать полностью</a><br/>"""
|
||
|
||
# Plain text версия
|
||
plain_message = f"📰 {title}\n\n{summary}\n\n🕒 {formatted_date}\n🔗 {link}"
|
||
|
||
return plain_message, html_message
|
||
|
||
async def send_news(self, room_id: str, entry: Dict[str, Any]) -> bool:
|
||
"""Отправляет новость с изображением (если есть)"""
|
||
title = entry.get('title', 'Без заголовка')
|
||
image_url = entry.get('image_url')
|
||
link = entry.get('link', '')
|
||
|
||
# Отправляем изображение если есть
|
||
if image_url:
|
||
logger.debug(f"Найдено изображение: {image_url[:80]}...")
|
||
image_path = await self.image_handler.download(image_url, link)
|
||
|
||
if image_path:
|
||
success = await self.image_handler.upload_and_send(
|
||
self.client, room_id, image_path
|
||
)
|
||
if success:
|
||
logger.debug("Изображение отправлено")
|
||
else:
|
||
logger.warning("Не удалось отправить изображение")
|
||
|
||
# Отправляем текст новости
|
||
plain_message, html_message = self.format_news_message(entry)
|
||
|
||
retries = 3
|
||
while retries > 0:
|
||
try:
|
||
response = await self.client.room_send(
|
||
room_id=room_id,
|
||
message_type="m.room.message",
|
||
content={
|
||
"msgtype": "m.text",
|
||
"body": plain_message,
|
||
"format": "org.matrix.custom.html",
|
||
"formatted_body": html_message
|
||
}
|
||
)
|
||
|
||
if isinstance(response, tuple):
|
||
response = response[0]
|
||
|
||
if hasattr(response, 'event_id'):
|
||
logger.debug(f"Текст отправлен: {title[:50]}")
|
||
return True
|
||
|
||
except Exception as e:
|
||
error_msg = str(e).lower()
|
||
if "429" in error_msg or "ratelimit" in error_msg:
|
||
await asyncio.sleep(15)
|
||
retries -= 1
|
||
else:
|
||
logger.error(f"Ошибка отправки: {e}")
|
||
return False
|
||
|
||
return False
|
||
|
||
async def check_and_send(self) -> None:
|
||
"""Основная логика: проверяем все ленты и отправляем новое"""
|
||
self.cycle_counter += 1
|
||
|
||
logger.info(f"Цикл #{self.cycle_counter}")
|
||
|
||
news_by_room: Dict[str, List[Dict[str, Any]]] = {}
|
||
|
||
for source in self.config.sources:
|
||
logger.debug(f"Проверяю: {source['name']}")
|
||
entries = await self.fetch_rss(
|
||
source["url"],
|
||
source["name"],
|
||
source["room_id"]
|
||
)
|
||
|
||
new_entries = []
|
||
for entry in entries:
|
||
link = entry.get('link', '')
|
||
if link and not self.history.is_already_sent(link):
|
||
room_id = entry.get('room_id')
|
||
if room_id:
|
||
try:
|
||
published = entry.get('published', '')
|
||
pub_date = parsedate_to_datetime(published) if published else datetime.now()
|
||
entry['timestamp'] = pub_date
|
||
except Exception:
|
||
entry['timestamp'] = datetime.now()
|
||
|
||
new_entries.append(entry)
|
||
|
||
if new_entries:
|
||
logger.info(f"Найдено {len(new_entries)} новых в {source['name']}")
|
||
for entry in new_entries:
|
||
room_id = entry.get('room_id')
|
||
if room_id not in news_by_room:
|
||
news_by_room[room_id] = []
|
||
news_by_room[room_id].append(entry)
|
||
else:
|
||
logger.debug(f"Новых нет в {source['name']}")
|
||
|
||
# Сортируем и отправляем
|
||
if news_by_room:
|
||
for room_id, news_list in news_by_room.items():
|
||
news_list.sort(key=lambda x: x['timestamp'])
|
||
logger.info(f"Отправка {len(news_list)} новостей в комнату")
|
||
|
||
for i, entry in enumerate(news_list, 1):
|
||
title = entry.get('title', '')[:50]
|
||
logger.info(f"[{i}/{len(news_list)}]: {title}")
|
||
|
||
success = await self.send_news(room_id, entry)
|
||
if success:
|
||
self.history.add(entry.get('link', ''), title)
|
||
|
||
if i < len(news_list):
|
||
await asyncio.sleep(self.config.delay_between_posts)
|
||
|
||
# Сохраняем историю
|
||
self.history.save()
|
||
|
||
# Периодическая очистка
|
||
if self.cycle_counter % self.config.cleanup_images_every == 0:
|
||
await self.image_handler.clean()
|
||
|
||
async def run(self) -> None:
|
||
"""Запускает бота"""
|
||
logger.info("Запускаем RSS-бота...")
|
||
|
||
try:
|
||
await self.client.sync(timeout=3000)
|
||
logger.info("Соединение с Matrix установлено")
|
||
except Exception as e:
|
||
logger.warning(f"Ошибка при подключении: {e}")
|
||
|
||
logger.info("Бот запущен!")
|
||
|
||
while True:
|
||
start_time = asyncio.get_event_loop().time()
|
||
try:
|
||
await self.check_and_send()
|
||
except Exception as e:
|
||
logger.exception(f"Ошибка в основном цикле: {e}")
|
||
|
||
elapsed = asyncio.get_event_loop().time() - start_time
|
||
wait_time = max(0, self.config.check_interval - elapsed)
|
||
if wait_time > 0:
|
||
await asyncio.sleep(wait_time) |