"""Основная логика RSS бота""" import asyncio import feedparser import re from datetime import datetime from email.utils import parsedate_to_datetime from typing import List, Dict, Any, Optional import logging from nio import AsyncClient from .config import Config from .history_manager import HistoryManager from .image_handler import ImageHandler logger = logging.getLogger(__name__) class RSSNewsBot: """Основной класс бота""" def __init__(self, config: Config): self.config = config self.client = AsyncClient(config.homeserver, config.bot_user_id) self.client.access_token = config.access_token self.history = HistoryManager( config.history_file, config.history_days, config.max_history_size ) self.image_handler = ImageHandler( config.images_dir, config.compress_images, config.max_image_size_mb, config.max_image_width, config.max_image_height ) self.cycle_counter = 0 async def fetch_rss(self, url: str, source_name: str, room_id: str) -> List[Dict[str, Any]]: """Загружает и парсит RSS-ленту""" try: feed = feedparser.parse(url) if feed.bozo: logger.warning(f"Ошибка парсинга {source_name}: {feed.bozo_exception}") return [] entries_with_metadata = [] for entry in feed.entries: image_url = self.image_handler.extract_from_entry(entry) entry_with_meta = { 'title': entry.get('title', 'Без заголовка'), 'link': entry.get('link', ''), 'summary': entry.get('summary', ''), 'published': entry.get('published', ''), 'source': source_name, 'room_id': room_id, 'image_url': image_url } entries_with_metadata.append(entry_with_meta) return entries_with_metadata except Exception as e: logger.error(f"Ошибка при загрузке {source_name}: {e}") return [] def format_news_message(self, entry: Dict[str, Any]) -> tuple[str, str]: """Форматирует новость с красивым HTML""" title = entry.get('title', 'Без заголовка') link = entry.get('link', '') summary = entry.get('summary', '') published = entry.get('published', '') # Очищаем HTML теги summary = re.sub(r'<[^>]+>', '', summary) if len(summary) > 300: summary = summary[:300] + "…" # Форматируем дату try: pub_date = parsedate_to_datetime(published) formatted_date = pub_date.strftime("%d %B %Y, %H:%M") except Exception: formatted_date = published # HTML версия html_message = f"""📰 {title}

{summary}

🕒 {formatted_date}
🔗 Читать полностью
""" # Plain text версия plain_message = f"📰 {title}\n\n{summary}\n\n🕒 {formatted_date}\n🔗 {link}" return plain_message, html_message async def send_news(self, room_id: str, entry: Dict[str, Any]) -> bool: """Отправляет новость с изображением (если есть)""" title = entry.get('title', 'Без заголовка') image_url = entry.get('image_url') link = entry.get('link', '') # Отправляем изображение если есть if image_url: logger.debug(f"Найдено изображение: {image_url[:80]}...") image_path = await self.image_handler.download(image_url, link) if image_path: success = await self.image_handler.upload_and_send( self.client, room_id, image_path ) if success: logger.debug("Изображение отправлено") else: logger.warning("Не удалось отправить изображение") # Отправляем текст новости plain_message, html_message = self.format_news_message(entry) retries = 3 while retries > 0: try: response = await self.client.room_send( room_id=room_id, message_type="m.room.message", content={ "msgtype": "m.text", "body": plain_message, "format": "org.matrix.custom.html", "formatted_body": html_message } ) if isinstance(response, tuple): response = response[0] if hasattr(response, 'event_id'): logger.debug(f"Текст отправлен: {title[:50]}") return True except Exception as e: error_msg = str(e).lower() if "429" in error_msg or "ratelimit" in error_msg: await asyncio.sleep(15) retries -= 1 else: logger.error(f"Ошибка отправки: {e}") return False return False async def check_and_send(self) -> None: """Основная логика: проверяем все ленты и отправляем новое""" self.cycle_counter += 1 logger.info(f"Цикл #{self.cycle_counter}") news_by_room: Dict[str, List[Dict[str, Any]]] = {} for source in self.config.sources: logger.debug(f"Проверяю: {source['name']}") entries = await self.fetch_rss( source["url"], source["name"], source["room_id"] ) new_entries = [] for entry in entries: link = entry.get('link', '') if link and not self.history.is_already_sent(link): room_id = entry.get('room_id') if room_id: try: published = entry.get('published', '') pub_date = parsedate_to_datetime(published) if published else datetime.now() entry['timestamp'] = pub_date except Exception: entry['timestamp'] = datetime.now() new_entries.append(entry) if new_entries: logger.info(f"Найдено {len(new_entries)} новых в {source['name']}") for entry in new_entries: room_id = entry.get('room_id') if room_id not in news_by_room: news_by_room[room_id] = [] news_by_room[room_id].append(entry) else: logger.debug(f"Новых нет в {source['name']}") # Сортируем и отправляем if news_by_room: for room_id, news_list in news_by_room.items(): news_list.sort(key=lambda x: x['timestamp']) logger.info(f"Отправка {len(news_list)} новостей в комнату") for i, entry in enumerate(news_list, 1): title = entry.get('title', '')[:50] logger.info(f"[{i}/{len(news_list)}]: {title}") success = await self.send_news(room_id, entry) if success: self.history.add(entry.get('link', ''), title) if i < len(news_list): await asyncio.sleep(self.config.delay_between_posts) # Сохраняем историю self.history.save() # Периодическая очистка if self.cycle_counter % self.config.cleanup_images_every == 0: await self.image_handler.clean() async def run(self) -> None: """Запускает бота""" logger.info("Запускаем RSS-бота...") try: await self.client.sync(timeout=3000) logger.info("Соединение с Matrix установлено") except Exception as e: logger.warning(f"Ошибка при подключении: {e}") logger.info("Бот запущен!") while True: start_time = asyncio.get_event_loop().time() try: await self.check_and_send() except Exception as e: logger.exception(f"Ошибка в основном цикле: {e}") elapsed = asyncio.get_event_loop().time() - start_time wait_time = max(0, self.config.check_interval - elapsed) if wait_time > 0: await asyncio.sleep(wait_time)