первая публикация бота
This commit is contained in:
5
bot/__init__.py
Normal file
5
bot/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Matrix RSS Bot - бот для публикации RSS новостей в Matrix чаты"""
|
||||
|
||||
__version__ = "1.0.0"
|
||||
__author__ = "iSlipper"
|
||||
__license__ = "MIT"
|
||||
115
bot/config.py
Normal file
115
bot/config.py
Normal file
@@ -0,0 +1,115 @@
|
||||
"""Модуль для работы с конфигурацией"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
|
||||
class Config:
    """Load, validate and expose the bot settings stored in two YAML files.

    The main file holds the connection and tuning options; the sources file
    holds the list of RSS feeds under its top-level ``sources`` key. Both
    files are read once, when the object is constructed.
    """

    # Keys that must be present and non-empty in the main config file.
    REQUIRED_FIELDS = ('homeserver', 'bot_user_id', 'access_token')

    def __init__(
        self,
        config_path: str = "config/config.yaml",
        sources_path: str = "config/sources.yaml",
    ):
        """Read both YAML files.

        Args:
            config_path: Path to the main configuration file.
            sources_path: Path to the file listing the RSS sources.

        Raises:
            FileNotFoundError: If either file does not exist.
            ValueError: If a file has the wrong shape, a required field is
                missing, or no sources are defined.
        """
        self.config_path = Path(config_path)
        self.sources_path = Path(sources_path)
        self.config: Dict[str, Any] = {}
        self.sources: List[Dict[str, str]] = []

        self._load_config()
        self._load_sources()

    @staticmethod
    def _read_yaml(path: Path) -> Dict[str, Any]:
        """Parse *path* as YAML and return its top-level mapping.

        An empty document makes ``yaml.safe_load`` return ``None``; that is
        normalised to an empty dict so callers can report a precise error.

        Raises:
            ValueError: If the document's top level is not a mapping.
        """
        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        if data is None:
            return {}
        if not isinstance(data, dict):
            raise ValueError(f"Некорректный формат файла {path}: ожидается словарь (mapping)")
        return data

    def _load_config(self) -> None:
        """Load the main config file and check the required fields."""
        if not self.config_path.exists():
            raise FileNotFoundError(
                f"Конфиг не найден: {self.config_path}\n"
                f"Скопируйте config/config.example.yaml в config/config.yaml и настройте его"
            )

        self.config = self._read_yaml(self.config_path)

        # A key that is present but empty is as unusable as a missing one.
        for field in self.REQUIRED_FIELDS:
            if self.config.get(field) in (None, ''):
                raise ValueError(f"В конфиге отсутствует обязательное поле: {field}")

    def _load_sources(self) -> None:
        """Load the list of RSS sources from the sources file."""
        if not self.sources_path.exists():
            raise FileNotFoundError(
                f"Файл с источниками не найден: {self.sources_path}\n"
                f"Скопируйте config/sources.example.yaml в config/sources.yaml и настройте его"
            )

        sources = self._read_yaml(self.sources_path).get('sources') or []
        if not isinstance(sources, list):
            raise ValueError(f"Некорректный формат файла {self.sources_path}: 'sources' должен быть списком")
        self.sources = sources

        if not self.sources:
            raise ValueError("Не найдено ни одного RSS источника в конфиге")

    @property
    def homeserver(self) -> str:
        """Required ``homeserver`` setting."""
        return self.config['homeserver']

    @property
    def bot_user_id(self) -> str:
        """Required ``bot_user_id`` setting."""
        return self.config['bot_user_id']

    @property
    def access_token(self) -> str:
        """Required ``access_token`` setting."""
        return self.config['access_token']

    @property
    def check_interval(self) -> int:
        """``check_interval`` setting (default 600)."""
        return self.config.get('check_interval', 600)

    @property
    def delay_between_posts(self) -> int:
        """``delay_between_posts`` setting (default 1)."""
        return self.config.get('delay_between_posts', 1)

    @property
    def history_file(self) -> str:
        """``history_file`` setting (default ``data/sent_history.json``)."""
        return self.config.get('history_file', 'data/sent_history.json')

    @property
    def history_days(self) -> int:
        """``history_days`` setting (default 15)."""
        return self.config.get('history_days', 15)

    @property
    def max_history_size(self) -> int:
        """``max_history_size`` setting (default 2000)."""
        return self.config.get('max_history_size', 2000)

    @property
    def images_dir(self) -> str:
        """``images_dir`` setting (default ``data/news_images``)."""
        return self.config.get('images_dir', 'data/news_images')

    @property
    def cleanup_images_every(self) -> int:
        """``cleanup_images_every`` setting (default 144)."""
        return self.config.get('cleanup_images_every', 144)

    @property
    def compress_images(self) -> bool:
        """``compress_images`` setting (default True)."""
        return self.config.get('compress_images', True)

    @property
    def max_image_size_mb(self) -> float:
        """``max_image_size_mb`` setting (default 0.5)."""
        return self.config.get('max_image_size_mb', 0.5)

    @property
    def max_image_width(self) -> int:
        """``max_image_width`` setting (default 1200)."""
        return self.config.get('max_image_width', 1200)

    @property
    def max_image_height(self) -> int:
        """``max_image_height`` setting (default 1200)."""
        return self.config.get('max_image_height', 1200)

    @property
    def log_level(self) -> str:
        """``log_level`` setting (default ``INFO``)."""
        return self.config.get('log_level', 'INFO')

    @property
    def log_file(self) -> Optional[str]:
        """``log_file`` setting, or None when it is not configured."""
        return self.config.get('log_file', None)
|
||||
94
bot/history_manager.py
Normal file
94
bot/history_manager.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Управление историей отправленных новостей"""
|
||||
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Optional
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HistoryManager:
    """Persistent record of the news links that were already posted.

    The history is kept in memory as ``sent_history`` and stored on disk as
    a JSON object mapping each link to
    ``{"date": <ISO timestamp>, "title": <title, at most 100 chars>}``.
    Legacy records whose value is a bare ISO timestamp string are upgraded
    to that shape by :meth:`clean_old`.
    """

    def __init__(self, history_file: str, history_days: int, max_history_size: int):
        """Create the manager and load any existing history.

        Args:
            history_file: Path of the JSON file holding the history.
            history_days: Records older than this many days are dropped by
                :meth:`clean_old`.
            max_history_size: Maximum number of records kept by
                :meth:`clean_old`.
        """
        self.history_file = Path(history_file)
        self.history_days = history_days
        self.max_history_size = max_history_size
        self.sent_history: Dict[str, dict] = {}

        # Make sure the directory for the history file exists.
        self.history_file.parent.mkdir(parents=True, exist_ok=True)

        self._load()

    def _load(self) -> None:
        """Load the history from disk, starting empty if it is unusable."""
        self.sent_history = {}

        if not self.history_file.exists():
            logger.info("Файл с историей не найден, создаю новый")
            return

        try:
            with open(self.history_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            logger.warning("Файл истории повреждён, создаю новый")
            return

        # Valid JSON of the wrong shape (list, number, ...) would break every
        # later dict operation, so treat it like a corrupted file.
        if not isinstance(loaded, dict):
            logger.warning("Файл истории повреждён, создаю новый")
            return

        self.sent_history = loaded
        logger.info(f"Загружено {len(self.sent_history)} записей из истории")

    def save(self) -> None:
        """Write the history to disk; failures are logged, not raised."""
        # Write to a sibling file and rename it over the target so that an
        # interrupted write cannot leave a truncated history file behind.
        tmp_file = self.history_file.with_name(self.history_file.name + '.tmp')
        try:
            with open(tmp_file, 'w', encoding='utf-8') as f:
                json.dump(self.sent_history, f, indent=2, ensure_ascii=False, default=str)
            tmp_file.replace(self.history_file)
        except Exception as e:
            logger.error(f"Ошибка сохранения истории: {e}")

    def add(self, url: str, title: str) -> None:
        """Record *url* as sent now, with the title cut to 100 characters."""
        self.sent_history[url] = {
            "date": datetime.now().isoformat(),
            "title": (title or '')[:100]
        }

    def is_already_sent(self, url: str) -> bool:
        """Return True if *url* is present in the history."""
        return url in self.sent_history

    def clean_old(self, force: bool = False) -> None:
        """Drop expired or malformed records and enforce the size limit.

        The history is saved when its size changed or when *force* is true.
        Nothing happens (not even a forced save) when the history is empty.
        """
        if not self.sent_history:
            return

        original_size = len(self.sent_history)
        cutoff_date = datetime.now() - timedelta(days=self.history_days)

        kept = []  # (timestamp, url, record) for every record that survives
        for url, data in self.sent_history.items():
            # Legacy format: the value is just the ISO timestamp string.
            record = {"date": data, "title": "unknown"} if isinstance(data, str) else data
            try:
                timestamp = datetime.fromisoformat(record['date'])
            except (ValueError, KeyError, TypeError):
                # Unparseable record: drop it, as before, but leave a trace.
                logger.debug(f"Удаляю некорректную запись истории: {url}")
                continue

            # Compare in naive local time, the format written by add().
            if timestamp.tzinfo is not None:
                timestamp = timestamp.astimezone().replace(tzinfo=None)

            if timestamp > cutoff_date:
                kept.append((timestamp, url, record))

        # Enforce the size limit, keeping the most recent records.
        if len(kept) > self.max_history_size:
            kept.sort(key=lambda item: item[0], reverse=True)
            kept = kept[:self.max_history_size]

        self.sent_history = {url: record for _, url, record in kept}

        if original_size != len(self.sent_history) or force:
            self.save()
|
||||
273
bot/image_handler.py
Normal file
273
bot/image_handler.py
Normal file
@@ -0,0 +1,273 @@
|
||||
"""Обработка изображений: скачивание, сжатие, отправка"""
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
import logging
|
||||
import aiohttp
|
||||
from PIL import Image
|
||||
from nio import AsyncClient, UploadResponse, RoomSendResponse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ImageHandler:
    """Finds, downloads, compresses and posts news images to Matrix rooms."""

    # Total time, in seconds, allowed for one image download.
    DOWNLOAD_TIMEOUT = 10
    # Quality used whenever an image is (re-)encoded as JPEG.
    JPEG_QUALITY = 60

    def __init__(
        self,
        images_dir: str,
        compress: bool = True,
        max_size_mb: float = 0.5,
        max_width: int = 1200,
        max_height: int = 1200
    ):
        """Store the settings and create the image directory.

        Args:
            images_dir: Directory where downloaded images are stored.
            compress: Whether images are recompressed before upload.
            max_size_mb: Stored on the instance; not enforced by this class.
            max_width: Images wider than this are scaled down.
            max_height: Images taller than this are scaled down.
        """
        self.images_dir = Path(images_dir)
        self.compress = compress
        self.max_size_mb = max_size_mb
        self.max_width = max_width
        self.max_height = max_height

        self.images_dir.mkdir(parents=True, exist_ok=True)

    def extract_from_entry(self, entry) -> Optional[str]:
        """Return the image URL found in an RSS entry, or None.

        Lookup order: ``media_content``, ``enclosures``, feed-specific image
        attributes, an ``<img>``/``src`` match in the summary, and finally
        ``media_thumbnail``.
        """
        import re

        # 1. Standard RSS media fields. Some feeds mark images only with
        #    medium="image" and give no MIME type.
        for media in getattr(entry, 'media_content', None) or []:
            url = media.get('url')
            is_image = 'image' in (media.get('type') or '') or media.get('medium') == 'image'
            if url and is_image:
                return url

        for enc in getattr(entry, 'enclosures', None) or []:
            href = enc.get('href')
            # Skip enclosures without a link instead of returning ''.
            if href and 'image' in (enc.get('type') or ''):
                return href

        # 2. Feed-specific tags (RBC, etc.), then the RBC thumbnail.
        for field in ('rbc_news_image', 'image', 'rbc_news_thumbnail'):
            url = getattr(getattr(entry, field, None), 'url', None)
            if url:
                return url

        # 3. Look for an image inside the description markup.
        summary = entry.get('summary', '') or entry.get('description', '') or ''
        img_patterns = [
            r'<img[^>]+src=["\']([^"\']+)["\']',
            r'src=["\'](https?://[^"\']+\.(jpg|jpeg|png|gif|webp))["\']',
        ]
        for pattern in img_patterns:
            match = re.search(pattern, summary, re.IGNORECASE)
            if match:
                return match.group(1)

        # 4. Last resort: a media thumbnail.
        for thumb in getattr(entry, 'media_thumbnail', None) or []:
            url = thumb.get('url')
            if url:
                return url

        return None

    async def download(self, image_url: str, news_link: str) -> Optional[str]:
        """Download *image_url* into the image directory.

        The file name is derived from an MD5 hash of *news_link*.

        Returns:
            The path of the saved file, or None on any failure.
        """
        try:
            # File name is based on the news URL, not on the image URL.
            url_hash = hashlib.md5(news_link.encode()).hexdigest()[:12]

            timeout = aiohttp.ClientTimeout(total=self.DOWNLOAD_TIMEOUT)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(image_url) as response:
                    if response.status != 200:
                        logger.warning(f"Не удалось скачать {image_url}: статус {response.status}")
                        return None

                    content_type = response.headers.get('Content-Type', '')
                    payload = await response.read()

            if not payload:
                logger.warning(f"Пустой ответ при скачивании {image_url}")
                return None

            file_ext = self._get_extension(content_type)
            file_path = self.images_dir / f"{url_hash}.{file_ext}"
            file_path.write_bytes(payload)

            return str(file_path)

        except Exception as e:
            logger.warning(f"Ошибка скачивания изображения {image_url}: {e}")
            return None

    def _get_extension(self, content_type: str) -> str:
        """Map a Content-Type header value to a file extension (default jpg)."""
        ext_map = {
            'png': 'png',
            'gif': 'gif',
            'webp': 'webp',
            'jpeg': 'jpg',
            'jpg': 'jpg'
        }
        # Header values are case-insensitive.
        normalized = (content_type or '').lower()
        for key, ext in ext_map.items():
            if key in normalized:
                return ext
        return 'jpg'

    @staticmethod
    def _compressed_path_for(image_path: str, as_jpeg: bool) -> str:
        """Return the sibling path used for the compressed copy.

        Only the file name is changed, so dots in directory names are left
        alone. When the copy is encoded as JPEG it gets a ``.jpg`` suffix so
        the MIME type guessed from the name matches the content.
        """
        source = Path(image_path)
        suffix = '.jpg' if as_jpeg else source.suffix
        return str(source.with_name(f"{source.stem}_compressed{suffix}"))

    @staticmethod
    def _prepare_for_jpeg(img):
        """Return *img* in a mode the JPEG encoder accepts.

        Images with an alpha channel or a palette are composited onto a
        white background; other unsupported modes are converted to RGB.
        """
        if img.mode in ('RGBA', 'LA', 'P'):
            rgba = img.convert('RGBA')
            background = Image.new('RGB', rgba.size, (255, 255, 255))
            background.paste(rgba, mask=rgba.split()[-1])
            return background
        if img.mode not in ('RGB', 'L', 'CMYK'):
            return img.convert('RGB')
        return img

    async def compress_image(self, image_path: str) -> str:
        """Compress the image and return the path of the compressed copy.

        Returns *image_path* unchanged when compression is disabled or when
        it fails for any reason.
        """
        if not self.compress:
            return image_path

        try:
            with Image.open(image_path) as img:
                original_size = os.path.getsize(image_path) / (1024 * 1024)
                original_format = img.format
                has_alpha = img.mode in ('RGBA', 'LA', 'P') and 'transparency' in img.info

                # Scale down, keeping the aspect ratio, if the image is too big.
                if img.width > self.max_width or img.height > self.max_height:
                    ratio = min(self.max_width / img.width, self.max_height / img.height)
                    new_width = max(1, int(img.width * ratio))
                    new_height = max(1, int(img.height * ratio))
                    img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                    logger.debug(f"Изменен размер: {img.width}x{img.height}")

                # PNGs flagged as transparent stay PNG; everything else
                # becomes JPEG.
                keep_png = original_format == 'PNG' and has_alpha
                compressed_path = self._compressed_path_for(image_path, as_jpeg=not keep_png)

                if keep_png:
                    img.save(compressed_path, 'PNG', optimize=True)
                else:
                    img = self._prepare_for_jpeg(img)
                    img.save(compressed_path, 'JPEG', quality=self.JPEG_QUALITY, optimize=True)

            compressed_size = os.path.getsize(compressed_path) / (1024 * 1024)
            logger.debug(f"Сжатие: {original_size:.2f}MB → {compressed_size:.2f}MB")

            return compressed_path

        except Exception as e:
            logger.warning(f"Не удалось сжать: {e}, отправляю оригинал")
            return image_path

    async def upload_and_send(
        self,
        client: "AsyncClient",
        room_id: str,
        image_path: str
    ) -> bool:
        """Upload the image to the Matrix server and post it to *room_id*.

        Returns:
            True if the room message was accepted (the response carries an
            ``event_id``), False otherwise. Errors are logged, not raised.
        """
        compressed_path = image_path
        try:
            if not os.path.exists(image_path):
                logger.error(f"Файл не найден: {image_path}")
                return False

            # Compress if enabled.
            compressed_path = await self.compress_image(image_path)

            file_name = os.path.basename(compressed_path)
            file_size = os.path.getsize(compressed_path)
            mime_type = mimetypes.guess_type(compressed_path)[0] or 'image/jpeg'

            width, height = self._get_image_size(compressed_path)

            # Upload to the server; the size is passed explicitly.
            with open(compressed_path, 'rb') as f:
                upload_response = await client.upload(
                    f,
                    content_type=mime_type,
                    filename=file_name,
                    filesize=file_size
                )

            if isinstance(upload_response, tuple):
                upload_response = upload_response[0]

            if not hasattr(upload_response, 'content_uri'):
                logger.error("Не удалось загрузить изображение")
                return False

            content_uri = upload_response.content_uri

            # Build the m.image event content.
            info = {
                "mimetype": mime_type,
                "size": file_size
            }
            if width and height:
                info["w"] = width
                info["h"] = height

            # The uploaded image doubles as its own thumbnail.
            info["thumbnail_url"] = content_uri
            info["thumbnail_info"] = {
                "mimetype": mime_type,
                "size": file_size,
                "w": width or 800,
                "h": height or 600
            }

            content = {
                "msgtype": "m.image",
                "body": file_name,
                "url": content_uri,
                "info": info
            }

            send_response = await client.room_send(
                room_id,
                "m.room.message",
                content
            )

            if isinstance(send_response, tuple):
                send_response = send_response[0]

            if hasattr(send_response, 'event_id'):
                logger.debug(f"Изображение отправлено: {file_name}")
                return True

            logger.error(f"Ошибка отправки: {send_response}")
            return False

        except Exception as e:
            logger.error(f"Ошибка при отправке изображения: {e}")
            return False

        finally:
            # Remove the temporary compressed copy on every exit path.
            if compressed_path != image_path:
                try:
                    if os.path.exists(compressed_path):
                        os.remove(compressed_path)
                except OSError as e:
                    logger.debug(f"Не удалось удалить временный файл {compressed_path}: {e}")

    def _get_image_size(self, image_path: str) -> Tuple[Optional[int], Optional[int]]:
        """Return ``(width, height)``, or ``(None, None)`` if unreadable."""
        try:
            with Image.open(image_path) as img:
                return img.size
        except Exception:
            return None, None

    async def clean(self) -> None:
        """Delete every regular file in the image directory."""
        try:
            if not self.images_dir.exists():
                return

            removed = 0
            for file in self.images_dir.glob('*'):
                try:
                    if file.is_file():
                        file.unlink()
                        removed += 1
                except OSError as e:
                    # Best effort: one undeletable file must not stop the sweep.
                    logger.debug(f"Не удалось удалить {file}: {e}")

            if removed:
                logger.info(f"Очищено {removed} изображений")

        except Exception as e:
            logger.error(f"Ошибка очистки папки с изображениями: {e}")
|
||||
60
bot/main.py
Normal file
60
bot/main.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Точка входа в бота"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Добавляем корневую директорию в PATH
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from bot.config import Config
|
||||
from bot.rss_bot import RSSNewsBot
|
||||
|
||||
|
||||
def setup_logging(config: "Config") -> None:
    """Configure root logging from the bot configuration.

    Logs always go to the console; when ``config.log_file`` is set they are
    also written to that file, whose parent directory is created if needed.
    An unknown ``config.log_level`` falls back to INFO.
    """
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    # getattr() can also hit non-level attributes of the logging module
    # (e.g. "shutdown"), so only accept real integer levels.
    log_level = getattr(logging, str(config.log_level).upper(), logging.INFO)
    if not isinstance(log_level, int):
        log_level = logging.INFO

    handlers = [logging.StreamHandler()]

    if config.log_file:
        log_path = Path(config.log_file)
        # FileHandler does not create missing directories itself.
        log_path.parent.mkdir(parents=True, exist_ok=True)
        handlers.append(logging.FileHandler(log_path, encoding='utf-8'))

    logging.basicConfig(
        level=log_level,
        format=log_format,
        handlers=handlers
    )
|
||||
|
||||
|
||||
async def main() -> None:
    """Load the configuration, set up logging and run the bot.

    Configuration problems and unexpected errors are logged and end the
    process with exit code 1; a keyboard interrupt ends it with code 0.
    """
    try:
        # Load the configuration.
        config = Config()

        # Configure logging.
        setup_logging(config)

        # Create and start the bot.
        bot = RSSNewsBot(config)
        await bot.run()

    except (FileNotFoundError, ValueError) as e:
        logging.error(str(e))
        sys.exit(1)
    except KeyboardInterrupt:
        # Only reached when the interrupt lands while this coroutine's own
        # frame is executing; see the guard below for the usual case.
        logging.info("Бот остановлен")
        sys.exit(0)
    except Exception as e:
        logging.exception(f"Критическая ошибка: {e}")
        sys.exit(1)


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C normally surfaces here, from asyncio.run(), after the main
        # task has been cancelled - handle it so the user gets no traceback.
        logging.info("Бот остановлен")
        sys.exit(0)
|
||||
246
bot/rss_bot.py
Normal file
246
bot/rss_bot.py
Normal file
@@ -0,0 +1,246 @@
|
||||
"""Основная логика RSS бота"""
|
||||
|
||||
import asyncio
|
||||
import feedparser
|
||||
import re
|
||||
from datetime import datetime
|
||||
from email.utils import parsedate_to_datetime
|
||||
from typing import List, Dict, Any, Optional
|
||||
import logging
|
||||
from nio import AsyncClient
|
||||
|
||||
from .config import Config
|
||||
from .history_manager import HistoryManager
|
||||
from .image_handler import ImageHandler
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RSSNewsBot:
    """Polls the configured RSS feeds and posts unseen entries to Matrix.

    NOTE(review): ``HistoryManager.clean_old`` is never called from this
    class, so ``history_days`` / ``max_history_size`` currently have no
    effect - confirm whether that is intended before wiring it in.
    """

    # How many times one text message is attempted before giving up.
    MAX_SEND_RETRIES = 3
    # Seconds to wait after a rate-limit error that carries no explicit delay.
    RATE_LIMIT_DELAY = 15

    def __init__(self, config: "Config"):
        """Create the Matrix client, history manager and image handler."""
        self.config = config
        self.client = AsyncClient(config.homeserver, config.bot_user_id)
        self.client.access_token = config.access_token

        self.history = HistoryManager(
            config.history_file,
            config.history_days,
            config.max_history_size
        )

        self.image_handler = ImageHandler(
            config.images_dir,
            config.compress_images,
            config.max_image_size_mb,
            config.max_image_width,
            config.max_image_height
        )

        self.cycle_counter = 0

    async def fetch_rss(self, url: str, source_name: str, room_id: str) -> List[Dict[str, Any]]:
        """Fetch and parse one RSS feed.

        Returns:
            One dict per entry with the keys ``title``, ``link``,
            ``summary``, ``published``, ``source``, ``room_id`` and
            ``image_url``; an empty list on failure.
        """
        try:
            # feedparser.parse() does blocking network I/O; run it in a
            # worker thread so the event loop stays responsive.
            feed = await asyncio.to_thread(feedparser.parse, url)

            if feed.bozo:
                logger.warning(f"Ошибка парсинга {source_name}: {feed.bozo_exception}")
                # A feed can be flagged as malformed and still yield usable
                # entries; give up only when nothing was parsed.
                if not feed.entries:
                    return []

            return [
                {
                    'title': entry.get('title', 'Без заголовка'),
                    'link': entry.get('link', ''),
                    'summary': entry.get('summary', ''),
                    'published': entry.get('published', ''),
                    'source': source_name,
                    'room_id': room_id,
                    'image_url': self.image_handler.extract_from_entry(entry)
                }
                for entry in feed.entries
            ]

        except Exception as e:
            logger.error(f"Ошибка при загрузке {source_name}: {e}")
            return []

    def format_news_message(self, entry: Dict[str, Any]) -> tuple[str, str]:
        """Build the plain-text and HTML bodies for one news entry.

        Returns:
            ``(plain_message, html_message)``. Feed-supplied values are
            HTML-escaped in the HTML body.
        """
        from html import escape, unescape

        title = entry.get('title', 'Без заголовка')
        link = entry.get('link', '')
        summary = entry.get('summary', '')
        published = entry.get('published', '')

        # Strip HTML tags, then decode entities so "&amp;" reads as "&".
        summary = unescape(re.sub(r'<[^>]+>', '', summary)).strip()
        if len(summary) > 300:
            summary = summary[:300] + "…"

        # Format the date; fall back to the raw string if it cannot be parsed.
        try:
            pub_date = parsedate_to_datetime(published)
            formatted_date = pub_date.strftime("%d %B %Y, %H:%M")
        except (TypeError, ValueError):
            formatted_date = published

        # HTML version: everything that came from the feed is escaped so it
        # cannot break or inject markup.
        html_message = "\n".join([
            f"<b>📰 {escape(title)}</b><br/>",
            "<br/>",
            f"{escape(summary)}<br/>",
            "<br/>",
            f"🕒 <i>{escape(formatted_date)}</i><br/>",
            f'🔗 <a href="{escape(link)}">Читать полностью</a><br/>',
        ])

        # Plain-text version.
        plain_message = f"📰 {title}\n\n{summary}\n\n🕒 {formatted_date}\n🔗 {link}"

        return plain_message, html_message

    async def send_news(self, room_id: str, entry: Dict[str, Any]) -> bool:
        """Post one entry: its image (if any) followed by the text.

        Returns:
            True if the text message was accepted, False otherwise. A failed
            image is only logged and does not affect the result.
        """
        title = entry.get('title', 'Без заголовка')
        image_url = entry.get('image_url')
        link = entry.get('link', '')

        # Send the image first, if there is one.
        if image_url:
            logger.debug(f"Найдено изображение: {image_url[:80]}...")
            image_path = await self.image_handler.download(image_url, link)

            if image_path:
                success = await self.image_handler.upload_and_send(
                    self.client, room_id, image_path
                )
                if success:
                    logger.debug("Изображение отправлено")
                else:
                    logger.warning("Не удалось отправить изображение")

        # Send the text of the news item.
        plain_message, html_message = self.format_news_message(entry)
        content = {
            "msgtype": "m.text",
            "body": plain_message,
            "format": "org.matrix.custom.html",
            "formatted_body": html_message
        }

        # Every attempt consumes one retry, whether it failed by exception
        # or by an error response, so the loop always terminates.
        for _ in range(self.MAX_SEND_RETRIES):
            try:
                response = await self.client.room_send(
                    room_id=room_id,
                    message_type="m.room.message",
                    content=content
                )
            except Exception as e:
                error_msg = str(e).lower()
                if "429" in error_msg or "ratelimit" in error_msg:
                    await asyncio.sleep(self.RATE_LIMIT_DELAY)
                    continue
                logger.error(f"Ошибка отправки: {e}")
                return False

            if isinstance(response, tuple):
                response = response[0]

            if hasattr(response, 'event_id'):
                logger.debug(f"Текст отправлен: {title[:50]}")
                return True

            # Error response: retry only when it is a rate limit.
            retry_after_ms = getattr(response, 'retry_after_ms', None)
            rate_limited = (
                getattr(response, 'status_code', None) == 'M_LIMIT_EXCEEDED'
                or retry_after_ms is not None
            )
            if not rate_limited:
                logger.error(f"Ошибка отправки: {response}")
                return False

            if retry_after_ms is not None:
                await asyncio.sleep(retry_after_ms / 1000)
            else:
                await asyncio.sleep(self.RATE_LIMIT_DELAY)

        logger.error(f"Не удалось отправить после {self.MAX_SEND_RETRIES} попыток: {title[:50]}")
        return False

    @staticmethod
    def _entry_timestamp(published: str) -> datetime:
        """Return a timezone-aware timestamp for sorting an entry.

        Falls back to the current time when *published* is empty or cannot
        be parsed. The result is always aware so that sorting never mixes
        naive and aware datetimes.
        """
        from datetime import timezone

        try:
            parsed = parsedate_to_datetime(published) if published else None
        except (TypeError, ValueError):
            parsed = None

        if parsed is None:
            return datetime.now(timezone.utc)
        if parsed.tzinfo is None:
            # parsedate_to_datetime returns a naive value for "-0000" dates.
            return parsed.replace(tzinfo=timezone.utc)
        return parsed

    async def check_and_send(self) -> None:
        """Run one cycle: check every feed and post the unseen entries."""
        self.cycle_counter += 1

        logger.info(f"Цикл #{self.cycle_counter}")

        news_by_room: Dict[str, List[Dict[str, Any]]] = {}
        queued = set()  # (room_id, link) pairs already queued in this cycle

        for source in self.config.sources:
            try:
                url, name, source_room = source["url"], source["name"], source["room_id"]
            except (KeyError, TypeError):
                # One malformed source must not abort the whole cycle.
                logger.warning(f"Пропускаю некорректный источник: {source!r}")
                continue

            logger.debug(f"Проверяю: {name}")
            entries = await self.fetch_rss(url, name, source_room)

            new_count = 0
            for entry in entries:
                link = entry.get('link', '')
                room_id = entry.get('room_id')
                if not link or not room_id or self.history.is_already_sent(link):
                    continue
                if (room_id, link) in queued:
                    continue
                queued.add((room_id, link))

                entry['timestamp'] = self._entry_timestamp(entry.get('published', ''))
                news_by_room.setdefault(room_id, []).append(entry)
                new_count += 1

            if new_count:
                logger.info(f"Найдено {new_count} новых в {name}")
            else:
                logger.debug(f"Новых нет в {name}")

        # Post oldest first within each room.
        for room_id, news_list in news_by_room.items():
            news_list.sort(key=lambda item: item['timestamp'])
            logger.info(f"Отправка {len(news_list)} новостей в комнату")

            for i, entry in enumerate(news_list, 1):
                full_title = entry.get('title', '')
                logger.info(f"[{i}/{len(news_list)}]: {full_title[:50]}")

                if await self.send_news(room_id, entry):
                    self.history.add(entry.get('link', ''), full_title)

                if i < len(news_list):
                    await asyncio.sleep(self.config.delay_between_posts)

            # Save after every room so a later failure cannot lose the
            # record of what was already posted.
            self.history.save()

        # Periodic cleanup; a zero/negative interval disables it.
        cleanup_every = self.config.cleanup_images_every
        if cleanup_every and cleanup_every > 0 and self.cycle_counter % cleanup_every == 0:
            await self.image_handler.clean()

    async def run(self) -> None:
        """Run the polling loop until cancelled, then close the client."""
        logger.info("Запускаем RSS-бота...")

        try:
            try:
                sync_response = await self.client.sync(timeout=3000)
                # Failures may come back as a response object rather than
                # an exception.
                if hasattr(sync_response, 'next_batch'):
                    logger.info("Соединение с Matrix установлено")
                else:
                    logger.warning(f"Ошибка при подключении: {sync_response}")
            except Exception as e:
                logger.warning(f"Ошибка при подключении: {e}")

            logger.info("Бот запущен!")

            loop = asyncio.get_running_loop()
            while True:
                start_time = loop.time()
                try:
                    await self.check_and_send()
                except Exception as e:
                    logger.exception(f"Ошибка в основном цикле: {e}")

                # Sleep for whatever is left of the check interval.
                elapsed = loop.time() - start_time
                wait_time = max(0, self.config.check_interval - elapsed)
                if wait_time > 0:
                    await asyncio.sleep(wait_time)
        finally:
            # Release the client's HTTP session on shutdown.
            await self.client.close()
|
||||
Reference in New Issue
Block a user