Skip to content

Commit

Permalink
feat(telegram): add pagination fetching of messages (#4394)
Browse files Browse the repository at this point in the history
* feat(telegram): add pagination fetching of messages

* docs
  • Loading branch information
dvikan authored Jan 4, 2025
1 parent f9e9c81 commit 48cb7d7
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 31 deletions.
88 changes: 57 additions & 31 deletions bridges/TelegramBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ class TelegramBridge extends BridgeAbstract
]
]
];

const CONFIGURATION = [
'max_pages' => [
'required' => false,
'defaultValue' => 1,
],
];

const TEST_DETECT_PARAMETERS = [
'https://t.me/s/rssbridge' => ['username' => 'rssbridge'],
'https://t.me/rssbridge' => ['username' => 'rssbridge'],
Expand All @@ -26,7 +34,7 @@ class TelegramBridge extends BridgeAbstract
'https://rssbridge.t.me/' => ['username' => 'rssbridge'],
];

const CACHE_TIMEOUT = 60 * 15; // 15 mins
const CACHE_TIMEOUT = 60 * 60; // 1h
private $feedName = '';

private $enclosures = [];
Expand All @@ -36,33 +44,56 @@ class TelegramBridge extends BridgeAbstract

public function collectData()
{
$html = getSimpleHTMLDOM($this->getURI());

$channelTitle = $html->find('div.tgme_channel_info_header_title span', 0)->plaintext ?? '';
$channelTitle = htmlspecialchars_decode($channelTitle, ENT_QUOTES);
$this->feedName = $channelTitle . ' (@' . $this->normalizeUsername() . ')';
$posts = $html->find('div.tgme_widget_message_wrap.js-widget_message_wrap');
if (!$channelTitle && !$posts) {
throw new \Exception('Unable to find channel. The channel is non-existing or non-public.');
}
foreach ($posts as $messageDiv) {
$this->itemTitle = '';
$this->enclosures = [];
$item = [];

$item['uri'] = $messageDiv->find('a.tgme_widget_message_date', 0)->href;
$item['content'] = $this->processContent($messageDiv);
$item['title'] = $this->itemTitle;
$item['timestamp'] = $messageDiv->find('span.tgme_widget_message_meta', 0)->find('time', 0)->datetime;
$item['enclosures'] = $this->enclosures;

$messageOwner = $messageDiv->find('a.tgme_widget_message_owner_name', 0);
if ($messageOwner) {
$item['author'] = html_entity_decode(trim($messageOwner->plaintext), ENT_QUOTES);
$pages = 0;
$url = 'https://t.me/s/' . $this->normalizeUsername();

$max_pages = $this->getOption('max_pages');

// Hard-coded upper bound of 100 loops
while ($pages < $max_pages && $pages < 100) {
$pages++;

$dom = getSimpleHTMLDOM($url);

$channelTitle = $dom->find('div.tgme_channel_info_header_title span', 0)->plaintext ?? '';
$channelTitle = htmlspecialchars_decode($channelTitle, ENT_QUOTES);
$this->feedName = $channelTitle . ' (@' . $this->normalizeUsername() . ')';

$messages = $dom->find('div.tgme_widget_message_wrap.js-widget_message_wrap');
if (!$channelTitle && !$messages) {
throw new \Exception('Unable to find channel. The channel is non-existing or non-public.');
}

$this->items[] = $item;
foreach (array_reverse($messages) as $message) {
$this->itemTitle = '';
$this->enclosures = [];

$item = [];

$item['uri'] = $message->find('a.tgme_widget_message_date', 0)->href;
$item['content'] = $this->processContent($message);
$item['title'] = $this->itemTitle;
$item['timestamp'] = $message->find('span.tgme_widget_message_meta', 0)->find('time', 0)->datetime;
$item['enclosures'] = $this->enclosures;

$messageOwner = $message->find('a.tgme_widget_message_owner_name', 0);
if ($messageOwner) {
$item['author'] = html_entity_decode(trim($messageOwner->plaintext), ENT_QUOTES);
}

array_unshift($this->items, $item);
}

$more = $dom->find('> div.tgme_widget_message_centered.js-messages_more_wrap a', 0);
if ($more && str_contains($more->href, 'before')) {
$url = 'https://t.me/' . $more->href;
} else {
break;
}
}

$this->logger->info(sprintf('Fetched %s messages from %s pages (%s)', count($this->items), $pages, $url));

$this->items = array_reverse($this->items);
}

Expand Down Expand Up @@ -369,12 +400,7 @@ private function ellipsisTitle($text)

private function normalizeUsername()
{
// todo: can be replaced with ltrim($username, '@');
$username = $this->getInput('username');
if (substr($username, 0, 1) === '@') {
return substr($username, 1);
}
return $username;
return ltrim($this->getInput('username'), '@');
}

public function detectParameters($url)
Expand Down
5 changes: 5 additions & 0 deletions config.default.ini.php
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@

; --- Bridge specific configuration ------

[TelegramBridge]

; Max pages to fetch (1 page => up to 20 messages, one HTTP request per page), min=1 max=100
max_pages = 1

[DiscogsBridge]

; Sets the personal access token for interactions with Discogs. When
Expand Down
12 changes: 12 additions & 0 deletions docs/10_Bridge_Specific/Telegram.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# TelegramBridge

By default, it fetches a single page with up to 20 messages.

To fetch more messages, raise the `max_pages` config value (the bridge never fetches more than 100 pages, regardless of the configured value):

```ini
[TelegramBridge]

; Fetch a maximum of 3 pages (requires up to 3 HTTP requests)
max_pages = 3
```

0 comments on commit 48cb7d7

Please sign in to comment.