Send RSS Feeds to Twitter

This isn’t quite as polished as the others yet, so it’s still rough around the edges. I’m not 100% sure about the details of the Twitter API, but that’s not a big concern since stuff like this is just playing around.

This script was cobbled together from a bunch of other scripts that each do their own thing. This one, though, takes anything you can throw at it in RSS form, and sends it over to “X” as a Xweet. You can modify this to fit your needs, but it should be a decent framework to start out.

Feel free to follow @cmcwain to see the script at work.

Tweet RSS

tweet_rss.py

import feedparser
import tweepy
import os
import hashlib
from datetime import datetime
import urllib.parse
from bs4 import BeautifulSoup

# --- Twitter/X API credentials (replace the placeholders with real values) ---
api_key = "API_KEY"
api_secret_key = "API_SECRET_KEY"
# BUG FIX: access_token / access_token_secret were referenced below but never
# defined, which raised a NameError as soon as the script was run.
access_token = "ACCESS_TOKEN"
access_token_secret = "ACCESS_TOKEN_SECRET"

# v2 client used to post tweets. create_tweet() needs user-context auth, so the
# access token pair must be supplied here as well, not just the consumer keys.
client = tweepy.Client(
    consumer_key=api_key,
    consumer_secret=api_secret_key,
    access_token=access_token,
    access_token_secret=access_token_secret,
)

# v1.1 API handle. Not used by the rest of this script, but kept so the
# framework is easy to extend (e.g. media uploads still go through v1.1).
auth = tweepy.OAuth1UserHandler(api_key, api_secret_key, access_token, access_token_secret)
api = tweepy.API(auth)

# File that persists the IDs of entries already tweeted, so reruns don't
# double-post. Loaded into an in-memory set for O(1) membership checks.
posted_entries_file = "./posted_news_entries.txt"

if os.path.exists(posted_entries_file):
    with open(posted_entries_file, "r") as file:
        posted_entries = set(line.strip() for line in file)
else:
    posted_entries = set()

def generate_entry_id(entry):
    """Build a stable MD5 hex digest that uniquely identifies a feed entry.

    The fingerprint combines the entry's link, title, and whichever date
    field is present (updated, then published, then pubdate), so edits that
    change the timestamp produce a new ID.
    """
    date_stamp = entry.get('updated', '') or entry.get('published', '') or entry.get('pubdate', '')
    fingerprint = f"{entry.link}{entry.title}{date_stamp}"
    return hashlib.md5(fingerprint.encode()).hexdigest()

def is_entry_from_today(entry, date_field):
    """Return True if the entry's date (in the given field) is today's date.

    Supports three feed date conventions: 'updated' (ISO 8601 with offset)
    and 'published'/'pubdate' (RFC 822 style). Unknown fields, missing
    fields, and unparseable dates all yield False.
    """
    # Map each supported field to the strptime format its feeds use.
    date_formats = {
        'updated': '%Y-%m-%dT%H:%M:%S%z',
        'published': '%a, %d %b %Y %H:%M:%S %Z',
        'pubdate': '%a, %d %b %Y %H:%M:%S %Z',
    }
    try:
        fmt = date_formats.get(date_field)
        if fmt is None or date_field not in entry:
            return False
        entry_date = datetime.strptime(entry[date_field], fmt).date()
        return entry_date == datetime.today().date()
    except Exception as e:
        # Feeds are messy; treat a bad date as "not today" rather than crash.
        print(f"Error parsing date for entry {entry.title}: {e}")
        return False

def truncate_text(text, max_length=150):
    """Cap *text* at *max_length* characters, appending "..." when trimmed."""
    if len(text) <= max_length:
        return text
    return text[:max_length] + "..."

def modify_link(link):
    """Normalize an entry link for tweeting.

    Drops the query string, fragment, and params, and collapses any
    trailing slash so the path starts with exactly one leading '/'.
    """
    pieces = urllib.parse.urlparse(link)
    cleaned_path = '/' + pieces.path.strip('/')
    return urllib.parse.urlunparse((pieces.scheme, pieces.netloc, cleaned_path, '', '', ''))

def strip_html(content):
    """Return the plain-text content of an HTML fragment (tags removed)."""
    return BeautifulSoup(content, 'html.parser').get_text()

def process_feed(rss_url, date_field):
    """Fetch one RSS feed and tweet every not-yet-posted entry from today.

    For each qualifying entry: extract its content (full content, falling
    back to summary/description), strip HTML, truncate, tweet it, then
    record its ID in the on-disk posted-entries file so reruns skip it.

    Args:
        rss_url: URL of the RSS/Atom feed to process.
        date_field: which date field this feed uses ('updated',
            'published', or 'pubdate') — passed to is_entry_from_today.
    """
    print(f"Processing feed: {rss_url}")
    feed = feedparser.parse(rss_url)

    if not feed.entries:
        print("No entries found.")
        return

    new_entries = []

    for entry in feed.entries:
        print(f"Checking entry: {entry.title}")
        if not is_entry_from_today(entry, date_field):
            print(f"Entry not from today: {entry.title}")
            continue
        print(f"Entry from today found: {entry.title}")
        entry_id = generate_entry_id(entry)
        if entry_id in posted_entries:
            print(f"Entry already posted: {entry.title}")
            continue

        # Prefer the full content block; fall back to summary/description.
        content = getattr(entry, 'content', None)
        if content:
            content = content[0].value
        else:
            content = getattr(entry, 'summary', '') or getattr(entry, 'description', '')

        clean_content = strip_html(content)
        truncated_content = truncate_text(clean_content)
        modified_link = modify_link(entry.link)
        tweet = f"{entry.title}\n\n{truncated_content} {modified_link}"
        print(tweet)
        # BUG FIX: a single failed create_tweet used to raise out of the
        # loop, aborting the run before the new-entry IDs were persisted —
        # every entry tweeted earlier in the run would be re-tweeted next
        # time. Skip the failed entry instead (it stays unposted so a later
        # run can retry it) and keep going.
        try:
            client.create_tweet(text=tweet)
        except Exception as e:
            print(f"Failed to tweet entry {entry.title}: {e}")
            continue
        posted_entries.add(entry_id)
        new_entries.append(entry_id)

    # Persist the IDs of everything we posted this run.
    if new_entries:
        with open(posted_entries_file, "a") as file:
            for entry_id in new_entries:
                file.write(f"{entry_id}\n")

# Feeds to process, each paired with the date field its entries populate
# (see is_entry_from_today for the supported values).
rss_feeds = [
    ("https://example.com/index.xml", 'updated'),
    ("https://notactuallychicken.com/products/feed.xml", 'pubdate')
]

# Guard the driver loop so importing this module (e.g. to reuse the helper
# functions) doesn't immediately hit the network and start tweeting.
if __name__ == "__main__":
    for rss_url, date_field in rss_feeds:
        process_feed(rss_url, date_field)
Previous: Keychron Replacement Key Next: Auto Initial Linux Setup