August 4, 2024 at 15:12
Ever want to automate posting Tweets from the things you normally do every day? Me neither, but this will do that for you. All you need is an RSS feed, and every update will be automatically posted to your Twitter account. The best part is that this is all free.
Add any RSS feed to the list cleverly named “rss_feeds”, and new updates will get their own post on Twitter. The script also stores a unique ID for each posted entry in a text file to avoid duplicate tweets.
I use it to automatically post items in my Plex Watchlist, news articles that I want to share, public bookmarks from shared LinkDing saves, blog posts, and RetroAchievement successes.
Feel free to follow @cmcwain to see the script at work.
tweet_rss.py
import hashlib
import os
import urllib.parse
from datetime import datetime
from email.utils import parsedate_to_datetime

import feedparser
import tweepy
from bs4 import BeautifulSoup
# Twitter/X API credentials. Replace every placeholder with the values from
# your developer app before running the script.
api_key = "API_KEY"
api_secret_key = "API_SECRET_KEY"
# User-context tokens: required both by the OAuth1 handler below and by
# Client.create_tweet, which posts on behalf of a specific account.
access_token = "ACCESS_TOKEN"
access_token_secret = "ACCESS_TOKEN_SECRET"

# API v2 client used to publish tweets.
client = tweepy.Client(
    consumer_key=api_key,
    consumer_secret=api_secret_key,
    access_token=access_token,
    access_token_secret=access_token_secret,
)

# Text file holding one already-posted entry ID per line.
posted_entries_file = "./posted_news_entries.txt"

# API v1.1 handle authenticated with the same credentials.
auth = tweepy.OAuth1UserHandler(api_key, api_secret_key, access_token, access_token_secret)
api = tweepy.API(auth)
# IDs of entries that have already been tweeted, loaded from the history file
# (one ID per line). A missing file simply means nothing has been posted yet.
posted_entries = set()
if os.path.exists(posted_entries_file):
    with open(posted_entries_file, "r") as history:
        for raw_line in history:
            posted_entries.add(raw_line.strip())
def generate_entry_id(entry):
    """Return an MD5 hex digest that identifies *entry*.

    The digest covers the entry's link, its title, and the first non-empty
    value among its ``updated``, ``published`` and ``pubdate`` fields, all
    concatenated without separators.
    """
    link_and_title = entry.link + entry.title

    # Take the first date field that carries a value; if none does, the value
    # of the last field looked up is used.
    stamp = ''
    for field in ('updated', 'published', 'pubdate'):
        stamp = entry.get(field, '')
        if stamp:
            break

    digest = hashlib.md5()
    digest.update((link_and_title + stamp).encode())
    return digest.hexdigest()
# Layouts accepted for ISO 8601 ``updated`` timestamps: without and with
# fractional seconds. ``%z`` matches "Z", "+0000" and "+00:00".
_ISO_DATE_FORMATS = ('%Y-%m-%dT%H:%M:%S%z', '%Y-%m-%dT%H:%M:%S.%f%z')


def _parse_entry_date(raw_value, date_field):
    """Parse the raw date string of a feed entry into a ``datetime``.

    ``updated`` values are read as ISO 8601 timestamps; ``published`` and
    ``pubdate`` values are read as RFC 822 dates.

    Raises:
        ValueError or TypeError: if *raw_value* cannot be parsed.
    """
    if date_field == 'updated':
        for fmt in _ISO_DATE_FORMATS:
            try:
                return datetime.strptime(raw_value, fmt)
            except ValueError:
                continue
        raise ValueError(f"unrecognised ISO 8601 timestamp: {raw_value!r}")
    # parsedate_to_datetime accepts numeric offsets ("+0000") as well as zone
    # names ("GMT"); strptime's %Z only matches a handful of zone names.
    return parsedate_to_datetime(raw_value)


def is_entry_from_today(entry, date_field):
    """Return True when *entry* is dated today in the local timezone.

    Args:
        entry: Feed entry supporting ``in`` and attribute access for its
            date fields, and exposing ``title`` for error messages.
        date_field: Which field to inspect: 'updated', 'published' or
            'pubdate'. Any other value yields False.

    Returns:
        True if the chosen field exists, parses, and falls on today's local
        date; False otherwise (including when parsing fails).
    """
    if date_field not in ('updated', 'published', 'pubdate'):
        return False
    if date_field not in entry:
        return False

    try:
        entry_moment = _parse_entry_date(getattr(entry, date_field), date_field)
        # Timezone-aware values are shifted to local time so the comparison
        # with the local "today" is like-for-like; naive values are taken
        # as they are.
        if entry_moment.tzinfo is not None:
            entry_moment = entry_moment.astimezone()
        return entry_moment.date() == datetime.now().date()
    except (TypeError, ValueError, OverflowError, OSError) as e:
        # Best effort: an unparseable date must not abort the whole run.
        print(f"Error parsing date for entry {entry.title}: {e}")
        return False
def truncate_text(text, max_length=150):
    """Shorten *text* to *max_length* characters followed by "...".

    Text that already fits is returned unchanged. A shortened result is
    three characters longer than *max_length* because of the ellipsis.
    """
    if len(text) <= max_length:
        return text
    head = text[:max_length]
    return head + "..."
def modify_link(link):
    """Return *link* reduced to scheme, host and path.

    Params, query string and fragment are dropped, and the path is rewritten
    with exactly one leading slash and no trailing slash.
    """
    parts = urllib.parse.urlparse(link)
    trimmed_path = parts.path.strip('/')
    canonical = (parts.scheme, parts.netloc, '/' + trimmed_path, '', '', '')
    return urllib.parse.urlunparse(canonical)
def strip_html(content):
    """Return the text of *content* as extracted by BeautifulSoup's html.parser."""
    return BeautifulSoup(content, 'html.parser').get_text()
def _build_tweet(entry):
    """Compose the tweet text for *entry*: title, shortened body and link."""
    # Prefer the full content when the feed supplies it, otherwise fall back
    # to the summary or description.
    content = getattr(entry, 'content', None)
    if content:
        content = content[0].value
    else:
        content = getattr(entry, 'summary', '') or getattr(entry, 'description', '')
    truncated_content = truncate_text(strip_html(content))
    modified_link = modify_link(entry.link)
    return f"{entry.title}\n\n{truncated_content} {modified_link}"


def process_feed(rss_url, date_field):
    """Tweet every entry of a feed that is dated today and not yet posted.

    Each successfully posted entry ID is added to ``posted_entries`` and
    appended to ``posted_entries_file`` straight away, so a later failure
    cannot lose the record of tweets that were already sent.

    Args:
        rss_url: URL of the RSS/Atom feed to read.
        date_field: Entry field holding the date to check
            ('updated', 'published' or 'pubdate').
    """
    print(f"Processing feed: {rss_url}")
    feed = feedparser.parse(rss_url)
    if not feed.entries:
        print("No entries found.")
        return

    for entry in feed.entries:
        print(f"Checking entry: {entry.title}")
        if not is_entry_from_today(entry, date_field):
            print(f"Entry not from today: {entry.title}")
            continue
        print(f"Entry from today found: {entry.title}")

        entry_id = generate_entry_id(entry)
        if entry_id in posted_entries:
            print(f"Entry already posted: {entry.title}")
            continue

        tweet = _build_tweet(entry)
        print(tweet)
        try:
            client.create_tweet(text=tweet)
        except tweepy.TweepyException as exc:
            # One rejected tweet should not stop the remaining entries; the
            # ID is left unrecorded so the entry is retried on the next run.
            print(f"Failed to post entry {entry.title}: {exc}")
            continue

        # Record the ID immediately rather than after the loop.
        posted_entries.add(entry_id)
        with open(posted_entries_file, "a") as file:
            file.write(f"{entry_id}\n")
# Feeds to publish from: each item pairs a feed URL with the name of the
# entry field that carries its date.
rss_feeds = [
    ("https://example.com/index.xml", 'updated'),
    ("https://notactuallychicken.com/products/feed.xml", 'pubdate'),
]

# Handle the feeds one after another, in the order listed above.
for feed_spec in rss_feeds:
    process_feed(*feed_spec)
Questions or comments?