This commit is contained in:
Josh Fraser 2020-02-24 18:06:29 -08:00
commit 516599ce9e
6 changed files with 99 additions and 35 deletions

View File

@ -5,9 +5,11 @@ Head to https://www.originprotocol.com/developers to learn more about what we're
# Telegram Bot
- Deletes messages matching specified patterns
- Bans users for posting messagses matching specified patterns
- Bans users for posting messages matching specified patterns
- Bans users with usernames matching specified patterns
- Records logs of converstations
- Records logs of conversations
- Logs an English translation of any foreign languages using Google Translate
- Uses textblob for basic sentiment analysis of both polarity and subjectivity
## Installation
@ -17,10 +19,13 @@ Head to https://www.originprotocol.com/developers to learn more about what we're
- `pip install --upgrade -r requirements.txt`
## Database setup
- Store database URL in environment variable.
```
export TELEGRAM_BOT_POSTGRES_URL="postgresql://<user>:<password>@localhost:5432/<databasename>"
```
- Run: `python model.py` to set up the DB tables.
## Setup
@ -32,6 +37,7 @@ Head to https://www.originprotocol.com/developers to learn more about what we're
```
export TELEGRAM_BOT_TOKEN="4813829027:ADJFKAf0plousH2EZ2jBfxxRWFld3oK34ya"
```
- Create your Telegram group.
- Add your bot to the group like so: https://stackoverflow.com/questions/37338101/how-to-add-a-bot-to-a-telegram-group
- Make your bot an admin in the group
@ -48,7 +54,24 @@ Head to https://www.originprotocol.com/developers to learn more about what we're
- `ADMIN_EXEMPT` : If set to anything except `false`, admin users will be exempt from monitoring. Recommended to be set, but useful to turn off for debugging.
- `NOTIFY_CHAT` : ID of chat to report actions. Can be useful if you have an admin-only chat where you want to monitor the bot's activity. E.g. `-140532994`
## Download the corpus for Textblob
For sentiment analysis to work, you'll need to download the latest corpus file for textblob. You can do this by running:
```
python -m textblob.download_corpora
```
If you're running the bot on Heroku, set an environment variable named `NLTK_DATA` to `/app/nltk_data` by running:
```
heroku config:set NLTK_DATA='/app/nltk_data'
```
## Message ban patterns
Sample bash file to set `MESSAGE_BAN_PATTERNS`:
```
read -r -d '' MESSAGE_BAN_PATTERNS << 'EOF'
# ETH Address
@ -60,15 +83,17 @@ read -r -d '' MESSAGE_BAN_PATTERNS << 'EOF'
EOF
```
## Attachements
## Attachments
By default, any attachments other than images or animations will cause the message to be hidden.
## Running
### Locally
- Run: `python bot.py` to start logger
- Messages will be displayed on `stdout` as they are logged.
### On Heroku
- You must enable the worker on Heroku app dashboard. (By default it is off.)

View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Downloads the TextBlob corpora and then removes the leftover zip archives
# from the NLTK data directory.
# NOTE(review): $BIN_DIR/utils is defined outside this script; presumably it is
# provided by the Heroku Python buildpack -- confirm.
source "$BIN_DIR/utils"
echo "-----> Starting corpora installation"
# Assumes NLTK_DATA environment variable is already set
# $ heroku config:set NLTK_DATA='/app/nltk_data'
# Install the default corpora to NLTK_DATA directory
python -m textblob.download_corpora
# Delete all of the zip files in the NLTK_DATA directory.
# Only clean up when NLTK_DATA points at a real directory: with the variable
# unset, a bare `cd` would land in $HOME and the delete would remove unrelated
# zip files there.
if [ -d "${NLTK_DATA}" ]; then
    find "${NLTK_DATA}" -name "*.zip" -type f -delete
else
    echo "-----> NLTK_DATA is unset or not a directory; skipping zip cleanup" >&2
fi
echo "-----> Finished corpora installation"

9
bin/post_compile Normal file
View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Post-compile hook: if the corpora installer script is present, make it
# executable and run it, then report completion.
corpora_installer="bin/install_textblob_corpora"
if [[ -f "${corpora_installer}" ]]
then
    echo "-----> Running install_textblob_corpora"
    chmod +x "${corpora_installer}"
    "${corpora_installer}"
fi
echo "-----> Post-compile done"

17
bot.py
View File

@ -19,6 +19,7 @@ import re
import unidecode
from mwt import MWT
from googletrans import Translator
from textblob import TextBlob
class TelegramMonitorBot:
@ -26,12 +27,12 @@ class TelegramMonitorBot:
def __init__(self):
self.debug = (
(os.environ.get('DEBUG') is not None) and
(os.environ.get('DEBUG').upper() != "false"))
(os.environ.get('DEBUG').lower() != "false"))
# Are admins exempt from having messages checked?
self.admin_exempt = (
(os.environ.get('ADMIN_EXEMPT') is not None) and
(os.environ.get('ADMIN_EXEMPT').upper() != "false"))
(os.environ.get('ADMIN_EXEMPT').lower() != "false"))
if (self.debug):
print("🔵 debug:", self.debug)
@ -304,20 +305,26 @@ class TelegramMonitorBot:
return bool_set
def log_message(self, user_id, user_message, chat_id):
try:
s = session()
language_code = english_message = ""
polarity = subjectivity = 0.0
try:
# translate to English & log the original language
translator = Translator()
translated = translator.translate(user_message)
language_code = translated.src
english_message = translated.text
# run basic sentiment analysis on the translated English string
analysis = TextBlob(english_message)
polarity = analysis.sentiment.polarity
subjectivity = analysis.sentiment.subjectivity
except Exception as e:
print(e.message)
msg1 = Message(user_id=user_id, message=user_message,
chat_id=chat_id, language_code=language_code, english_message=english_message)
msg1 = Message(user_id=user_id, message=user_message, chat_id=chat_id,
language_code=language_code, english_message=english_message, polarity=polarity,
subjectivity=subjectivity)
s.add(msg1)
s.commit()
s.close()

View File

@ -1,4 +1,4 @@
from sqlalchemy import Column, DateTime, BigInteger, String, Integer, ForeignKey, func
from sqlalchemy import Column, DateTime, BigInteger, String, Integer, Numeric, ForeignKey, func
from sqlalchemy.orm import relationship, backref
from sqlalchemy.ext.declarative import declarative_base
import os
@ -30,9 +30,10 @@ class Message(Base):
language_code = Column(String)
english_message = Column(String)
chat_id = Column(BigInteger)
polarity = Column(Numeric)
subjectivity = Column(Numeric)
time = Column(DateTime, default=func.now())
class MessageHide(Base):
__tablename__ = 'telegram_message_hides'
id = Column(Integer, primary_key=True)

View File

@ -4,3 +4,6 @@ SQLAlchemy==1.2.2
configparser==3.5.0
Unidecode==1.0.22
googletrans==2.4.0
textblob==0.15.3
ipython==5.5.0