From 4e1e58612318cc4d7e49cc7175117a48898e0e4d Mon Sep 17 00:00:00 2001 From: Josh Fraser Date: Mon, 27 Jan 2020 20:08:00 -0800 Subject: [PATCH] setup textblob for sentiment analysis --- README.md | 2 +- bin/install_textblob_corpora | 19 +++++++++++++++++++ bin/post_compile | 9 +++++++++ model.py | 3 ++- requirements.txt | 1 + 5 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 bin/install_textblob_corpora create mode 100644 bin/post_compile diff --git a/README.md b/README.md index bc0c466..ce6b554 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Head to https://www.originprotocol.com/developers to learn more about what we're - Bans users for posting messagses matching specified patterns - Bans users with usernames matching specified patterns - Records logs of converstations -- Translates foreign languages to English using Google Translate +- Logs an English translation of any foreign languages using Google Translate ## Installation diff --git a/bin/install_textblob_corpora b/bin/install_textblob_corpora new file mode 100644 index 0000000..47e2819 --- /dev/null +++ b/bin/install_textblob_corpora @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +source $BIN_DIR/utils + +echo "-----> Starting corpora installation" + +# Assumes NLTK_DATA environment variable is already set +# $ heroku config:set NLTK_DATA='/app/nltk_data' + +# Install the default corpora to NLTK_DATA directory +python -m textblob.download_corpora + +# Open the NLTK_DATA directory +cd ${NLTK_DATA} + +# Delete all of the zip files in the NLTK DATA directory +find . 
-name "*.zip" -type f -delete + +echo "-----> Finished corpora installation" diff --git a/bin/post_compile b/bin/post_compile new file mode 100644 index 0000000..6078c43 --- /dev/null +++ b/bin/post_compile @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +if [ -f bin/install_textblob_corpora ]; then + echo "-----> Running install_textblob_corpora" + chmod +x bin/install_textblob_corpora + bin/install_textblob_corpora +fi + +echo "-----> Post-compile done" diff --git a/model.py b/model.py index 98a96d5..729dd88 100644 --- a/model.py +++ b/model.py @@ -30,9 +30,10 @@ class Message(Base): language_code = Column(String) english_message = Column(String) chat_id = Column(BigInteger) + polarity = Column(Numeric) + subjectivity = Column(Numeric) time = Column(DateTime, default=func.now()) - class MessageHide(Base): __tablename__ = 'telegram_message_hides' id = Column(Integer, primary_key=True) diff --git a/requirements.txt b/requirements.txt index f5a5e0e..c34709e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ SQLAlchemy==1.2.2 configparser==3.5.0 Unidecode==1.0.22 googletrans==2.4.0 +textblob