From f674e179d602d3ccb9818d28fe06f371059449dc Mon Sep 17 00:00:00 2001 From: Nikita Kostovsky Date: Sun, 22 Jun 2025 16:54:02 +0200 Subject: parse and insert feeds and items --- src/atomitem.cpp | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 src/atomitem.cpp (limited to 'src/atomitem.cpp') diff --git a/src/atomitem.cpp b/src/atomitem.cpp new file mode 100644 index 0000000..5f099f6 --- /dev/null +++ b/src/atomitem.cpp @@ -0,0 +1,180 @@ +#include "atomitem.h" + +#include +#include +#include + +#include "atomchannel.h" +#include "constants.h" +#include "macros.h" +#include "rsshit_db.h" + +AtomItem::AtomItem(QXmlStreamReader *xmlReader) +{ + Q_ASSERT(xmlReader != nullptr); + + const QString titleTag{"title"}; + const QString linkTag{"link"}; + const QString categoryTag{"category"}; + const QString guidTag{"guid"}; + const QString pubDateTag{"pubDate"}; + const QString descriptionTag{"description"}; + const QString encodedTag{"encoded"}; + + while (!xmlReader->atEnd() && !xmlReader->hasError()) { + const auto itemNext = xmlReader->readNext(); + + switch (itemNext) { + case QXmlStreamReader::TokenType::StartElement: { + const auto name = xmlReader->name(); + const auto elementText = xmlReader->readElementText(); + + if (name == titleTag) + title = elementText; + else if (name == linkTag) + link = elementText; + else if (name == categoryTag) + categories.append(elementText); + else if (name == guidTag) + guid = AtomGuid{elementText}; + else if (name == pubDateTag) + pubDate = QDateTime::fromString(elementText, Qt::DateFormat::RFC2822Date); + else if (name == descriptionTag) + description = elementText; + else if (name == encodedTag) + encoded = elementText; + + break; + } + case QXmlStreamReader::TokenType::EndElement: + // qDebug() << "EndElement: " << xmlReader->name(); + return; + case QXmlStreamReader::TokenType::Characters: + const auto characters = xmlReader->text().toString().simplified(); + + if (characters.isEmpty()) + break; + + qDebug() << "item: characters: " << characters; + break; + } + } +} + +int AtomItem::getDbId() +{ + if (dbId != rsshit::db::IdNotFound) + return dbId; + + const auto db = rsshit::db::open(); + + if (!db) + return rsshit::db::IdNotFound; + + QSqlQuery selectQ{"select id from items where link=?"}; + selectQ.addBindValue(link); + + if (!selectQ.exec()) { + qWarning() << "cannot exec query" << selectQ.lastQuery() << ":" + << selectQ.lastError().text() << ":" << selectQ.executedQuery(); + + return rsshit::db::IdNotFound; + } + + if (!selectQ.next()) + return rsshit::db::IdNotFound; + + const auto idVariant = selectQ.value(rsshit::db::idTag); + + if (!idVariant.isValid() || !idVariant.canConvert()) + return rsshit::db::IdNotFound; + + bool ok{false}; + const auto result = idVariant.toInt(&ok); + + if (!ok) { + qWarning() << "got invalid id from db:" << idVariant; + + return rsshit::db::IdNotFound; + } + + return result; +} + +int AtomItem::createInDb(const int feedId) +{ + if (dbId != rsshit::db::IdNotFound) + return dbId; + + const auto db = rsshit::db::open(); + + if (!db) + return rsshit::db::IdNotFound; + + QSqlQuery insertQ{ + "insert into items(feed_fk, pub_datetime_unix, title, link, author, description)" + "values(?, ?, ?, ?, ?, ?)"}; + + insertQ.addBindValue(feedId); + insertQ.addBindValue(pubDate.toSecsSinceEpoch()); + insertQ.addBindValue(title); + insertQ.addBindValue(link); + insertQ.addBindValue(author); + insertQ.addBindValue(description); + + if (!insertQ.exec()) { + qWarning() << "cannot exec query" << insertQ.lastQuery() << ":" + << insertQ.lastError().text() << ":" << insertQ.executedQuery(); + + return rsshit::db::IdNotFound; + } + + return insertQ.lastInsertId().toInt(); +} + +int AtomItem::getOrInsertDbId(const int feedId) +{ + const auto id = getDbId(); + + if (id != rsshit::db::IdNotFound) + return id; + + return createInDb(feedId); +} + +QDebug operator<<(QDebug debug, const AtomItem &item) +{ + QDebugStateSaver saver{debug}; + + debug.nospace() << typeid(AtomItem).name() << " {" << Qt::endl; + + PRINT_ATOM_FIELD(item, title); + PRINT_ATOM_FIELD(item, link); + PRINT_ATOM_FIELD(item, categories); + PRINT_ATOM_FIELD(item, guid); + PRINT_ATOM_FIELD(item, pubDate); + // PRINT_ATOM_ITEM_FIELD(description); + // PRINT_ATOM_ITEM_FIELD(encoded); + + auto halfSize = item.description.size() / 2; + constexpr decltype(halfSize) maxLeft{70}; + constexpr decltype(halfSize) maxRight{20}; + + auto left = std::min(maxLeft, halfSize); + auto right = std::min(maxRight, halfSize); + + debug.nospace().noquote() << "\tdescription: \"" << item.description.left(left) << "..." + << item.description.right(right) << '"' << Qt::endl; + + halfSize = item.encoded.size() / 2; + + left = std::min(maxLeft, halfSize); + right = std::min(maxRight, halfSize); + + debug.nospace().noquote() << "\tencoded: \"" << item.encoded.left(left) << "..." + << item.encoded.right(right) << '"' << Qt::endl; + + debug.nospace() << "}"; + + return debug; +} -- cgit v1.2.3-70-g09d2