Files
XijiaX/db_manager.py
2025-07-15 10:52:00 +08:00

117 lines
4.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import sqlite3
import os
from datetime import datetime, timedelta
class DatabaseManager:
def __init__(self, db_path='articles.db'):
self.db_path = db_path
self._init_db()
def _init_db(self):
"""初始化数据库,创建必要的表"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("""
CREATE TABLE IF NOT EXISTS processed_articles (
url TEXT PRIMARY KEY,
title TEXT,
processed_time TIMESTAMP,
content_type TEXT,
optimized_title TEXT,
published BOOLEAN DEFAULT 0
)
""")
conn.commit()
async def is_article_processed(self, url):
"""检查文章是否已经处理过"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"SELECT url FROM processed_articles WHERE url = ?",
(url,)
)
return cursor.fetchone() is not None
async def add_processed_article(self, url, title, content_type=None, optimized_title=None):
"""添加已处理的文章记录,但未发布状态"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""INSERT INTO processed_articles
(url, title, processed_time, content_type, optimized_title, published)
VALUES (?, ?, ?, ?, ?, ?)""",
(url, title, datetime.now(), content_type, optimized_title, False)
)
conn.commit()
async def mark_article_published(self, url):
"""将文章标记为已发布状态"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"UPDATE processed_articles SET published = ? WHERE url = ?",
(True, url)
)
conn.commit()
async def cleanup_old_records(self, days=7):
"""清理指定天数之前的记录"""
cleanup_date = datetime.now() - timedelta(days=days)
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"DELETE FROM processed_articles WHERE processed_time < ?",
(cleanup_date,)
)
conn.commit()
print(f"已清理 {days} 天前的数据库记录")
async def get_recent_articles(self, limit=10):
"""获取最近处理的文章列表"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"""SELECT url, title, processed_time, content_type, optimized_title
FROM processed_articles
ORDER BY processed_time DESC
LIMIT ?""",
(limit,)
)
return cursor.fetchall()
async def cleanup_article_images(self, url, image_dir='images'):
"""清理已处理文章的图片"""
try:
# 检查文章是否存在于数据库中
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute(
"SELECT 1 FROM processed_articles WHERE url = ?",
(url,)
)
result = cursor.fetchone()
if result:
# 文章存在于数据库中,清理相关图片
article_hash = str(hash(url))
image_path = os.path.join(image_dir, f"{article_hash}*")
# 使用glob模块查找匹配的文件
import glob
matching_files = glob.glob(image_path)
# 删除找到的所有匹配文件
for file_path in matching_files:
try:
if os.path.exists(file_path):
os.remove(file_path)
print(f"已删除图片:{file_path}")
except Exception as e:
print(f"删除图片文件时出错:{file_path} - {str(e)}")
if not matching_files:
print(f"未找到与URL对应的图片文件{url}")
except Exception as e:
print(f"清理图片时出错:{str(e)}")