117 lines
4.7 KiB
Python
117 lines
4.7 KiB
Python
import sqlite3
|
||
import os
|
||
from datetime import datetime, timedelta
|
||
|
||
class DatabaseManager:
|
||
def __init__(self, db_path='articles.db'):
|
||
self.db_path = db_path
|
||
self._init_db()
|
||
|
||
def _init_db(self):
|
||
"""初始化数据库,创建必要的表"""
|
||
with sqlite3.connect(self.db_path) as conn:
|
||
cursor = conn.cursor()
|
||
cursor.execute("""
|
||
CREATE TABLE IF NOT EXISTS processed_articles (
|
||
url TEXT PRIMARY KEY,
|
||
title TEXT,
|
||
processed_time TIMESTAMP,
|
||
content_type TEXT,
|
||
optimized_title TEXT,
|
||
published BOOLEAN DEFAULT 0
|
||
)
|
||
""")
|
||
conn.commit()
|
||
|
||
async def is_article_processed(self, url):
|
||
"""检查文章是否已经处理过"""
|
||
with sqlite3.connect(self.db_path) as conn:
|
||
cursor = conn.cursor()
|
||
cursor.execute(
|
||
"SELECT url FROM processed_articles WHERE url = ?",
|
||
(url,)
|
||
)
|
||
return cursor.fetchone() is not None
|
||
|
||
async def add_processed_article(self, url, title, content_type=None, optimized_title=None):
|
||
"""添加已处理的文章记录,但未发布状态"""
|
||
with sqlite3.connect(self.db_path) as conn:
|
||
cursor = conn.cursor()
|
||
cursor.execute(
|
||
"""INSERT INTO processed_articles
|
||
(url, title, processed_time, content_type, optimized_title, published)
|
||
VALUES (?, ?, ?, ?, ?, ?)""",
|
||
(url, title, datetime.now(), content_type, optimized_title, False)
|
||
)
|
||
conn.commit()
|
||
|
||
async def mark_article_published(self, url):
|
||
"""将文章标记为已发布状态"""
|
||
with sqlite3.connect(self.db_path) as conn:
|
||
cursor = conn.cursor()
|
||
cursor.execute(
|
||
"UPDATE processed_articles SET published = ? WHERE url = ?",
|
||
(True, url)
|
||
)
|
||
conn.commit()
|
||
|
||
async def cleanup_old_records(self, days=7):
|
||
"""清理指定天数之前的记录"""
|
||
cleanup_date = datetime.now() - timedelta(days=days)
|
||
with sqlite3.connect(self.db_path) as conn:
|
||
cursor = conn.cursor()
|
||
cursor.execute(
|
||
"DELETE FROM processed_articles WHERE processed_time < ?",
|
||
(cleanup_date,)
|
||
)
|
||
conn.commit()
|
||
print(f"已清理 {days} 天前的数据库记录")
|
||
|
||
async def get_recent_articles(self, limit=10):
|
||
"""获取最近处理的文章列表"""
|
||
with sqlite3.connect(self.db_path) as conn:
|
||
cursor = conn.cursor()
|
||
cursor.execute(
|
||
"""SELECT url, title, processed_time, content_type, optimized_title
|
||
FROM processed_articles
|
||
ORDER BY processed_time DESC
|
||
LIMIT ?""",
|
||
(limit,)
|
||
)
|
||
return cursor.fetchall()
|
||
|
||
async def cleanup_article_images(self, url, image_dir='images'):
|
||
"""清理已处理文章的图片"""
|
||
try:
|
||
# 检查文章是否存在于数据库中
|
||
with sqlite3.connect(self.db_path) as conn:
|
||
cursor = conn.cursor()
|
||
cursor.execute(
|
||
"SELECT 1 FROM processed_articles WHERE url = ?",
|
||
(url,)
|
||
)
|
||
result = cursor.fetchone()
|
||
|
||
if result:
|
||
# 文章存在于数据库中,清理相关图片
|
||
article_hash = str(hash(url))
|
||
image_path = os.path.join(image_dir, f"{article_hash}*")
|
||
|
||
# 使用glob模块查找匹配的文件
|
||
import glob
|
||
matching_files = glob.glob(image_path)
|
||
|
||
# 删除找到的所有匹配文件
|
||
for file_path in matching_files:
|
||
try:
|
||
if os.path.exists(file_path):
|
||
os.remove(file_path)
|
||
print(f"已删除图片:{file_path}")
|
||
except Exception as e:
|
||
print(f"删除图片文件时出错:{file_path} - {str(e)}")
|
||
|
||
if not matching_files:
|
||
print(f"未找到与URL对应的图片文件:{url}")
|
||
|
||
except Exception as e:
|
||
print(f"清理图片时出错:{str(e)}") |