1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
import re from datetime import datetime from collections import defaultdict import os import argparse import sys
# Kindle's Chinese-locale timestamp, e.g. "2023年5月20日星期六 下午3:45:12".
# The weekday is optional and ignored (it is redundant with the calendar date).
# English weekday / AM / PM tokens are accepted as well so that strings the
# previous strptime-based implementation could parse keep working.
_KINDLE_DATE_RE = re.compile(
    r'(\d{4})年(\d{1,2})月(\d{1,2})日\s*'
    r'(?:星期[一二三四五六日天]|[A-Za-z]+)?\s*'
    r'(上午|下午|AM|PM)\s*(\d{1,2}):(\d{1,2}):(\d{1,2})',
    re.IGNORECASE,
)


def parse_kindle_date(date_str):
    """Parse a Kindle clipping timestamp written in Chinese.

    The expected shape is ``YYYY年M月D日[weekday] 上午|下午H:MM:SS`` on a
    12-hour clock. Parsing is done with a regular expression instead of
    ``datetime.strptime`` with ``%A``/``%p``, because those directives follow
    the process locale and would reject the text under a non-English locale.

    Args:
        date_str: The timestamp text taken from a clipping's metadata line.

    Returns:
        A naive ``datetime`` for the parsed timestamp. If the text cannot be
        parsed, a warning is printed and ``datetime.now()`` is returned
        instead (best-effort behaviour; this function never raises for bad
        input).
    """
    try:
        found = _KINDLE_DATE_RE.fullmatch(date_str.strip())
        if found is None:
            raise ValueError("unrecognized date format")
        year, month, day, meridiem, hour, minute, second = found.groups()

        hour = int(hour)
        if not 1 <= hour <= 12:
            raise ValueError(f"hour {hour} is not valid on a 12-hour clock")
        # On a 12-hour clock, 12 AM is midnight (0) and 12 PM is noon (12).
        is_pm = meridiem.upper() in ('下午', 'PM')
        hour = hour % 12 + (12 if is_pm else 0)

        # datetime() itself rejects impossible dates/times with ValueError.
        return datetime(int(year), int(month), int(day),
                        hour, int(minute), int(second))
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError/TypeError cover non-string input such as None.
        print(f"⚠️ 日期解析异常({date_str}),使用当前时间替代 | 错误:{str(e)}")
        return datetime.now()
# One clipping entry: title line, metadata line, blank line, then the text.
# The brackets around the location are matched literally (full-width or
# ASCII) so the pattern has exactly six capture groups:
# title, page, location, kind, date text, clipping text.
_CLIPPING_ENTRY_RE = re.compile(
    r'^(.*?)\n- 您在第 (\d+) 页[(（]位置 #([\d-]+)[)）]的(标注|笔记) \| 添加于 (.*?)\n\n(.*)$',
    re.DOTALL
)


def parse_clippings_file(file_path):
    """Parse a Kindle clippings file into a list of clipping records.

    The file is decoded by trying ``utf-8-sig``, ``utf-16`` and ``gb18030``
    in that order and keeping the first encoding that decodes without error.
    Entries are separated by ``==========``; entries that do not match the
    expected highlight/note layout are skipped silently.

    Args:
        file_path: Path of the clippings text file.

    Returns:
        A list of dicts with the keys ``title``, ``type`` (``'标注'`` or
        ``'笔记'``), ``page``, ``location``, ``date`` (a ``datetime`` produced
        by ``parse_kindle_date``) and ``content``.

    Raises:
        SystemExit: With exit status 1 after printing an error message if the
            file cannot be read, decoded or parsed.
    """
    try:
        for encoding in ('utf-8-sig', 'utf-16', 'gb18030'):
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    text = f.read()
                break
            except UnicodeDecodeError:
                continue
        else:
            # No candidate encoding could decode the file.
            raise ValueError("无法确定文件编码")

        # Drop any remaining BOM characters and normalise line endings.
        text = text.replace('\ufeff', '').replace('\r\n', '\n')

        clippings = []
        for entry in text.split('=========='):
            match = _CLIPPING_ENTRY_RE.match(entry.strip())
            if match is None:
                # Blank chunk or an entry kind this parser does not handle.
                continue
            title, page, location, clip_type, date_str, body = match.groups()
            clippings.append({
                'title': title.strip(),
                'type': clip_type,
                'page': page.strip(),
                'location': location.strip(),
                'date': parse_kindle_date(date_str.strip()),
                'content': body.strip()
            })
        return clippings
    except Exception as e:
        # Top-level boundary of the script: report the problem and stop.
        print(f"❌ 文件解析失败:{str(e)}")
        sys.exit(1)
def _title_anchor(title):
    """Return the in-page anchor id derived from a book title."""
    return re.sub(r'[^\w\s-]', '', title).lower().replace(' ', '-')


def generate_markdown(clippings, output_path):
    """Write the clippings to a Markdown file, grouped by book.

    Books are listed newest-first by the date of their most recent clipping,
    and clippings inside each book are newest-first as well. The file starts
    with a linked table of contents followed by one section per book.

    Args:
        clippings: Iterable of dicts with the keys ``title``, ``type``,
            ``page``, ``location``, ``date`` (a ``datetime``) and
            ``content``. The caller's sequence is not modified.
        output_path: Destination file path; missing parent directories are
            created.

    Returns:
        None. The result is written to ``output_path`` as UTF-8.
    """
    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

    # Sort a copy so the caller's list keeps its original order.
    ordered = sorted(clippings, key=lambda clip: clip['date'], reverse=True)

    books = defaultdict(list)
    for clip in ordered:
        books[clip['title']].append(clip)
    # Clips are newest-first, so each book's first clip is its latest one.
    book_last_date = {title: clips[0]['date'] for title, clips in books.items()}
    sorted_books = sorted(books.items(),
                          key=lambda item: book_last_date[item[0]],
                          reverse=True)

    parts = [
        "# 📚 Kindle读书笔记(按最后阅读时间排序)\n\n",
        f"> 导出时间:{datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n",
        "## 📋 书籍目录(从新到旧)\n",
    ]
    for i, (title, _) in enumerate(sorted_books, 1):
        last_date = book_last_date[title].strftime('%Y-%m-%d')
        parts.append(f"{i}. [{title}](#{_title_anchor(title)}) *(最后标注:{last_date})*\n")
    parts.append("\n---\n\n")

    for title, clips in sorted_books:
        parts.append(f'<a id="{_title_anchor(title)}"></a>\n')
        parts.append(f"## 📖 {title}\n")
        parts.append(f"*最后标注时间:{book_last_date[title].strftime('%Y-%m-%d %H:%M')}*\n\n")
        for clip in clips:
            icon = '🔖' if clip['type'] == '标注' else '✏️'
            parts.append(f"### {icon} {clip['type']}(第{clip['page']}页·位置 {clip['location']})\n")
            if clip['type'] == '标注':
                # Quote every line so multi-line highlights stay in one blockquote.
                quoted = "\n".join(
                    f"> {line}" for line in (clip['content'].splitlines() or [''])
                )
                parts.append(f"{quoted}\n")
            else:
                parts.append(f"{clip['content']}\n")
            # The trailing blank line keeps a following '---' from turning this
            # line into a setext heading.
            parts.append(f"\n*标注时间:{clip['date'].strftime('%Y-%m-%d %H:%M')}*\n\n")
        parts.append("---\n\n")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(parts)
def main():
    """Command-line entry point.

    Reads the input path and the optional ``--output``/``-o`` path from the
    command line, parses the clippings file, writes the Markdown file and
    prints progress messages along the way.
    """
    cli = argparse.ArgumentParser(description='Kindle剪贴转换终极版')
    cli.add_argument('input_file', help='My Clippings.txt文件路径')
    cli.add_argument('--output', '-o', default='Kindle_Notes.md', help='输出文件路径')
    options = cli.parse_args()

    print("⏳ 正在解析Kindle剪贴文件...")
    notes = parse_clippings_file(options.input_file)

    # Distinct titles give the number of books the notes come from.
    book_titles = {note['title'] for note in notes}
    print(f"✅ 找到 {len(notes)} 条笔记(来自 {len(book_titles)} 本书)")

    print("🔄 正在生成Markdown文件...")
    generate_markdown(notes, options.output)

    destination = os.path.abspath(options.output)
    print(f"🎉 转换完成:{destination}")


# Run the converter only when the file is executed as a script.
if __name__ == '__main__':
    main()
|