基于 Streamlit 与 olmOCR 大模型实现的 PDF 提取工具
import os
import json
import subprocess
import pandas as pd
from pathlib import Path
import shutil
import time
import re
import streamlit as st# 创建工作目录
# Name of the directory this tool uses as its workspace.
WORKSPACE_DIR = "olmocr_workspace"
os.maked…
清洗 VOC 格式数据集的代码示例
import os
import xml.etree.ElementTree as ETdef process_annotations(image_folder, annotation_folder):# 遍历标签文件夹中的所有XML文件for xml_file in os.listdir(annotation_folder):if not xml_file.endswith(.xml):continuexml_path os…