import  warnings
from  pathlib import  Path
import  subprocess
from  itertools import  product
import  numpy as  np
import  pandas as  pd
from  MDAnalysis import  Universe
from  openbabel import  pybel
import  os
import  requests  
warnings. filterwarnings( "ignore" ) 
HERE =  Path( os. getcwd( ) ) 
DATA =  HERE /  'data' 
DATA. mkdir( parents= True ,  exist_ok= True ) 
RESULT_CSV =  DATA /  'result_dock.csv' 
if  not  RESULT_CSV. exists( ) : pd. DataFrame( columns= [ 'pdb' ,  'ligand_resnames' ,  'smiles' ,  'dock_info' ] ) . to_csv( RESULT_CSV,  index= False ) class  Structure ( Universe) : """Core object to load structures with.""" @classmethod def  from_string ( cls,  pdb_path) : """Load a structure from a local PDB file.""" return  cls( pdb_path) def  download_pdb ( pdb_id,  save_path) : """从 RCSB PDB 数据库下载 PDB 文件.""" url =  f"https://files.rcsb.org/download/ { pdb_id} .pdb" =  requests. get( url) if  response. status_code ==  200 : with  open ( save_path,  "wb" )  as  file : file . write( response. content) print ( f" { pdb_id}  下载成功." ) else : raise  ValueError( f"无法下载  { pdb_id} :  { response. status_code} " ) 
def  extract_protein_to_pdb ( structure,  protein_path) : """Extract protein from structure and save it as PDB.""" protein =  structure. select_atoms( "protein" ) protein. write( str ( protein_path) ) 
def  find_all_ligand_resnames ( structure) : """返回所有非蛋白质和非水分子的残基名称列表""" ligand_atoms =  structure. select_atoms( "not protein and not resname HOH" ) return  list ( set ( ligand_atoms. resnames) ) def  pdb_to_pdbqt ( pdb_path,  pdbqt_path,  pH= 7.4 ) : """Convert a PDB file to a PDBQT file.""" molecule =  list ( pybel. readfile( "pdb" ,  str ( pdb_path) ) ) [ 0 ] molecule. OBMol. CorrectForPH( pH) molecule. addh( ) for  atom in  molecule. atoms: atom. OBAtom. GetPartialCharge( ) molecule. write( "pdbqt" ,  str ( pdbqt_path) ,  overwrite= True ) def  smiles_to_pdbqt ( smiles,  pdbqt_path,  pH= 7.4 ) : """Convert a SMILES string to a PDBQT file.""" molecule =  pybel. readstring( "smi" ,  smiles) molecule. OBMol. CorrectForPH( pH) molecule. addh( ) molecule. make3D( forcefield= "mmff94s" ,  steps= 10000 ) for  atom in  molecule. atoms: atom. OBAtom. GetPartialCharge( ) molecule. write( "pdbqt" ,  str ( pdbqt_path) ,  overwrite= True ) def  run_smina ( ligand_path,  protein_path,  out_path,  pocket_center,  pocket_size) : """Perform docking with Smina.""" output_text =  subprocess. check_output( [ "smina" , "--receptor" ,  str ( protein_path) , "--ligand" ,  str ( ligand_path) , "--out" ,  str ( out_path) , "--center_x" ,  str ( pocket_center[ 0 ] ) , "--center_y" ,  str ( pocket_center[ 1 ] ) , "--center_z" ,  str ( pocket_center[ 2 ] ) , "--size_x" ,  str ( pocket_size[ 0 ] ) , "--size_y" ,  str ( pocket_size[ 1 ] ) , "--size_z" ,  str ( pocket_size[ 2 ] ) ] ) return  output_text. decode( "utf-8" ) def  parse_dock_info ( dock_output) : """解析 Smina 输出中的对接信息""" dock_lines =  dock_output. splitlines( ) dock_data =  [ ] capture =  False for  line in  dock_lines: if  "mode"  in  line and  "affinity"  in  line:   capture =  True continue if  capture: if  line. strip( )  ==  ""  or  "Refine time"  in  line or  "Loop time"  in  line: break dock_data. append( line. strip( ) ) return  "\n" . join( dock_data) def  split_sdf_file ( sdf_path) : """Split an SDF file into separate files for each molecule.""" sdf_path =  Path( sdf_path) stem =  sdf_path. stemparent =  sdf_path. parentmolecules =  pybel. readfile( "sdf" ,  str ( sdf_path) ) for  i,  molecule in  enumerate ( molecules,  1 ) : molecule. write( "sdf" ,  str ( parent /  f" { stem} _ { i} .sdf" ) ,  overwrite= True ) pdb_list =  pd. read_csv( 'data/pdb.csv' ) [ 'pdb_id' ] 
smiles_list =  pd. read_csv( 'data/pic50_greater_equal_9.0.csv' ) [ 'smiles' ] [ : 20 ] 
for  pdb_id,  smiles in  product( pdb_list,  smiles_list) : pdb_dir =  DATA /  f"data_ { pdb_id} " . mkdir( parents= True ,  exist_ok= True ) pdb_path =  pdb_dir /  f" { pdb_id} .pdb" if  not  pdb_path. exists( ) : print ( f" { pdb_id}  文件不存在,正在下载..." ) download_pdb( pdb_id,  pdb_path) structure =  Structure. from_string( pdb_path) protein_path =  pdb_dir /  "protein.pdb" extract_protein_to_pdb( structure,  protein_path) protein_pdbqt_path =  pdb_dir /  "protein.pdbqt" pdb_to_pdbqt( protein_path,  protein_pdbqt_path) all_ligands =  find_all_ligand_resnames( structure) print ( f" { pdb_id}  - Detected Ligands:  { all_ligands} " ) for  ligand_resname in  all_ligands: ligand_dir =  pdb_dir /  f"ligand_ { ligand_resname} " . mkdir( parents= True ,  exist_ok= True ) ligand =  structure. select_atoms( f"resname  { ligand_resname} " ) print ( f"Processing  { pdb_id}   { smiles}   { ligand_resname} " ) pocket_center =  ( ligand. positions. max ( axis= 0 )  +  ligand. positions. min ( axis= 0 ) )  /  2 pocket_size =  ligand. positions. max ( axis= 0 )  -  ligand. positions. min ( axis= 0 )  +  5 smiles_hash =  smiles. replace( "(" ,  "" ) . replace( ")" ,  "" ) . replace( "=" ,  "" ) . replace( "-" ,  "" ) . replace( "/" , "" ) . replace( "\\" ,  "" ) . replace( "." ,  "" ) . replace( "," ,  "" ) . replace( ":" ,  "" ) smiles_dir =  ligand_dir /  f"smiles_ { smiles_hash} " . mkdir( parents= True ,  exist_ok= True ) ligand_path =  smiles_dir /  "ligand.pdbqt" docking_out_path =  smiles_dir /  "ligand_docking.sdf" log_path =  smiles_dir /  "docking_log.txt" smiles_to_pdbqt( smiles,  ligand_path) docking_output =  run_smina( ligand_path,  protein_pdbqt_path,  docking_out_path,  pocket_center,  pocket_size) with  open ( log_path,  "w" )  as  log_file: log_file. write( f"PDB:  { pdb_id} \nSMILES:  { smiles} \nLigand Resname:  { ligand_resname} \n" ) log_file. write( f"Pocket Center:  { pocket_center} \nPocket Size:  { pocket_size} \n\nDocking Output:\n" ) log_file. write( docking_output) split_sdf_file( docking_out_path) dock_info =  parse_dock_info( docking_output) new_row =  { 'pdb' :  pdb_id,  'ligand_resnames' :  ligand_resname,  'smiles' :  smiles,  'dock_info' :  dock_info} result_df =  pd. read_csv( RESULT_CSV) result_df =  pd. concat( [ result_df,  pd. DataFrame( [ new_row] ) ] ,  ignore_index= True ) result_df. to_csv( RESULT_CSV,  index= False ) print ( "Docking workflow completed successfully!" )