Ubuntu 下用 Fortran 做文本分析的实用路线
一 环境准备与编译运行
安装编译器:sudo apt update && sudo apt install gfortran
编译:gfortran -O3 -o text_ana text_ana.f90(开启 -O3 优化以提速)
运行:./text_ana
二 基础文本读写与行级解析
用 open 打开文件(用 newunit= 把系统分配的可用单元号存入一个整型变量),再逐行 read 到字符串。
用 index 查找分隔符位置,循环切分得到“词”数组。
用内部 read 将“词”转换为数值;对每行做 trim(adjustl(...)) 去除两端空白(单独的 trim 只去除尾部空白)。
示例程序:
program word_split_sum
! Body of program word_split_sum.
!
! Reads data.txt line by line, splits every line on blanks and tabs, converts
! each token that looks like a number to a real, and prints the count, sum and
! mean of the numbers found. Tokens that are not numeric are skipped.
use, intrinsic :: iso_fortran_env, only: dp => real64, error_unit
implicit none

! Characters that separate tokens: blank and horizontal tab.
character(len=*), parameter :: blanks = ' ' // achar(9)
! Only tokens built from these characters are handed to the numeric read.
! This keeps list-directed input from silently accepting '/', ',' or 'r*c'
! forms, which return iostat == 0 without (re)defining the target value.
! Side effect: textual 'nan' / 'inf' tokens are skipped as well.
character(len=*), parameter :: numeric_chars = '0123456789+-.eEdD'
! Size of the buffer used for each non-advancing read; lines may be longer.
integer, parameter :: chunk_len = 256

character(len=:), allocatable :: line
character(len=:), allocatable :: toks(:)
integer :: u, i, n, ios, conv_stat
real(dp) :: x, sum_x, mean_x

! newunit= needs an integer variable; the runtime stores a free unit in it.
open(newunit=u, file='data.txt', status='old', action='read', iostat=ios)
if (ios /= 0) error stop 'Cannot open data.txt'

sum_x = 0.0_dp
n = 0

do
  call read_line(u, line, ios)
  if (ios /= 0) exit

  ! Simple whitespace split.
  call split_blank(line, toks)

  ! Convert to numbers and accumulate.
  do i = 1, size(toks)
    if (verify(toks(i)(:len_trim(toks(i))), numeric_chars) /= 0) cycle
    read(toks(i), *, iostat=conv_stat) x
    if (conv_stat == 0) then
      sum_x = sum_x + x
      n = n + 1
    end if
  end do
end do

! Leaving the loop on anything other than end-of-file means the totals
! below cover only part of the file; say so instead of hiding it.
if (.not. is_iostat_end(ios)) then
  write(error_unit, '(a,i0)') 'Warning: read stopped early, iostat=', ios
end if
close(u)

if (n > 0) then
  mean_x = sum_x / real(n, kind=dp)
  print '("Count=",I0," Sum=",F12.6," Mean=",F12.6)', n, sum_x, mean_x
else
  print *, 'No numbers found.'
end if

contains

  ! Read one complete record of any length from `unit` into `text`.
  ! `stat` is 0 when a line was read, otherwise the iostat value of the
  ! failing read (end-of-file or an error).
  subroutine read_line(unit, text, stat)
    integer, intent(in) :: unit
    character(len=:), allocatable, intent(out) :: text
    integer, intent(out) :: stat
    character(len=chunk_len) :: chunk
    integer :: nread

    text = ''
    do
      ! Pre-set so a failed read never leaves the count undefined.
      nread = 0
      read(unit, '(a)', advance='no', iostat=stat, size=nread) chunk
      if (stat == 0) then
        ! Buffer filled and the record continues: keep reading.
        text = text // chunk(:nread)
      else if (is_iostat_eor(stat)) then
        ! End of record: the line is complete.
        text = text // chunk(:nread)
        stat = 0
        return
      else
        ! End-of-file right after partial data still yields that data as a
        ! final line; the next call then reports end-of-file.
        if (is_iostat_end(stat) .and. len(text) > 0) stat = 0
        return
      end if
    end do
  end subroutine read_line

  ! Split `text` on runs of blanks/tabs into `toks`. The result has one
  ! element per token (size 0 when there are none) and an element length
  ! equal to the longest token, so no token is truncated.
  subroutine split_blank(text, toks)
    character(len=*), intent(in) :: text
    character(len=:), allocatable, intent(out) :: toks(:)
    integer :: pass, pos, first, last, count, widest

    widest = 0
    ! Pass 1 counts tokens and finds the widest; pass 2 allocates and fills.
    do pass = 1, 2
      if (pass == 2) allocate(character(len=widest) :: toks(count))
      count = 0
      pos = 1
      do while (pos <= len(text))
        ! Skip separators to the first character of the next token.
        first = verify(text(pos:), blanks)
        if (first == 0) exit
        first = first + pos - 1
        ! The token runs up to the character before the next separator.
        last = scan(text(first:), blanks)
        if (last == 0) then
          last = len(text)
        else
          last = first + last - 2
        end if
        count = count + 1
        if (pass == 1) then
          widest = max(widest, last - first + 1)
        else
          toks(count) = text(first:last)
        end if
        pos = last + 1
      end do
    end do
  end subroutine split_blank

end program word_split_sum
index(string, substring, back) 用于定位子串位置,便于按分隔符切分。用 iachar 判断字符是否落在可打印字符范围内,可快速实现“去两端空白/可见字符边界”一类的实用函数。
三 进阶处理与常用算法
正则表达式:可使用 ftlRegex(对 POSIX/PCRE 的封装)。也可以先安装 PCRE 开发包:sudo apt-get install libpcre3-dev,随后在 Fortran 中通过 iso_c_binding 调用其 C 接口完成匹配/提取。
结构化文本:简单格式可用 index 或“有限状态机”解析;CSV 需处理引号内逗号与转义。
文件定位:用 rewind、backspace 控制文件指针。
四 实用建议与排错要点
对于固定分隔符的简单切分,内置的 index 足够高效。