-- | This module contains functions for reading the GSE one- and
-- two-electron integral data from a file, converting this data from
-- spatial to spin indices, and accessing the data.
-- 
-- The external interface consists of the type 'GSEData' and the
-- function 'load_gse_data'.
-- 
-- The Quipper distribution contains example data files
-- \"@h_1e_ascii@\" and \"@h_2e_ascii@\". These files contain enough
-- data for /M/ = 32 spin orbitals (corresponding to /M/\/2 = 16
-- spatial orbitals). Note that the example data was randomly
-- generated and is only a mock-up. In actual applications, physically
-- meaningful data should be substituted.

module Algorithms.GSE.GSEData where

import Data.Array
import Data.Bits
import Data.Char

-- * Data abstraction
  
-- | The GSE integral data: the number of spin orbitals, together
-- with lookup functions that return the one- and two-electron
-- integrals for a given tuple of indices.
data GSEData = GSEData
  { gse_data_M :: Int
    -- ^ The number of spin orbitals /M/.
  , gse_data_h1 :: (Int,Int) -> Double
    -- ^ 1-electron integrals /h/[sub p,q] in spin coordinates.
  , gse_data_h2 :: (Int,Int,Int,Int) -> Double
    -- ^ 2-electron integrals /h/[sub p,q,r,s] in spin coordinates.
    -- Follows the physics convention for the ordering of indices.
  }

-- | Only the number of spin orbitals is shown; the integral tables
-- are functions and are not printed.
instance Show GSEData where
    show gse = concat ["GSEData { size = ", show (gse_data_M gse), " }"]

-- ----------------------------------------------------------------------
-- * Reading GSE data from files

-- $ This section provides functions for reading one- and two-electron
-- GSE data from files. The file formats are as follows. The file for
-- the one-electron data consists of lines of the form:
-- 
-- > ((i, j), h)
-- 
-- where /i/ and /j/ are integer indices in the range from /0/ to
-- /M/\/2−1, and /h/ = /h/[sub i,j] is a real floating point number.
-- Please note that the file contains data for (/i/, /j/) and
-- (/j/, /i/), and that the indices /i/ and /j/ are in /spatial/
-- coordinates. The file data is sorted in order of increasing /i/,
-- then /j/.
-- 
-- The file for the two-electron data consists of lines of the form:
-- 
-- > ((i, j, k, l), h)
-- 
-- where /i/, /j/, /k/, and /l/ are integer indices in the range from
-- /0/ to /M/\/2−1, and /h/ = /h/[sub i,k,l,j] is a real floating point
-- number. Please note that the indices /i/, /j/, /k/, and /l/ are in
-- /spatial/ coordinates, and the ordering of indices in the file
-- follows the /chemists'/ convention. Also, to save storage space,
-- the file only contains data for /i/ ≥ /j/, /k/ ≥ /l/, and either
-- /i/ > /k/, or /i/ = /k/ and /j/ ≥ /l/. The remaining data must be
-- inferred from symmetries. The file data is sorted in order of
-- increasing /i/, then /j/, then /k/, then /l/.
-- 
-- We also note that the data files, and the functions of this module
-- where noted, are the /only/ places where we use Chemists' notation
-- and spatial orbitals. The remainder of our implementation uses
-- physicists' notation and spin orbitals throughout.

-- | Load a 'GSEData' from a pair of integral files. The first
-- argument is /M/, the number of spin orbitals; the second and third
-- arguments are the names of the one-electron and two-electron data
-- files, respectively.
-- 
-- The files hold spatial data, so only (/M/+1) @div@ 2 spatial
-- orbitals are requested from the parsers; the resulting tables are
-- then wrapped by 'spin1' and 'spin2' so that the returned accessors
-- take spin indices.
-- 
-- If a file contains data for more than /M/ spin orbitals, the
-- excess data is ignored (this is useful for generating smaller
-- problem sizes for testing). In this case, only the necessary
-- portion of the file is read. If a file contains data for fewer
-- than /M/ spin orbitals, this is silently ignored, but will lead to
-- an \"undefined\" error later, when a missing entry is accessed.
load_gse_data :: Int -> String -> String -> IO GSEData
load_gse_data size filename1 filename2 = do
  one_electron_text <- readFile filename1
  two_electron_text <- readFile filename2
  let h1_spatial = parsefile1 spatial_size one_electron_text
      h2_spatial = parsefile2 spatial_size two_electron_text
  return GSEData
    { gse_data_M = size
    , gse_data_h1 = spin1 (access_1e h1_spatial)
    , gse_data_h2 = spin2 (access_2e h2_spatial)
    }
  where
    -- Two spin orbitals share one spatial orbital; round up so that
    -- an odd /M/ still gets enough spatial data.
    spatial_size = (size + 1) `div` 2

-- ----------------------------------------------------------------------
-- * Low-level access functions

-- | Look up a 1-electron integral in the array of spatial data. The
-- index pair is spatial, i.e., each component runs from 0 to
-- /M/\/2 − 1. This is plain array indexing.
access_1e :: Array (Int, Int) e -> (Int, Int) -> e
access_1e = (!)

-- | Look up a 2-electron integral. The input array is sparse (it
-- holds only one representative of each symmetry class) and is
-- indexed in chemists' convention; the index tuple passed by the
-- caller is in physicists' convention. All indices, in both input
-- and output, are spatial, i.e., they run from 0 to /M/\/2 − 1.
-- 
-- The relation between the two conventions is
-- h_prsq = h[pq|rs], which is why the argument tuple is destructured
-- in the order (/p/, /r/, /s/, /q/).
-- 
-- Because of the symmetries
-- 
-- >   h2(i,j,k,l) = h2(j,i,k,l)
-- >   h2(i,j,k,l) = h2(i,j,l,k)
-- >   h2(i,j,k,l) = h2(k,l,i,j)
-- 
-- the requested index is first brought into the representative form
-- used by the array: each pair is put in descending order, and the
-- lexicographically larger pair goes first.
access_2e :: Array (Int, Int, Int, Int) e -> (Int, Int, Int, Int) -> e
access_2e arr (p,r,s,q) = arr ! canonical
  where
    -- Chemists' left pair is (p,q) and right pair is (r,s).
    left  = descending (p, q)
    right = descending (r, s)

    -- Tuple comparison is lexicographic: first components decide,
    -- second components break ties.
    canonical
      | left < right = (fst right, snd right, fst left, snd left)
      | otherwise    = (fst left, snd left, fst right, snd right)

    -- Order a pair so that the larger component comes first.
    descending (x, y)
      | x < y     = (y, x)
      | otherwise = (x, y)

-- ----------------------------------------------------------------------
-- * Low-level parsing functions

-- | Test whether a line carries no data. This is the case when the
-- line is empty or consists only of whitespace, or when its first
-- non-whitespace character is \'\#\'.
is_comment :: String -> Bool
is_comment line = case dropWhile isSpace line of
  []      -> True
  ('#':_) -> True
  _       -> False

-- | Extract an array from the one-electron file data. The first
-- argument is the number of spatial orbitals wanted, the second is
-- the complete file content. The resulting array uses spatial
-- indices and has bounds ((0,0), (/size/−1, /size/−1)).
-- 
-- We do this lazily, i.e., we stop reading as soon as enough data is
-- found: since the file is sorted by its first index, parsing stops
-- at the first line whose first index is out of range. Entries whose
-- second index is out of range are skipped. Entries that are missing
-- from the file are left undefined in the array.
-- 
-- Comment lines (see 'is_comment') are ignored. Every other line
-- must have the form @((int, int), double)@, optionally followed by
-- whitespace (such as the carriage return of a CRLF line ending);
-- any other line raises an \"Illegal line\" error.
parsefile1 :: Int -> String -> Array (Int, Int) Double
parsefile1 size content =
  array ((0,0), (n, n)) list3
    where
      n = size-1
      list1 = [ read_line_h1 s | s <- lines content, not (is_comment s) ]
      -- The file is sorted by i, so everything after the first
      -- out-of-range i can be left unread.
      list2 = takeWhile (\((i,_),_) -> i<size) list1
      list3 = filter in_range list2
      -- i is already known to be in range here; only j needs checking.
      in_range ((_,j),_) = j<size

      read_line_h1 :: String -> ((Int,Int), Double)
      read_line_h1 s = case reads s of
        -- 'reads' does not consume trailing whitespace, so accept any
        -- remainder that is blank rather than only the empty one.
        [(x, rest)] | all isSpace rest -> x
        _ -> error ("Illegal line: " ++ s ++ " -- expected format ((int, int), double)")

-- | Extract an array from the two-electron file data. The first
-- argument is the number of spatial orbitals wanted, the second is
-- the complete file content. The resulting array uses spatial
-- indices in chemists' notation. Also, the output array is sparse;
-- it only contains as much data as the file itself, and all other
-- entries are left undefined.
-- 
-- We do this lazily, i.e., we stop reading as soon as enough data is
-- found: since the file is sorted by its first index, parsing stops
-- at the first line whose first index is out of range. Entries with
-- any other index out of range are skipped.
-- 
-- Comment lines (see 'is_comment') are ignored. Every other line
-- must have the form @((int, int, int, int), double)@, optionally
-- followed by whitespace (such as the carriage return of a CRLF line
-- ending); any other line raises an \"Illegal line\" error.
parsefile2 :: Int -> String -> Array (Int, Int, Int, Int) Double
parsefile2 size content =
  array ((0,0,0,0), (n,n,n,n)) list3
    where
      n = size-1
      list1 = [ read_line_h2 s | s <- lines content, not (is_comment s) ]
      -- The file is sorted by i, so everything after the first
      -- out-of-range i can be left unread.
      list2 = takeWhile (\((i,_,_,_),_) -> i<size) list1
      list3 = filter in_range list2
      -- i is already known to be in range here; check the other three.
      in_range ((_,j,k,l),_) = j<size && k<size && l<size

      read_line_h2 :: String -> ((Int,Int,Int,Int), Double)
      read_line_h2 s = case reads s of
        -- 'reads' does not consume trailing whitespace, so accept any
        -- remainder that is blank rather than only the empty one.
        [(x, rest)] | all isSpace rest -> x
        _ -> error ("Illegal line: " ++ s ++ " -- expected format ((int, int, int, int), double)")

-- ----------------------------------------------------------------------
-- * Conversion of spin to spatial indices

-- | The molecule has twice as many orbitals (spin orbitals) as there
-- are entries in the integral file (spatial orbitals). This function
-- lifts /h[sub 1]/ from /M/\/2 spatial orbitals to /M/ spin orbitals
-- (for example, from /M/\/2 = 104 to /M/ = 208).
-- 
-- A spin orbital is indexed by /p/=(/i/, σ/[sub i]/), where /i/ is a
-- spatial index and σ/[sub i]/ is a spin (up or down). For two spin
-- indices /p/=(/i/, σ/[sub i]/) and /q/=(/j/, σ/[sub j]/), the
-- transition integral h[sub pq] is given by the following formula:
-- 
-- \[image spin1.png]
-- 
-- The Hamiltonian vanishes for σ/[sub i]/ ≠ σ/[sub j]/ because we
-- assume that there is no spin orbital coupling.
-- 
-- Given /M/\/2 spatial orbitals, the spin orbitals are numbered by
-- integers from 0 to /M/−1 using the formula /p/ = 2/i/+σ/[sub i]/,
-- where σ[sub i] is 0 or 1. In other words, the lowest bit of /p/ is
-- the spin and /p/ @div@ 2 is the spatial index.
-- 
-- The function 'spin1' inputs (/h[sub ij]/), the table of 1-electron
-- integrals for /M/\/2 spatial orbitals, and outputs the
-- corresponding table (/h[sub pq]/) for /M/ spin orbitals.

spin1 :: ((Int,Int) -> Double) -> ((Int,Int) -> Double)
spin1 h1 (p,q)
  | spin_of p == spin_of q = h1 (spatial_of p, spatial_of q)
  | otherwise              = 0.0
  where
    -- Decode p = 2i + σ into its two components.
    spin_of x    = x .&. 1
    spatial_of x = x `div` 2

-- | The 2-electron counterpart of 'spin1'. Here, the transition
-- integrals in spin coordinates are given by:
-- 
-- \[image spin2.png]
-- 
-- The Hamiltonian vanishes for σ/[sub i]/ ≠ σ/[sub l]/ or σ/[sub j]/
-- ≠ σ/[sub k]/ because we assume that there is no spin orbital
-- coupling. In terms of the spin indices (/p/, /q/, /r/, /s/), the
-- spin of /p/ must equal that of /s/, and the spin of /q/ must equal
-- that of /r/.
-- 
-- The function 'spin2' inputs (/h[sub ijkl]/), the table of
-- 2-electron transition amplitudes for /M/\/2 spatial orbitals, and
-- outputs the corresponding table (/h[sub pqrs]/) for /M/ spin
-- orbitals. Index ordering follows the physicists' convention.

spin2 :: ((Int, Int, Int, Int) -> Double) -> ((Int, Int, Int, Int) -> Double)
spin2 h2 (p,q,r,s)
  | spins_match = h2 (spatial_of p, spatial_of q, spatial_of r, spatial_of s)
  | otherwise   = 0.0
  where
    -- Outer indices must share a spin, and so must the inner ones.
    spins_match = spin_of p == spin_of s && spin_of q == spin_of r
    -- Decode a spin index 2i + σ into its two components.
    spin_of x    = x .&. 1
    spatial_of x = x `div` 2

-- * Testing

-- | Render the /h/[sub 1] data for 1-electron integrals as text, one
-- line of the form @(p,q) : value@ for every pair of spin indices in
-- the range 0 to /M/−1, with the first index varying slowest.
print_1e :: GSEData -> String
print_1e gse_data = unlines (map render index_pairs)
  where
    orbitals    = [0 .. gse_data_M gse_data - 1]
    index_pairs = [ (p, q) | p <- orbitals, q <- orbitals ]
    render pq   = show pq ++ " : " ++ show (gse_data_h1 gse_data pq)

-- | Render the /h/[sub 2] data for 2-electron integrals as text, one
-- line of the form @(p,q,r,s) : value@ for every 4-tuple of spin
-- indices in the range 0 to /M/−1, with the first index varying
-- slowest.
print_2e :: GSEData -> String
print_2e gse_data = unlines (map render index_tuples)
  where
    orbitals     = [0 .. gse_data_M gse_data - 1]
    index_tuples = [ (p, q, r, s) | p <- orbitals, q <- orbitals, r <- orbitals, s <- orbitals ]
    render pqrs  = show pqrs ++ " : " ++ show (gse_data_h2 gse_data pqrs)

-- | A main function to test the GSEData module: load the data for
-- /n/ spin orbitals from the files \"@h_1e_ascii@\" and
-- \"@h_2e_ascii@\" (relative paths), then write the 1-electron table
-- followed by the 2-electron table to standard output.
gse_data_test :: Int -> IO ()
gse_data_test n =
  load_gse_data n "h_1e_ascii" "h_2e_ascii" >>= \gse_data ->
    mapM_ (\render -> putStr (render gse_data)) [print_1e, print_2e]