repo_diff.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. package git
  2. import (
  3. "bufio"
  4. "bytes"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "strconv"
  9. "strings"
  10. "time"
  11. )
  // DiffLineType identifies the role a single line plays inside a diff.
type DiffLineType uint8

// Line kinds. Values start at 1, so the zero value of DiffLineType does not
// correspond to any of them.
const (
	// DIFF_LINE_PLAIN is a context line, present on both sides.
	DIFF_LINE_PLAIN DiffLineType = 1
	// DIFF_LINE_ADD is a line that exists only on the new side.
	DIFF_LINE_ADD DiffLineType = 2
	// DIFF_LINE_DEL is a line that exists only on the old side.
	DIFF_LINE_DEL DiffLineType = 3
	// DIFF_LINE_SECTION is a hunk header ("@@ ... @@") line.
	DIFF_LINE_SECTION DiffLineType = 4
)
  // DiffFileType describes what happened to a file between the two sides of a diff.
type DiffFileType uint8

// File statuses. Values start at 1, so the zero value of DiffFileType does not
// correspond to any of them.
const (
	// DIFF_FILE_ADD marks a newly created file.
	DIFF_FILE_ADD DiffFileType = 1
	// DIFF_FILE_CHANGE marks a file whose content was modified.
	DIFF_FILE_CHANGE DiffFileType = 2
	// DIFF_FILE_DEL marks a removed file.
	DIFF_FILE_DEL DiffFileType = 3
	// DIFF_FILE_RENAME marks a renamed file.
	DIFF_FILE_RENAME DiffFileType = 4
)
  28. // DiffLine represents a line in diff.
  29. type DiffLine struct {
  30. LeftIdx int
  31. RightIdx int
  32. Type DiffLineType
  33. Content string
  34. }
  35. func (d *DiffLine) GetType() int {
  36. return int(d.Type)
  37. }
  38. // DiffSection represents a section in diff.
  39. type DiffSection struct {
  40. Name string
  41. Lines []*DiffLine
  42. }
  43. // Line returns a specific line by type (add or del) and file line number from a section.
  44. func (diffSection *DiffSection) Line(lineType DiffLineType, idx int) *DiffLine {
  45. var (
  46. difference = 0
  47. addCount = 0
  48. delCount = 0
  49. matchDiffLine *DiffLine
  50. )
  51. LOOP:
  52. for _, diffLine := range diffSection.Lines {
  53. switch diffLine.Type {
  54. case DIFF_LINE_ADD:
  55. addCount++
  56. case DIFF_LINE_DEL:
  57. delCount++
  58. default:
  59. if matchDiffLine != nil {
  60. break LOOP
  61. }
  62. difference = diffLine.RightIdx - diffLine.LeftIdx
  63. addCount = 0
  64. delCount = 0
  65. }
  66. switch lineType {
  67. case DIFF_LINE_DEL:
  68. if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
  69. matchDiffLine = diffLine
  70. }
  71. case DIFF_LINE_ADD:
  72. if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
  73. matchDiffLine = diffLine
  74. }
  75. }
  76. }
  77. if addCount == delCount {
  78. return matchDiffLine
  79. }
  80. return nil
  81. }
  82. // DiffFile represents a file in diff.
  83. type DiffFile struct {
  84. Name string
  85. OldName string
  86. Index string // 40-byte SHA, Changed/New: new SHA; Deleted: old SHA
  87. Addition, Deletion int
  88. Type DiffFileType
  89. IsCreated bool
  90. IsDeleted bool
  91. IsBin bool
  92. IsRenamed bool
  93. IsSubmodule bool
  94. Sections []*DiffSection
  95. IsIncomplete bool
  96. }
  97. func (diffFile *DiffFile) GetType() int {
  98. return int(diffFile.Type)
  99. }
  100. func (diffFile *DiffFile) NumSections() int {
  101. return len(diffFile.Sections)
  102. }
  103. // Diff contains all information of a specific diff output.
  104. type Diff struct {
  105. TotalAddition, TotalDeletion int
  106. Files []*DiffFile
  107. IsIncomplete bool
  108. }
  109. func (diff *Diff) NumFiles() int {
  110. return len(diff.Files)
  111. }
  112. const _DIFF_HEAD = "diff --git "
  113. // ParsePatch takes a reader and parses everything it receives in diff format.
  114. func ParsePatch(done chan<- error, maxLines, maxLineCharacteres, maxFiles int, reader io.Reader) *Diff {
  115. var (
  116. diff = &Diff{Files: make([]*DiffFile, 0)}
  117. curFile *DiffFile
  118. curSection = &DiffSection{
  119. Lines: make([]*DiffLine, 0, 10),
  120. }
  121. leftLine, rightLine int
  122. lineCount int
  123. curFileLinesCount int
  124. )
  125. input := bufio.NewReader(reader)
  126. isEOF := false
  127. for !isEOF {
  128. // TODO: would input.ReadBytes be more memory-efficient?
  129. line, err := input.ReadString('\n')
  130. if err != nil {
  131. if err == io.EOF {
  132. isEOF = true
  133. } else {
  134. done <- fmt.Errorf("ReadString: %v", err)
  135. return nil
  136. }
  137. }
  138. if len(line) > 0 && line[len(line)-1] == '\n' {
  139. // Remove line break.
  140. line = line[:len(line)-1]
  141. }
  142. if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") || len(line) == 0 {
  143. continue
  144. }
  145. curFileLinesCount++
  146. lineCount++
  147. // Diff data too large, we only show the first about maxlines lines
  148. if curFileLinesCount >= maxLines || len(line) >= maxLineCharacteres {
  149. curFile.IsIncomplete = true
  150. }
  151. switch {
  152. case line[0] == ' ':
  153. diffLine := &DiffLine{Type: DIFF_LINE_PLAIN, Content: line, LeftIdx: leftLine, RightIdx: rightLine}
  154. leftLine++
  155. rightLine++
  156. curSection.Lines = append(curSection.Lines, diffLine)
  157. continue
  158. case line[0] == '@':
  159. curSection = &DiffSection{}
  160. curFile.Sections = append(curFile.Sections, curSection)
  161. ss := strings.Split(line, "@@")
  162. diffLine := &DiffLine{Type: DIFF_LINE_SECTION, Content: line}
  163. curSection.Lines = append(curSection.Lines, diffLine)
  164. // Parse line number.
  165. ranges := strings.Split(ss[1][1:], " ")
  166. leftLine, _ = strconv.Atoi(strings.Split(ranges[0], ",")[0][1:])
  167. if len(ranges) > 1 {
  168. rightLine, _ = strconv.Atoi(strings.Split(ranges[1], ",")[0])
  169. } else {
  170. rightLine = leftLine
  171. }
  172. continue
  173. case line[0] == '+':
  174. curFile.Addition++
  175. diff.TotalAddition++
  176. diffLine := &DiffLine{Type: DIFF_LINE_ADD, Content: line, RightIdx: rightLine}
  177. rightLine++
  178. curSection.Lines = append(curSection.Lines, diffLine)
  179. continue
  180. case line[0] == '-':
  181. curFile.Deletion++
  182. diff.TotalDeletion++
  183. diffLine := &DiffLine{Type: DIFF_LINE_DEL, Content: line, LeftIdx: leftLine}
  184. if leftLine > 0 {
  185. leftLine++
  186. }
  187. curSection.Lines = append(curSection.Lines, diffLine)
  188. case strings.HasPrefix(line, "Binary"):
  189. curFile.IsBin = true
  190. continue
  191. }
  192. // Get new file.
  193. if strings.HasPrefix(line, _DIFF_HEAD) {
  194. middle := -1
  195. // Note: In case file name is surrounded by double quotes (it happens only in git-shell).
  196. // e.g. diff --git "a/xxx" "b/xxx"
  197. hasQuote := line[len(_DIFF_HEAD)] == '"'
  198. if hasQuote {
  199. middle = strings.Index(line, ` "b/`)
  200. } else {
  201. middle = strings.Index(line, " b/")
  202. }
  203. beg := len(_DIFF_HEAD)
  204. a := line[beg+2 : middle]
  205. b := line[middle+3:]
  206. if hasQuote {
  207. a = string(UnescapeChars([]byte(a[1 : len(a)-1])))
  208. b = string(UnescapeChars([]byte(b[1 : len(b)-1])))
  209. }
  210. curFile = &DiffFile{
  211. Name: a,
  212. Type: DIFF_FILE_CHANGE,
  213. Sections: make([]*DiffSection, 0, 10),
  214. }
  215. diff.Files = append(diff.Files, curFile)
  216. if len(diff.Files) >= maxFiles {
  217. diff.IsIncomplete = true
  218. io.Copy(ioutil.Discard, reader)
  219. break
  220. }
  221. curFileLinesCount = 0
  222. // Check file diff type and submodule.
  223. CHECK_TYPE:
  224. for {
  225. line, err := input.ReadString('\n')
  226. if err != nil {
  227. if err == io.EOF {
  228. isEOF = true
  229. } else {
  230. done <- fmt.Errorf("ReadString: %v", err)
  231. return nil
  232. }
  233. }
  234. switch {
  235. case strings.HasPrefix(line, "new file"):
  236. curFile.Type = DIFF_FILE_ADD
  237. curFile.IsCreated = true
  238. curFile.IsSubmodule = strings.HasSuffix(line, " 160000\n")
  239. case strings.HasPrefix(line, "deleted"):
  240. curFile.Type = DIFF_FILE_DEL
  241. curFile.IsDeleted = true
  242. curFile.IsSubmodule = strings.HasSuffix(line, " 160000\n")
  243. case strings.HasPrefix(line, "index"):
  244. if curFile.IsDeleted {
  245. curFile.Index = line[6:46]
  246. } else if len(line) >= 88 {
  247. curFile.Index = line[49:88]
  248. } else {
  249. curFile.Index = curFile.Name
  250. }
  251. break CHECK_TYPE
  252. case strings.HasPrefix(line, "similarity index 100%"):
  253. curFile.Type = DIFF_FILE_RENAME
  254. curFile.IsRenamed = true
  255. curFile.OldName = curFile.Name
  256. curFile.Name = b
  257. curFile.Index = b
  258. break CHECK_TYPE
  259. case strings.HasPrefix(line, "old mode"):
  260. break CHECK_TYPE
  261. }
  262. }
  263. }
  264. }
  265. done <- nil
  266. return diff
  267. }
  268. // GetDiffRange returns a parsed diff object between given commits.
  269. func GetDiffRange(repoPath, beforeCommitID, afterCommitID string, maxLines, maxLineCharacteres, maxFiles int) (*Diff, error) {
  270. repo, err := OpenRepository(repoPath)
  271. if err != nil {
  272. return nil, err
  273. }
  274. commit, err := repo.GetCommit(afterCommitID)
  275. if err != nil {
  276. return nil, err
  277. }
  278. cmd := NewCommand()
  279. if len(beforeCommitID) == 0 {
  280. // First commit of repository
  281. if commit.ParentCount() == 0 {
  282. cmd.AddArguments("show", "--full-index", afterCommitID)
  283. } else {
  284. c, _ := commit.Parent(0)
  285. cmd.AddArguments("diff", "--full-index", "-M", c.ID.String(), afterCommitID)
  286. }
  287. } else {
  288. cmd.AddArguments("diff", "--full-index", "-M", beforeCommitID, afterCommitID)
  289. }
  290. stdout, w := io.Pipe()
  291. done := make(chan error)
  292. var diff *Diff
  293. go func() {
  294. diff = ParsePatch(done, maxLines, maxLineCharacteres, maxFiles, stdout)
  295. }()
  296. stderr := new(bytes.Buffer)
  297. err = cmd.RunInDirTimeoutPipeline(2*time.Minute, repoPath, w, stderr)
  298. w.Close() // Close writer to exit parsing goroutine
  299. if err != nil {
  300. return nil, concatenateError(err, stderr.String())
  301. }
  302. return diff, <-done
  303. }
  // RawDiffType selects the output format produced by GetRawDiff.
type RawDiffType string

const (
	// RAW_DIFF_NORMAL requests plain diff output.
	RAW_DIFF_NORMAL = RawDiffType("diff")
	// RAW_DIFF_PATCH requests format-patch output.
	RAW_DIFF_PATCH = RawDiffType("patch")
)
  310. // GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
  311. func GetRawDiff(repoPath, commitID string, diffType RawDiffType, writer io.Writer) error {
  312. repo, err := OpenRepository(repoPath)
  313. if err != nil {
  314. return fmt.Errorf("OpenRepository: %v", err)
  315. }
  316. commit, err := repo.GetCommit(commitID)
  317. if err != nil {
  318. return err
  319. }
  320. cmd := NewCommand()
  321. switch diffType {
  322. case RAW_DIFF_NORMAL:
  323. if commit.ParentCount() == 0 {
  324. cmd.AddArguments("show", commitID)
  325. } else {
  326. c, _ := commit.Parent(0)
  327. cmd.AddArguments("diff", "-M", c.ID.String(), commitID)
  328. }
  329. case RAW_DIFF_PATCH:
  330. if commit.ParentCount() == 0 {
  331. cmd.AddArguments("format-patch", "--no-signature", "--stdout", "--root", commitID)
  332. } else {
  333. c, _ := commit.Parent(0)
  334. query := fmt.Sprintf("%s...%s", commitID, c.ID.String())
  335. cmd.AddArguments("format-patch", "--no-signature", "--stdout", query)
  336. }
  337. default:
  338. return fmt.Errorf("invalid diffType: %s", diffType)
  339. }
  340. stderr := new(bytes.Buffer)
  341. if err = cmd.RunInDirPipeline(repoPath, writer, stderr); err != nil {
  342. return concatenateError(err, stderr.String())
  343. }
  344. return nil
  345. }
  346. // GetDiffCommit returns a parsed diff object of given commit.
  347. func GetDiffCommit(repoPath, commitID string, maxLines, maxLineCharacteres, maxFiles int) (*Diff, error) {
  348. return GetDiffRange(repoPath, "", commitID, maxLines, maxLineCharacteres, maxFiles)
  349. }