# Import a Git repository's commit history (authors, commits, file changes) into Neo4j.
import os
import sys
from datetime import datetime, timezone

import git
from neo4j import GraphDatabase

import scriptBase.base
# --- Neo4j connection settings ---
# Each value can be overridden by the environment variable of the same name;
# the literals below are only the fallback defaults, so existing setups keep
# working unchanged.
# NOTE(review): a real password should not live in source control -- prefer
# exporting NEO4J_PASSWORD in the environment.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://99.12.23.49:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "1234.cc-")
# --- Utility: wipe the Neo4j database ---
def clear_neo4j_database():
    """Delete every node and relationship in the Neo4j database.

    Connects with the module-level ``NEO4J_URI`` / ``NEO4J_USER`` /
    ``NEO4J_PASSWORD`` settings and runs ``MATCH (n) DETACH DELETE n``.
    Failures are caught and reported on stdout rather than raised.

    Warning: this operation cannot be undone.
    """
    try:
        # Driver and session are used as context managers so both are
        # released even when the query fails part-way through.
        with GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
            with driver.session() as session:
                print("正在清空 Neo4j 資料庫,此操作無法撤銷...")
                # DETACH DELETE removes a node's relationships together
                # with the node. consume() waits for the server to finish,
                # so an error surfaces here instead of after the success
                # message has already been printed.
                session.run("MATCH (n) DETACH DELETE n").consume()
                print("資料庫已成功清空。")
    except Exception as e:
        print(f"清空 Neo4j 失敗: {e}")
# --- Core: extract Git history and load it into Neo4j ---
def _count_changed_lines(diff_text):
    """Return ``(lines_added, lines_deleted)`` for one file's unified diff.

    Only lines inside hunks (after the first ``@@`` header) are counted, so
    ``---`` / ``+++`` file headers are ignored while real content lines that
    happen to start with ``++`` or ``--`` are still counted.
    """
    added = deleted = 0
    in_hunk = False
    for line in diff_text.split('\n'):
        if line.startswith('@@'):
            in_hunk = True
        elif in_hunk and line.startswith('+'):
            added += 1
        elif in_hunk and line.startswith('-'):
            deleted += 1
    return added, deleted


def _classify_change(diff):
    """Return the change-type letter for one diff entry."""
    if diff.change_type is not None:
        return diff.change_type
    # NOTE(review): patch-format diffs appear to leave ``change_type`` unset,
    # so derive it from the boolean flags -- confirm against the installed
    # GitPython version.
    for flag, letter in (
        ('new_file', 'A'),
        ('deleted_file', 'D'),
        ('renamed_file', 'R'),
        ('copied_file', 'C'),
    ):
        if getattr(diff, flag, False):
            return letter
    return 'M'


def _collect_file_changes(commit):
    """Describe every file changed by *commit* relative to its first parent."""
    if commit.parents:
        # ``a.diff(b)`` describes the change going from a to b, so the parent
        # has to be on the left-hand side; ``commit.diff(parent)`` would
        # report additions as deletions and swap the paths.
        diff_index = commit.parents[0].diff(commit, create_patch=True)
    else:
        # Root commit: compare against the empty tree.
        diff_index = commit.diff(git.NULL_TREE, create_patch=True)

    file_changes = []
    for diff in diff_index:
        diff_text = diff.diff.decode('utf-8', errors='ignore')
        lines_added, lines_deleted = _count_changed_lines(diff_text)
        file_changes.append({
            "change_type": _classify_change(diff),
            "old_path": diff.a_path if diff.a_path is not None else "",
            "new_path": diff.b_path if diff.b_path is not None else "",
            "lines_added": lines_added,
            "lines_deleted": lines_deleted,
        })
    return file_changes


def _write_commit(tx, commit_data):
    """Transaction function: write one commit, its author and its file changes."""
    # Create or match the author node.
    tx.run("""
        MERGE (a:Author {email: $author_email})
        ON CREATE SET a.name = $author_name
    """, author_email=commit_data['author_email'], author_name=commit_data['author_name'])

    # Create the commit node.
    tx.run("""
        MERGE (c:Commit {id: $commit_id})
        ON CREATE SET c.message = $message, c.timestamp = $timestamp
    """, commit_id=commit_data['commit_id'], message=commit_data['commit_message'],
           timestamp=commit_data['committed_datetime'])

    # Link the author to the commit.
    tx.run("""
        MATCH (a:Author {email: $author_email})
        MATCH (c:Commit {id: $commit_id})
        MERGE (a)-[:AUTHORED]->(c)
    """, author_email=commit_data['author_email'], commit_id=commit_data['commit_id'])

    # Link the commit to its parent. The parent is only MATCHed, so it must
    # already exist; the caller walks history parents-first to ensure that.
    if commit_data['parent_id']:
        tx.run("""
            MATCH (parent:Commit {id: $parent_id})
            MATCH (child:Commit {id: $child_id})
            MERGE (child)-[:PARENT]->(parent)
        """, parent_id=commit_data['parent_id'], child_id=commit_data['commit_id'])

    # Link the commit to every file it changed.
    for file_change in commit_data['file_changes']:
        # A deleted file has no new path; fall back to the old one so all
        # deletions do not collapse onto a single File node with path "".
        file_path = file_change['new_path'] or file_change['old_path']
        if not file_path:
            continue

        tx.run("""
            MERGE (f:File {path: $file_path})
        """, file_path=file_path)

        tx.run("""
            MATCH (c:Commit {id: $commit_id})
            MATCH (f:File {path: $file_path})
            MERGE (c)-[:CHANGED {
                change_type: $change_type,
                lines_added: $lines_added,
                lines_deleted: $lines_deleted
            }]->(f)
        """,
               commit_id=commit_data['commit_id'],
               file_path=file_path,
               change_type=file_change['change_type'],
               lines_added=file_change['lines_added'],
               lines_deleted=file_change['lines_deleted'])


def import_git_to_neo4j(repo_path, since=None):
    """Extract a Git repository's commit history and load it into Neo4j.

    Creates ``Author``, ``Commit`` and ``File`` nodes linked by ``AUTHORED``,
    ``PARENT`` and ``CHANGED`` relationships. Merge commits (more than one
    parent) are skipped. Problems with a single commit are reported on stdout
    and the import carries on with the next commit.

    Args:
        repo_path (str): Local path of the Git repository.
        since (datetime, optional): Only import commits after this time.
            When None, the full history is imported.

    Returns:
        None. Invalid paths and connection failures are reported on stdout.
    """
    # Validate the repository. git.Repo is the authority on what counts as a
    # repository, which also accepts worktrees/submodules whose ``.git`` is a
    # file rather than a directory.
    if not os.path.exists(repo_path):
        print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫路徑。")
        return
    try:
        repo = git.Repo(repo_path)
    except (git.InvalidGitRepositoryError, git.NoSuchPathError):
        print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫。請檢查路徑。")
        return

    # Connect to Neo4j.
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    except Exception as e:
        repo.close()
        print(f"連接 Neo4j 失敗: {e}")
        return

    try:
        # Creating the driver does not contact the server, so check
        # connectivity before reporting success.
        try:
            driver.verify_connectivity()
        except Exception as e:
            print(f"連接 Neo4j 失敗: {e}")
            return
        print("成功連接到 Neo4j 資料庫。")

        # Walk history parents-first (--reverse --topo-order) so that a
        # commit's parent node already exists when the PARENT relationship
        # is matched.
        rev_list_options = {'reverse': True, 'topo_order': True}
        if since:
            print(f"正在進行增量匯入,從 {since.isoformat()} 開始...")
            rev_list_options['after'] = since.astimezone(timezone.utc)
        else:
            print("正在進行全量匯入...")
        commits = repo.iter_commits('--all', **rev_list_options)

        with driver.session() as session:
            for commit in commits:
                # Skip merge commits.
                if len(commit.parents) > 1:
                    continue

                try:
                    file_changes = _collect_file_changes(commit)
                except Exception as e:
                    print(f"警告: 處理提交 {commit.hexsha} 的文件變更時發生錯誤: {e}")
                    continue

                commit_data = {
                    "commit_id": commit.hexsha,
                    "parent_id": commit.parents[0].hexsha if commit.parents else None,
                    "author_name": commit.author.name,
                    "author_email": commit.author.email,
                    "committed_datetime": commit.committed_datetime.isoformat(),
                    "commit_message": commit.message.strip(),
                    "file_changes": file_changes,
                }

                try:
                    session.execute_write(_write_commit, commit_data)
                    print(f"已匯入提交: {commit.hexsha[:7]}, commit_data: {commit_data}")
                except Exception as e:
                    print(f"警告: 匯入提交 {commit.hexsha[:7]} 時發生錯誤: {e}, commit_data: {commit_data}")
    finally:
        # Release the connection and the repository handles on every path.
        driver.close()
        repo.close()
    print("匯入完成,Neo4j 連接已關閉。")
# --- Script entry point ---
if __name__ == "__main__":
    # Repository to import: the first command-line argument when one is
    # given, otherwise the original hard-coded default path.
    repo_path = sys.argv[1] if len(sys.argv) > 1 else 'D:/mayun/LR04.02_RVCTerminalPlus'

    # Step 1: wipe the database.
    # A full import is normally preceded by a wipe to avoid duplicated data.
    print("----- 開始清空資料庫 -----")
    clear_neo4j_database()
    print("----- 資料庫清空完成 -----")

    # Step 2: run the full import.
    print("\n" + "="*50 + "\n")
    print("----- 執行全量匯入 -----")
    import_git_to_neo4j(repo_path)
    print("----- 全量匯入完成 -----")

    # Example: run an incremental import instead.
    # Choose the start of the window, e.g. 1 July 2024 (UTC):
    # start_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)
    # print("\n" + "="*50 + "\n")
    # print("----- 執行增量匯入 -----")
    # import_git_to_neo4j(repo_path, since=start_time)